| Keyword |
Description |
Syntax |
Example |
| Import Pandas |
Imports the Pandas library. |
import pandas as pd |
import pandas as pd |
| DataFrame |
Creates a tabular data structure from a list or dictionary. |
pd.DataFrame(data) |
data = {'Name': ['Braund, Mr. Owen Harris', 'Heikkinen, Miss. Laina'], 'Age': [22, 26], 'Survived': [0, 1]} df = pd.DataFrame(data) print(df) |
| Series |
Creates a one-dimensional labeled array. |
pd.Series(data) |
ages = pd.Series([22, 38, 26, 35]) print(ages) |
| read_csv() |
Reads a CSV file into a DataFrame. |
pd.read_csv(path) |
df = pd.read_csv('titanic.csv') print(df.head()) |
| read_excel() |
Reads an Excel file into a DataFrame. |
pd.read_excel(path) |
df = pd.read_excel('titanic.xlsx', sheet_name='passengers') |
| Keyword |
Description |
Syntax |
Example |
| head() |
Returns the first n rows. |
df.head(n=5) |
print(df.head(3)) # Shows first 3 passengers |
| tail() |
Returns the last n rows. |
df.tail(n=5) |
print(df.tail(3)) # Shows last 3 passengers |
| info() |
Summary of DataFrame including index, columns, non-null counts, and memory usage. |
df.info() |
df.info() # RangeIndex: 891 entries... # Data columns (total 12 columns)... |
| describe() |
Generates descriptive statistics (mean, std, min, max) for numerical columns. |
df.describe() |
print(df.describe()) # Stats for Age, Fare, Pclass, etc. |
| shape |
Returns a tuple (rows, columns). |
df.shape |
print(df.shape) # (891, 12) |
| columns |
Returns the column labels. |
df.columns |
print(df.columns.tolist()) # ['PassengerId', 'Survived', 'Pclass', 'Name'...] |
| index |
Returns the index (row labels) of the DataFrame. |
df.index |
print(df.index) # RangeIndex(start=0, stop=891, step=1) |
| dtypes |
Returns data types of each column. |
df.dtypes |
print(df.dtypes) # Age: float64, Name: object |
| Keyword |
Description |
Syntax |
Example |
| loc[] |
Access rows/columns by label or boolean array. |
df.loc[labels] |
# Select Name and Age for index 5 to 10 print(df.loc[5:10, ['Name', 'Age']]) |
| iloc[] |
Access rows/columns by integer position. |
df.iloc[positions] |
# Select first 5 rows and first 3 columns print(df.iloc[:5, :3]) |
| set_index() |
Sets a column as the index. |
df.set_index(keys) |
df.set_index('PassengerId', inplace=True) # Rows now accessed by PassengerId |
| reset_index() |
Resets the index to default integer index. |
df.reset_index() |
df.reset_index(inplace=True) |
| Keyword |
Description |
Syntax |
Example |
| Adding Columns |
Create a new column by assigning a value or calculation to a new key. |
df['New'] = val |
# Create 'FamilySize' from SibSp + Parch df['FamilySize'] = df['SibSp'] + df['Parch'] + 1 |
| rename() |
Renames columns or index labels. |
df.rename(columns={...}) |
df.rename(columns={'Pclass': 'PassengerClass'}, inplace=True) |
| drop() |
Removes specified rows or columns. |
df.drop(labels, axis) |
# Drop the 'Cabin' column df.drop('Cabin', axis=1, inplace=True) |
| sort_values() |
Sorts by values along an axis. |
df.sort_values(by) |
# Sort by Class (asc) then Fare (desc) df.sort_values(by=['Pclass', 'Fare'], ascending=[True, False], inplace=True) |
| astype() |
Casts a pandas object to a specified dtype. |
df.astype(dtype) |
# Convert Survived to boolean df['Survived'] = df['Survived'].astype(bool) |
| Keyword |
Description |
Syntax |
Example |
| mean() / median() / mode() |
Central tendency measures. |
df.mean() |
avg_fare = df['Fare'].mean() median_age = df['Age'].median() |
| std() / var() |
Standard deviation and variance. |
df.std() |
age_variability = df['Age'].std() |
| min() / max() |
Minimum and maximum values. |
df.min() |
youngest = df['Age'].min() # 0.42 oldest = df['Age'].max() # 80.0 |
| quantile() |
Values at given quantile (0 to 1). |
df.quantile(q) |
# 75th percentile of Fare high_fare = df['Fare'].quantile(0.75) |
| value_counts() |
Counts of unique values (frequency). |
series.value_counts() |
print(df['Pclass'].value_counts()) # 3: 491, 1: 216, 2: 184 |
| Keyword |
Description |
Syntax |
Example |
| groupby() |
Groups DataFrame using a mapper or by columns. |
df.groupby(by) |
# Average survival rate by Pclass print(df.groupby('Pclass')['Survived'].mean()) |
| agg() |
Aggregates using one or more operations. |
grouped.agg(func) |
# Mean Fare and Max Age per Class print(df.groupby('Pclass').agg({'Fare': 'mean', 'Age': 'max'})) |
| pivot_table() |
Creates a spreadsheet-style pivot table. |
pd.pivot_table() |
# Survival rate by Sex and Class pt = pd.pivot_table(df, values='Survived', index='Sex', columns='Pclass', aggfunc='mean') |
| Keyword |
Description |
Syntax |
Example |
| to_datetime() |
Converts argument to datetime. |
pd.to_datetime(col) |
# Assuming we have a date column df['Date'] = pd.to_datetime(df['DateStr']) |
| resample() |
Frequency conversion (e.g., daily to monthly). |
df.resample(freq) |
# Monthly mean sales monthly_sales = sales_df.resample('M').mean() |
| rolling() |
Rolling window calculations. |
df.rolling(window) |
# 7-day moving average ma7 = stock_df['Price'].rolling(window=7).mean() |
| shift() |
Shifts index by desired number of periods. |
df.shift(periods) |
# Compare today vs yesterday df['PrevDay'] = df['Price'].shift(1) |