
Python
Learn essential DataFrame operations to manipulate and combine data effectively.
A DataFrame is a 2D table with labeled rows and columns. Think of it as a spreadsheet in Python.
import pandas as pd
# Create DataFrame: each dict key becomes a column label and each list
# supplies the values for that column.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Carol'],
    'age': [25, 30, 28],
    'salary': [50000, 60000, 55000],
})
print(df)
# Expected output:
#     name  age  salary
# 0  Alice   25   50000
# 1    Bob   30   60000
# 2  Carol   28   55000

import pandas as pd
# Sample data: two tables that share the 'id' key column
df1 = pd.DataFrame({
    'id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Carol'],
})
df2 = pd.DataFrame({
    'id': [1, 2, 3],
    'salary': [50000, 60000, 55000],
})

# Merge on common column: rows are matched by their 'id' value
merged = pd.merge(df1, df2, on='id')
print(merged)

# Concatenate vertically (stack rows); ignore_index=True renumbers the
# result 0..n-1 instead of keeping each input's own row labels
df3 = pd.DataFrame({'id': [4], 'name': ['David']})
stacked = pd.concat([df1, df3], ignore_index=True)

# Concatenate horizontally (add columns). Rows are aligned on the index,
# not on 'id', and both inputs' 'id' columns are kept, so the result
# contains a duplicate 'id' column.
side_by_side = pd.concat([df1, df2], axis=1)

import pandas as pd
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Carol', 'David'],
    'score': [92, 78, 95, 88],
    'department': ['Sales', 'IT', 'Sales', 'IT'],
})

# Sort by column (sort_values returns a new DataFrame; df is unchanged)
sorted_by_name = df.sort_values('name')
sorted_by_score = df.sort_values('score', ascending=False)

# Sort by multiple columns: department A-Z, then highest score first
# within each department
sorted_multi = df.sort_values(['department', 'score'], ascending=[True, False])

# Reset index after sorting; drop=True discards the old row labels
# instead of keeping them as a new column
df_reset = df.sort_values('score').reset_index(drop=True)

# Ranking: ascending=False gives the highest score rank 1
df['rank'] = df['score'].rank(ascending=False)

import pandas as pd
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Carol', 'David'],
    'quarter': ['Q1', 'Q1', 'Q2', 'Q2'],
    'sales': [100, 150, 200, 175],
})

# Pivot: transform rows to columns. Each distinct 'quarter' becomes a
# column; name/quarter combinations absent from df are filled with NaN.
pivot = df.pivot(index='name', columns='quarter', values='sales')
print(pivot)

# Melt: transform columns to rows. id_vars keeps 'name' as an identifier
# column; without it the names themselves would be melted into the values
# alongside the sales figures.
melted = pd.melt(
    pivot.reset_index(),
    id_vars='name',
    var_name='quarter',
    value_name='sales',
)

# Transpose: flip rows and columns
transposed = df.set_index('name').T

import pandas as pd
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Carol'],
    'age': [25, 30, 28],
})

# Add column derived from an existing one
df['age_group'] = df['age'].apply(lambda x: 'Young' if x < 30 else 'Senior')

# Add multiple columns at once: assign returns a new DataFrame with every
# keyword argument added as a column
df = df.assign(
    city=['New York', 'London', 'Paris'],
    salary=[50000, 60000, 55000],
)

# Drop columns
df = df.drop(columns=['city'])

# Drop rows
df = df.drop(index=0)  # Drop the row labeled 0 (the first row here)
df = df[df['age'] > 25]  # Keep only age > 25

import pandas as pd
employee_columns = {
    'name': ['Alice', 'Bob', 'Carol'],
    'age': [25, 30, 28],
    'salary': [50000, 60000, 55000],
}
df = pd.DataFrame(employee_columns)


def _square(value):
    """Return value raised to the power of two."""
    return value ** 2


def _describe(record):
    """Format one row as '<name> (<age>)'."""
    return f"{record['name']} ({record['age']})"


# Series.apply calls the function once per value in the column
df['age_squared'] = df['age'].apply(_square)

# DataFrame.apply with axis=1 calls the function once per row
df['info'] = df.apply(_describe, axis=1)
# Custom function
def categorize_salary(salary):
    """Classify a salary as 'Low', 'Medium', or 'High'.

    Args:
        salary: Numeric salary to classify.

    Returns:
        'Low' when salary is below 55000, 'Medium' when it is at least
        55000 but below 65000, and 'High' otherwise.
    """
    if salary < 55000:
        return 'Low'
    elif salary < 65000:
        return 'Medium'
    else:
        return 'High'
# Label every salary by passing each value through categorize_salary
df['salary_level'] = df['salary'].apply(categorize_salary)

import pandas as pd
# Build the sales dataset: one entry per sale, stored column by column
sales_data = {
    'date': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04'],
    'product': ['Laptop', 'Mouse', 'Laptop', 'Keyboard'],
    'quantity': [2, 5, 1, 3],
    'price_per_unit': [1000, 25, 1000, 50],
}
df = pd.DataFrame(sales_data)

# Revenue of each sale = units sold times unit price
df['total'] = df['quantity'].mul(df['price_per_unit'])

# Keep only the sales whose total exceeds 100
is_high_value = df['total'].gt(100)
high_value = df.loc[is_high_value]
print(f"High-value sales: {high_value.shape[0]}")

# Revenue summed per product
totals = df.groupby('product')['total'].sum()
print(totals)

# Units summed per product; idxmax returns the product label with the
# largest quantity
product_qty = df.groupby('product')['quantity'].sum()
best_product = product_qty.idxmax()
print(f"Best seller: {best_product}")
print(f"Best seller: {best_product}")

| Method | Purpose | Example |
|---|---|---|
| `merge()` | Join tables | `df1.merge(df2, on='id')` |
| `concat()` | Stack tables | `pd.concat([df1, df2])` |
| `sort_values()` | Sort rows | `df.sort_values('age')` |
| `groupby()` | Group data | `df.groupby('dept')` |
| `apply()` | Apply function | `df['col'].apply(func)` |
| `drop()` | Remove rows/columns | `df.drop('col', axis=1)` |
| `fillna()` | Handle missing values | `df.fillna(0)` |
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward