
Python
Master rolling windows, expanding operations, and sophisticated grouping patterns.
import pandas as pd
import numpy as np

# --- Rolling-window operations on a time series ------------------------------
# Seed the RNG so the example produces the same data on every run.
np.random.seed(42)

# Time series data: 20 consecutive daily prices.
df = pd.DataFrame({
    'date': pd.date_range('2024-01-01', periods=20),
    'price': np.random.randint(100, 150, 20)
})

# Rolling mean over a 5-row window (first 4 rows are NaN: not enough data).
df['rolling_mean_5'] = df['price'].rolling(window=5).mean()

# Rolling sum over a 3-row window.
df['rolling_sum_3'] = df['price'].rolling(window=3).sum()

# Multiple rolling statistics computed in one pass.
rolling = df['price'].rolling(window=5).agg(['mean', 'std', 'min', 'max'])


def price_range(x):
    """Return the max-min spread of one rolling window (a Series)."""
    return x.max() - x.min()


# Custom rolling function applied to each 5-row window.
df['rolling_range'] = df['price'].rolling(window=5).apply(price_range)

# center=True places the window symmetrically around each row
# (so NaNs appear at both ends instead of only at the start).
df['centered_mean'] = df['price'].rolling(window=5, center=True).mean()

# min_periods: require all 5 observations before emitting a value.
df['rolling_mean_strict'] = df['price'].rolling(window=5, min_periods=5).mean()
# --- Expanding (cumulative) operations ---------------------------------------
df = pd.DataFrame({
    'value': [10, 20, 15, 25, 30],
    'date': pd.date_range('2024-01-01', periods=5)
})

# Expanding mean: cumulative average over all rows seen so far.
df['expanding_mean'] = df['value'].expanding().mean()

# Expanding sum: cumulative sum.
df['expanding_sum'] = df['value'].expanding().sum()

# Expanding max: running maximum.
df['expanding_max'] = df['value'].expanding().max()

# Several expanding statistics computed at once.
df_expanded = df['value'].expanding().agg(['mean', 'std', 'count'])
import numpy as np

# --- Rolling / expanding operations within groups ----------------------------
df = pd.DataFrame({
    'group': ['A', 'A', 'A', 'B', 'B', 'B'],
    'value': [10, 20, 15, 30, 25, 35]
})

# Rolling mean computed independently inside each group.
# groupby(...).rolling(...) returns a (group, row) MultiIndex; dropping the
# group level re-aligns the result onto df's original index.
df['rolling_mean'] = (
    df.groupby('group')['value']
    .rolling(window=2)
    .mean()
    .reset_index(level=0, drop=True)
)

# Expanding (cumulative) sum within each group.
df['expanding_sum'] = (
    df.groupby('group')['value']
    .expanding()
    .sum()
    .reset_index(level=0, drop=True)
)

# Multiple rolling statistics per group.
rolling_stats = (
    df.groupby('group')['value']
    .rolling(window=2)
    .agg(['mean', 'sum'])
    .reset_index(level=0, drop=True)
)
import numpy as np

# --- Custom aggregation functions --------------------------------------------
df = pd.DataFrame({
    'group': ['A', 'A', 'B', 'B', 'C', 'C'],
    'value': [10, 20, 15, 25, 30, 35]
})


def custom_agg(x):
    """Return several statistics for one group as a labelled Series.

    Returning a Series (rather than a plain dict) guarantees that
    groupby().apply() yields a (group, stat) MultiIndex which
    .unstack() can pivot into one column per statistic.
    """
    return pd.Series({
        'sum': x.sum(),
        'mean': x.mean(),
        'std': x.std(),
        'coefficient_of_variation': x.std() / x.mean()
    })


result = df.groupby('group')['value'].apply(custom_agg).unstack()


def percentile_25(x):
    """25th percentile of one group's values."""
    return x.quantile(0.25)


def percentile_75(x):
    """75th percentile of one group's values."""
    return x.quantile(0.75)


# Multiple custom functions via agg; (name, func) tuples label the columns.
result = df.groupby('group')['value'].agg([
    'mean',
    ('p25', percentile_25),
    ('p75', percentile_75),
    ('iqr', lambda x: x.quantile(0.75) - x.quantile(0.25))
])
# --- Grouping by multiple columns --------------------------------------------
df = pd.DataFrame({
    'year': [2023, 2023, 2023, 2024, 2024, 2024],
    'region': ['North', 'South', 'North', 'South', 'North', 'South'],
    'sales': [100, 150, 120, 200, 180, 210]
})

# Group by two keys; the result carries a (year, region) MultiIndex.
result = df.groupby(['year', 'region'])['sales'].sum()

# Named aggregations give the output columns explicit names.
result = df.groupby(['year', 'region']).agg(
    total=('sales', 'sum'),
    average=('sales', 'mean'),
    count=('sales', 'count')
)

# transform broadcasts each group's statistic back onto its original rows.
df['year_region_avg'] = df.groupby(['year', 'region'])['sales'].transform('mean')

# Rank each row's sales within its year (1 = highest, since ascending=False).
df['rank'] = df.groupby('year')['sales'].rank(ascending=False)
# --- Resampling time-series data ---------------------------------------------
df = pd.DataFrame({
    'date': pd.date_range('2024-01-01', periods=31),
    'value': range(1, 32)
})
df.set_index('date', inplace=True)

# Resample to weekly totals (weeks end on Sunday by default: 'W' == 'W-SUN').
weekly = df.resample('W').sum()

# Monthly resample with several aggregations per column.
# NOTE: pandas >= 2.2 prefers the 'ME'/'QE' aliases; 'M'/'Q' still work
# there but emit a FutureWarning.
monthly = df.resample('M').agg({
    'value': ['sum', 'mean', 'std']
})

# Custom aggregation: sum of the 3 largest values in each quarter.
quarterly = df.resample('Q').agg({
    'value': lambda x: x.nlargest(3).sum()
})

# label/closed control which bin edge each interval owns and is labelled by.
weekly = df.resample('W', label='left', closed='left').sum()
df = pd.DataFrame({
'group': ['A', 'A', 'B', 'B', 'C', 'C'],
'value': [10, 20, 15, 25, 30, 5]
})
# Keep only groups with sum > 30
result = df.groupby('group').filter(lambda x: x['value'].sum() > 30)
# Keep groups with mean > 20
result = df.groupby('group').filter(lambda x: x['value'].mean() > 20)
# Keep groups with more than 1 element where value > 15
result = df.groupby('group').filter(
lambda x: (len(x) > 1) & (x['value'] > 15).any()
)import pandas as pd
# --- Pivot tables -------------------------------------------------------------
df = pd.DataFrame({
    'date': ['2024-01-01', '2024-01-01', '2024-01-02', '2024-01-02'],
    'product': ['A', 'B', 'A', 'B'],
    'quantity': [10, 20, 15, 25],
    'price': [100, 50, 100, 50]
})

# Pivot with a different aggregation per value column;
# fill_value replaces missing (date, product) cells with 0.
pivot = df.pivot_table(
    values=['quantity', 'price'],
    index='date',
    columns='product',
    aggfunc={'quantity': 'sum', 'price': 'mean'},
    fill_value=0
)

# margins=True appends row/column totals labelled margins_name.
pivot_margins = df.pivot_table(
    values='quantity',
    index='date',
    columns='product',
    aggfunc='sum',
    margins=True,
    margins_name='Total'
)
# --- Top / bottom N rows per group --------------------------------------------
df = pd.DataFrame({
    'group': ['A', 'A', 'A', 'B', 'B', 'B'],
    'value': [100, 150, 120, 200, 180, 210]
})

# Two largest values in each group; reset_index flattens the
# (group, original-row) MultiIndex that groupby().apply() produces.
top_per_group = df.groupby('group').apply(
    lambda x: x.nlargest(2, 'value')
).reset_index(drop=True)

# Smallest value in each group.
bottom_per_group = df.groupby('group').apply(
    lambda x: x.nsmallest(1, 'value')
).reset_index(drop=True)
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward