
Python
Learn to filter DataFrames to extract only the data you need using conditions and criteria.
Filter rows based on a single condition.
import pandas as pd
# Sample data: one row per employee.
records = {
    'name': ['Alice', 'Bob', 'Carol', 'David'],
    'age': [25, 30, 28, 35],
    'salary': [50000, 60000, 55000, 70000],
}
df = pd.DataFrame(records)

# A comparison on a column produces a boolean Series (a mask); indexing the
# DataFrame with that mask keeps only the rows where the mask is True.

# Filter: age > 28
is_older = df['age'] > 28
older = df[is_older]
print(older)

# Filter: salary > 55000
is_high_earner = df['salary'] > 55000
high_earners = df[is_high_earner]
print(high_earners)

# Filter: name starts with 'C'
starts_with_c = df['name'].str.startswith('C')
names_c = df[starts_with_c]
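print(names_c)

Combine conditions with `&` (and), `|` (or), and `~` (not). Wrap each condition in parentheses, because these operators bind more tightly than comparisons such as `>` and `==`.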
import pandas as pd
# Sample data: four employees across two departments.
df = pd.DataFrame(
    {
        'name': ['Alice', 'Bob', 'Carol', 'David'],
        'age': [25, 30, 28, 35],
        'department': ['Sales', 'IT', 'Sales', 'IT'],
        'salary': [50000, 60000, 55000, 70000],
    }
)

# AND (&): rows that are both older than 25 and in Sales.
over_25 = df['age'] > 25
in_sales = df['department'] == 'Sales'
result = df[over_25 & in_sales]
print(result)

# OR (|): rows in IT, or earning more than 55000, or both.
in_it = df['department'] == 'IT'
earns_over_55k = df['salary'] > 55000
result = df[in_it | earns_over_55k]
print(result)

# NOT (~): every row except those whose age equals 30.
is_thirty = df['age'] == 30
result = df[~is_thirty]
print(result)
# Complex: (age > 25 AND department = 'Sales') OR salary > 65000
result = df[((df['age'] > 25) & (df['department'] == 'Sales')) | (df['salary'] > 65000)]

Select rows where a column matches any value in a list.
import pandas as pd
# Sample data: four people, each in a different city.
df = pd.DataFrame(
    {
        'name': ['Alice', 'Bob', 'Carol', 'David'],
        'city': ['New York', 'London', 'Paris', 'Berlin'],
        'salary': [50000, 60000, 55000, 70000],
    }
)

# Filter: city is one of the listed values.
# isin() is True for each row whose value equals any entry in the list.
cities_of_interest = ['New York', 'Paris']
in_listed_city = df['city'].isin(cities_of_interest)
result = df[in_listed_city]
print(result)

# Filter: salary NOT in the excluded list (an explicit list of values,
# not a numeric range).
excluded_salaries = [50000, 70000]
result = df[~df['salary'].isin(excluded_salaries)]

Filter text data using string methods.
import pandas as pd
# Sample data: names with matching e-mail addresses.
df = pd.DataFrame(
    {
        'name': ['Alice', 'Bob', 'Carol', 'David'],
        'email': ['alice@example.com', 'bob@test.com', 'carol@example.com', 'david@test.com'],
    }
)
names = df['name']

# Contains: keeps rows whose e-mail includes the pattern anywhere in the value.
# The variable is called `gmail`, but the pattern matched here is 'example'.
has_example = df['email'].str.contains('example')
gmail = df[has_example]

# Starts with
starts_a = df[names.str.startswith('A')]

# Ends with
ends_id = df[names.str.endswith('id')]

# Case-insensitive: upper-case the column first, then test an upper-case prefix.
upper_c = df[names.str.upper().str.startswith('C')]
# Exact match (case-sensitive)
exact = df[df['name'] == 'Alice']

Select rows whose values fall within a range.
import pandas as pd
# Sample data: one row per employee.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Carol', 'David'],
    'age': [25, 30, 28, 35],
    'salary': [50000, 60000, 55000, 70000],
})

# Between (inclusive): both endpoints count as inside the range.
age_range = df[df['age'].between(26, 32)]
print(age_range)

# Salary in range
salary_range = df[df['salary'].between(55000, 65000)]

# The next example's import was fused onto the line above, which made that
# line a syntax error; it belongs on its own line.
import pandas as pd
# Customer data: one entry per customer, keyed by column name.
customers = {
    'customer_id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Carol', 'David', 'Eve'],
    'age': [25, 45, 32, 28, 55],
    'location': ['NYC', 'LA', 'NYC', 'Chicago', 'LA'],
    'total_purchases': [1500, 8000, 3000, 2000, 5000],
}
df = pd.DataFrame(customers)

# VIP customers: >5000 in purchases
vips = df[df['total_purchases'] > 5000]
print(f"VIP customers: {len(vips)}")

# Young customers in NYC
young_nyc = df[(df['age'] < 35) & (df['location'] == 'NYC')]
# Report the row count, like every other summary line in this example;
# interpolating the DataFrame itself would print a multi-line table after
# the label.
print(f"Young customers in NYC: {len(young_nyc)}")

# Big spenders age 30+
big_spenders_30plus = df[(df['age'] >= 30) & (df['total_purchases'] > 3000)]
print(f"Big spenders 30+: {len(big_spenders_30plus)}")

# Customers from NYC or LA with purchases > 2000
target = df[df['location'].isin(['NYC', 'LA']) & (df['total_purchases'] > 2000)]
print(f"Target customers: {len(target)}")

# The next example's import was fused onto the print line above, which made
# that line a syntax error; it belongs on its own line.
import pandas as pd
# Sample data: three people.
df = pd.DataFrame(
    {
        'name': ['Alice', 'Bob', 'Carol'],
        'age': [25, 30, 28],
    }
)

# loc: label-based; a boolean mask is also accepted as the row selector.
older_than_26 = df['age'] > 26
result = df.loc[older_than_26]

# iloc: position-based; rows 0 and 1 (the stop position is excluded).
result = df.iloc[:2]
# loc with multiple criteria
result = df.loc[(df['age'] > 25) & (df['name'] != 'Bob')]

| Method | Purpose | Example |
|---|---|---|
| Comparison | Simple condition | `df[df['age'] > 25]` |
| `isin()` | Match multiple values | `df[df['city'].isin(['NYC', 'LA'])]` |
| `between()` | Range check | `df[df['age'].between(25, 35)]` |
| String methods | Text filtering | `df[df['name'].str.startswith('A')]` |
| `&`, `\|`, `~` | Combine conditions | `(df['a'] > 1) & (df['b'] < 10)` |
Continue: Aggregation & Grouping | Data Cleaning
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward