
Custom iterators and lazy evaluation are fundamental to writing efficient, scalable Python code. This section explores how to implement custom iterators, work with the generator protocol, and apply memory-optimization strategies.
# The iterator protocol consists of two methods:
# 1. __iter__(): returns the iterator object itself
# 2. __next__(): returns the next value or raises StopIteration
class CountUp:
    """Custom iterator producing 1, 2, ..., max."""

    def __init__(self, max):
        self.max = max
        self.current = 0

    def __iter__(self):
        """An iterator is its own iterable."""
        return self

    def __next__(self):
        """Advance and return the next count, or signal exhaustion."""
        if self.current >= self.max:
            raise StopIteration
        self.current += 1
        return self.current
# Drive the custom iterator with a for-loop.
ticker = CountUp(3)
for num in ticker:
    print(num)  # Output: 1, 2, 3

# More efficient: build a fresh iterator for each loop.
for num in CountUp(3):
    print(num)

# Materialize all values into a list.
numbers = list(CountUp(5))
print(numbers)  # [1, 2, 3, 4, 5]


class Fibonacci:
    """Iterable over the first max_terms Fibonacci numbers.

    Each __iter__ call hands back a brand-new iterator, so the same
    Fibonacci object can be looped over any number of times.
    """

    def __init__(self, max_terms):
        self.max_terms = max_terms

    def __iter__(self):
        return FibonacciIterator(self.max_terms)
class FibonacciIterator:
    """Single-pass iterator over the first max_terms Fibonacci numbers."""

    def __init__(self, max_terms):
        self.max_terms = max_terms
        self.current = 0
        self.a, self.b = 0, 1

    def __iter__(self):
        return self

    def __next__(self):
        if self.current >= self.max_terms:
            raise StopIteration
        self.current += 1
        # Emit the current term, then advance the (a, b) pair.
        result, self.a, self.b = self.a, self.b, self.a + self.b
        return result
# Iterable advantage: it can be traversed more than once.
fib = Fibonacci(5)
for num in fib:
    print(num, end=' ')  # 0 1 1 2 3
print()
for num in fib:  # A second pass works too!
    print(num, end=' ')  # 0 1 1 2 3
# Contrast with a plain iterator, which is spent after one pass.
class BadFibonacci:
    """Iterator (not a reusable iterable): __iter__ returns self."""

    def __init__(self, max_terms):
        self.max_terms = max_terms
        self.current = 0
        self.a, self.b = 0, 1

    def __iter__(self):
        # Returning self means iteration state is shared across loops.
        return self

    def __next__(self):
        if self.current >= self.max_terms:
            raise StopIteration
        self.current += 1
        value, self.a, self.b = self.a, self.b, self.a + self.b
        return value
bad_fib = BadFibonacci(3)
list1 = list(bad_fib)  # [0, 1, 1]
list2 = list(bad_fib)  # [] - exhausted!


class WindowIterator:
    """Iterable over overlapping fixed-size windows of an iterable."""

    def __init__(self, iterable, window_size):
        self.iterable = iterable
        self.window_size = window_size

    def __iter__(self):
        # A fresh iterator per loop, so the windows can be re-walked.
        return WindowIteratorImpl(self.iterable, self.window_size)
class WindowIteratorImpl:
    """Single-pass iterator yielding consecutive windows as tuples.

    Raises:
        ValueError: if window_size < 1, or if the underlying iterable
            holds fewer than window_size items.
    """

    def __init__(self, iterable, window_size):
        # Fix: window_size <= 0 previously made __next__ yield empty
        # tuples forever (the exhaustion check never triggered).
        if window_size < 1:
            raise ValueError(f"window size must be >= 1, got {window_size}")
        self.iterator = iter(iterable)
        self.window_size = window_size
        self.window = []
        # Pre-fill the first window.
        try:
            for _ in range(window_size):
                self.window.append(next(self.iterator))
        except StopIteration:
            raise ValueError(f"Iterable smaller than window size {window_size}")

    def __iter__(self):
        return self

    def __next__(self):
        # An underfull window marks exhaustion (set below).
        if len(self.window) < self.window_size:
            raise StopIteration
        result = tuple(self.window)
        try:
            # Slide: pull one new item in, drop the oldest.
            self.window.append(next(self.iterator))
            self.window.pop(0)
        except StopIteration:
            self.window = []  # sentinel: no more windows
        return result
# Usage
words = ['the', 'quick', 'brown', 'fox', 'jumps']
bigrams = WindowIterator(words, 2)
for window in bigrams:
print(' '.join(window))
# Output:
# the quick
# quick brown
# brown fox
# fox jumps
# Trigrams (3-word windows)
trigrams = WindowIterator(words, 3)
for window in trigrams:
print(' '.join(window))class FlattenIterator:
"""Flattens nested iterables of arbitrary depth."""
def __init__(self, nested_iterable):
self.nested_iterable = nested_iterable
def __iter__(self):
return self._flatten(self.nested_iterable)
def _flatten(self, iterable):
"""Recursively flatten nested iterables."""
for item in iterable:
# Check if item is iterable (but not string)
if isinstance(item, (list, tuple, set)) or (
hasattr(item, '__iter__') and not isinstance(item, (str, bytes))
):
# Recursively flatten
yield from self._flatten(item)
else:
yield item
# Usage
nested = [1, [2, 3, [4, 5]], 6, [7, [8, 9]]]
flattened = FlattenIterator(nested)
print(list(flattened))
# Output: [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Works with mixed element types as well.
mixed = [1, ['a', 'b'], [2, [3, 4]], 'x']
for item in FlattenIterator(mixed):
    print(item, end=' ')
# Output: 1 a b 2 3 4 x


class ZipLongest:
    """Hand-rolled equivalent of itertools.zip_longest."""

    def __init__(self, *iterables, fillvalue=None):
        self.iterables = iterables
        self.fillvalue = fillvalue

    def __iter__(self):
        its = [iter(it) for it in self.iterables]
        live = list(range(len(its)))
        row = [None] * len(its)
        while live:
            # Advance every still-active iterator; swap in the fill
            # value for any that just ran dry.
            for i in list(live):
                try:
                    row[i] = next(its[i])
                except StopIteration:
                    row[i] = self.fillvalue
                    live.remove(i)
            # Suppress the final all-fill row produced when the last
            # iterators expire together.
            if live or any(v is not self.fillvalue for v in row):
                yield tuple(row)
# Usage
list1 = [1, 2, 3]
list2 = ['a', 'b']
list3 = [10, 20, 30, 40]
for values in ZipLongest(list1, list2, list3, fillvalue='X'):
    print(values)
# Output rows:
#   (1, 'a', 10)
#   (2, 'b', 20)
#   (3, 'X', 30)
# ('X', 'X', 40)


class FilterIterator:
    """Filter an iterable by predicate, optionally remembering rejects."""

    def __init__(self, predicate, iterable, cache_rejections=False):
        self.predicate = predicate
        self.iterable = iterable
        self.cache_rejections = cache_rejections
        self.rejected_items = []

    def __iter__(self):
        for candidate in self.iterable:
            if self.predicate(candidate):
                yield candidate
            elif self.cache_rejections:
                # Keep the misses for later inspection.
                self.rejected_items.append(candidate)
# Usage
numbers = range(10)
even_filter = FilterIterator(lambda x: x % 2 == 0, numbers, cache_rejections=True)
evens = list(even_filter)
print(f"Even numbers: {evens}")
print(f"Odd numbers (rejected): {even_filter.rejected_items}")

# Generator function - uses yield
def countdown(n):
    """Yield n, n-1, ..., 1 lazily."""
    while n > 0:
        yield n
        n -= 1
# Generators are lazy - values are computed on demand.
# Fix: `itertools` was imported *after* its first use below
# (itertools.islice would raise NameError); hoist both imports here.
import itertools
import sys

gen = countdown(5)
print(next(gen))  # 5
print(next(gen))  # 4
print(next(gen))  # 3
# Drain the remaining values
print(list(gen))  # [2, 1]

# Generator expressions (also lazy)
squares = (x**2 for x in range(1000000))
first_10_squares = list(itertools.islice(squares, 10))

# Memory comparison
list_comp = [x**2 for x in range(10000)]
gen_exp = (x**2 for x in range(10000))
print(f"List: {sys.getsizeof(list_comp)} bytes")
print(f"Generator: {sys.getsizeof(gen_exp)} bytes")


def stateful_generator():
    """Running-total coroutine: yields the total, receives addends via send()."""
    count = 0
    total = 0
    while True:
        # `yield` hands out the current total and receives the next addend.
        sent = yield total
        if sent is None:
            sent = 0
        count += 1
        total += sent
# Two-way communication with generators
gen = stateful_generator()
print(next(gen))     # Prime the generator - yields 0
print(gen.send(5))   # Send 5, running total: 5
print(gen.send(10))  # Send 10, running total: 15
print(gen.send(3))   # Send 3, running total: 18
def cumulative_sum_generator():
    """Coroutine computing a running sum over streamed values.

    Sending None after priming terminates the coroutine.
    """
    total = 0
    while True:
        received = yield total
        if received is None:
            break
        total += received
# Usage
cum_sum = cumulative_sum_generator()
next(cum_sum)  # Prime the coroutine
results = []
for num in [1, 2, 3, 4, 5]:
    results.append(cum_sum.send(num))
print(f"Cumulative sums: {results}")
# Output: [1, 3, 6, 10, 15]
def read_data(filename):
    """Stage 1: lazily yield the file's lines, newline-stripped."""
    with open(filename, 'r') as f:
        for raw in f:
            yield raw.rstrip('\n')
def parse_csv(lines):
    """Stage 2: split each comma-separated line into its fields.

    NOTE(review): naive split(',') ignores quoting; the stdlib csv
    module would handle quoted fields — acceptable for this demo.
    """
    for record in lines:
        yield record.split(',')
def filter_empty(rows):
    """Stage 3: drop rows that are empty or all-whitespace."""
    for fields in rows:
        has_content = any(cell.strip() for cell in fields)
        if has_content:
            yield fields
def convert_types(rows):
    """Stage 4: coerce each row to (int id, str name, int age).

    Rows that are too short or fail int conversion are skipped.
    """
    for row in rows:
        try:
            record = (int(row[0]), row[1].strip(), int(row[2]))
        except (ValueError, IndexError):
            continue
        yield record
# Pipeline composition
def process_data_pipeline(filename):
    """Chain all four lazy stages into one pipeline."""
    lines = read_data(filename)
    rows = parse_csv(lines)
    clean_rows = filter_empty(rows)
    return convert_types(clean_rows)

# Lazy evaluation - only one row is in flight at any moment.
for person_id, name, age in process_data_pipeline('data.csv'):
    if age > 18:
        print(f"{name} (ID: {person_id}) is an adult")
# Only processes one row at a time - memory efficient!
def delegating_generator():
    """Demonstrate yield from delegation to several sub-iterables."""
    def inner():
        yield 10
        yield 20

    yield from range(3)          # first sub-iterable: 0, 1, 2
    yield from ['a', 'b', 'c']   # second sub-iterable
    yield from inner()           # nested generator
result = list(delegating_generator())
print(result)  # [0, 1, 2, 'a', 'b', 'c', 10, 20]

# A more practical example: tree traversal
class Node:
    """Simple n-ary tree node."""

    def __init__(self, value, children=None):
        self.value = value
        # Default to a fresh list (avoids a shared mutable default).
        self.children = children or []
def traverse_tree(node):
    """Pre-order DFS: yield this node's value, then each subtree's values."""
    yield node.value
    for child in node.children:
        yield from traverse_tree(child)
# Build a small tree
root = Node(1, [
    Node(2, [Node(4), Node(5)]),
    Node(3, [Node(6)]),
])
values = list(traverse_tree(root))
print(f"Tree values: {values}")  # [1, 2, 4, 5, 3, 6]

import timeit

# Eager evaluation
def eager_process(n):
    """Build full intermediate lists (eager), then sum the even squares."""
    all_squares = [i * i for i in range(n)]
    even_squares = [s for s in all_squares if s % 2 == 0]
    return sum(even_squares)
# Lazy evaluation
def lazy_process(n):
    """Same computation via generator expressions - no intermediate lists."""
    square_stream = (i * i for i in range(n))
    even_stream = (s for s in square_stream if s % 2 == 0)
    return sum(even_stream)
# Performance comparison
n = 1000000
eager_time = timeit.timeit(lambda: eager_process(n), number=1)
lazy_time = timeit.timeit(lambda: lazy_process(n), number=1)
print(f"Eager: {eager_time:.4f}s")
print(f"Lazy: {lazy_time:.4f}s")
print(f"Speedup: {eager_time/lazy_time:.2f}x")

# Memory comparison (peak allocations via tracemalloc)
import tracemalloc

tracemalloc.start()
eager_result = eager_process(100000)
eager_mem = tracemalloc.get_traced_memory()[1]
tracemalloc.stop()

tracemalloc.start()
lazy_result = lazy_process(100000)
lazy_mem = tracemalloc.get_traced_memory()[1]
# Fix: the second trace was never stopped, leaving tracemalloc's
# bookkeeping overhead active for the rest of the program.
tracemalloc.stop()

print(f"Eager memory: {eager_mem / 1024:.1f} KB")
print(f"Lazy memory: {lazy_mem / 1024:.1f} KB")


def read_large_file(filepath, chunk_size=8192):
    """Yield a file's contents as byte chunks of at most chunk_size."""
    with open(filepath, 'rb') as f:
        while True:
            piece = f.read(chunk_size)
            if not piece:
                # An empty read signals EOF.
                break
            yield piece
def process_file_lazily(filepath):
    """Stream-process a large file without loading it all into memory.

    Fixes two chunk-boundary bugs in the naive per-chunk version:
    - a line split across two chunks is now reassembled instead of
      being processed as two fragments, and
    - decoding happens per complete line, avoiding UnicodeDecodeError
      when a multi-byte UTF-8 character straddles a chunk boundary.
    """
    remainder = b''
    for chunk in read_large_file(filepath):
        data = remainder + chunk
        lines = data.split(b'\n')
        # The last element may be an incomplete line; hold it back.
        remainder = lines.pop()
        for raw in lines:
            line = raw.decode('utf-8')
            if line.strip():
                yield process_line(line)
    # Flush the final line (file may not end with a newline).
    if remainder:
        final = remainder.decode('utf-8')
        if final.strip():
            yield process_line(final)
def process_line(line):
    """Normalize a line: trim surrounding whitespace and upper-case it."""
    return line.strip().upper()
# Memory usage stays constant regardless of file size.
# Example: process a 1 GB file with minimal memory overhead.
def memoized_generator(fn):
    """Decorator caching a generator function's output per argument tuple.

    The generator runs once per distinct args; its items are stored as
    a list and later calls return a fresh iterator over that list.
    Note: only positional, hashable arguments are supported.
    """
    # Local import keeps this snippet self-contained.
    from functools import wraps

    cache = {}

    @wraps(fn)  # fix: preserve the wrapped function's name/docstring
    def wrapper(*args):
        if args not in cache:
            cache[args] = list(fn(*args))
        return iter(cache[args])

    return wrapper
@memoized_generator
def expensive_generator(n):
    """Generator that is expensive to (re)compute."""
    print(f"Computing generator for n={n}")
    for i in range(n):
        yield i**2

# First call computes and caches.
print(list(expensive_generator(5)))
# Output: Computing generator for n=5, then [0, 1, 4, 9, 16]
# Subsequent calls hit the cache (no "Computing..." message).
print(list(expensive_generator(5)))
# Output: [0, 1, 4, 9, 16]
def enumerate_with_info(iterable):
    """Enumerate with positional metadata for each item.

    Yields dicts carrying index, item, first/last/even flags,
    fractional progress, and the total count. Materializes the input
    to learn its length, so it is unsuitable for infinite iterables.
    """
    items = list(iterable)
    total = len(items)
    for index, item in enumerate(items):
        yield {
            'index': index,
            'item': item,
            'is_first': index == 0,
            'is_last': index == total - 1,
            'is_even': index % 2 == 0,
            'progress': (index + 1) / total,
            'total': total,
        }
# Usage
items = ['apple', 'banana', 'cherry']
for info in enumerate_with_info(items):
print(f"[{info['progress']:.0%}] {info['index']}: {info['item']}")
if info['is_last']:
print("Done!")class LookaheadIterator:
"""Iterator that can peek at the next element."""
def __init__(self, iterable):
self.iterator = iter(iterable)
self.next_value = None
self.has_next = False
self._advance()
def _advance(self):
try:
self.next_value = next(self.iterator)
self.has_next = True
except StopIteration:
self.has_next = False
def __iter__(self):
return self
def __next__(self):
if not self.has_next:
raise StopIteration
value = self.next_value
self._advance()
return value
def peek(self):
"""Peek at next value without consuming it."""
return self.next_value if self.has_next else None
# Usage
numbers = [1, 2, 3, 4, 5]
lookahead = LookaheadIterator(numbers)
for num in lookahead:
next_num = lookahead.peek()
if next_num and next_num > num:
print(f"{num} < {next_num}")
else:
        print(f"{num} is last or equal")

| Pattern | Memory | Speed | Complexity | Best For |
|---|---|---|---|---|
| List | High | Fast | Low | Small collections |
| Generator | Very Low | Very Fast | Low | Large/infinite sequences |
| Custom Iterator | Flexible | Fast | Medium | Specialized iteration |
| Lazy Pipeline | Very Low | Medium | Medium | Data processing |
| Eager Caching | High | Very Fast | Medium | Repeated access |
| Window Iterator | Low | Fast | Medium | Sliding windows |
To go further, explore related advanced topics such as async generators, the itertools module internals, and coroutine-based pipelines.
Master custom iterators and unlock Python's lazy evaluation power!
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward