
Python
Nested loops power multi-dimensional algorithms. Mastering them requires understanding Big O analysis, matrix operations, and sophisticated optimization techniques.
# O(n) - Linear
def linear_loop(n):
for i in range(n):
print(i)
# O(n²) - Nested loops
def quadratic_loop(n):
for i in range(n):
for j in range(n):
print(i, j)
# O(n³) - Triple nested
def cubic_loop(n):
for i in range(n):
for j in range(n):
for k in range(n):
print(i, j, k)
# O(n log n) - Hybrid
def nlogn_loop(n):
for i in range(n):
j = 1
while j < n:
print(i, j)
j *= 2 # Exponential jump
# O(n²) with early termination (still O(n²) worst case)
def optimized_quadratic(n):
count = 0
for i in range(n):
for j in range(i): # Inner loop depends on i
count += 1
return count # Returns n(n-1)/2 ≈ O(n²)
# O(log n * log n) - Geometric progress
def logarithmic_nested(n):
i = 1
while i < n:
j = 1
while j < n:
print(i, j)
j *= 2
i *= 2
# Practical complexity comparison
import timeit
def compare_complexities(n=100):
"""Compare actual execution times."""
operations = {
'O(n)': lambda: sum(1 for i in range(n)),
'O(n²)': lambda: sum(1 for i in range(n) for j in range(n)),
'O(n³)': lambda: sum(1 for i in range(n) for j in range(n) for k in range(n)),
}
for name, op in operations.items():
time_taken = timeit.timeit(op, number=10)
print(f"{name} with n={n}: {time_taken:.4f}s")def analyze_operations(n):
"""Count total operations in various patterns."""
# Pattern 1: Fixed iterations
# Operations: n × m = O(n*m)
count1 = sum(1 for i in range(n) for j in range(100))
print(f"Fixed inner loop: {count1} operations")
# Pattern 2: Dependent bounds (triangular)
# Operations: 1 + 2 + 3 + ... + n = n(n+1)/2 = O(n²)
count2 = sum(1 for i in range(n) for j in range(i + 1))
print(f"Triangular loop: {count2} operations = {n*(n+1)//2}")
# Pattern 3: Halving (geometric)
# Operations: n + n/2 + n/4 + ... = O(n)
count3 = 0
i = n
while i >= 1:
count3 += i
i //= 2
print(f"Geometric halving: {count3} operations")
# Pattern 4: Nested geometric
# Operations: O(n log n)
count4 = sum(1 for i in range(n) for _ in iter(lambda j=1: j <= n and not (j := j*2), None))
# Pattern 5: Sequential loops (not nested)
# Operations: n + m = O(n + m)
count5a = sum(1 for i in range(n))
count5b = sum(1 for j in range(n))
count5 = count5a + count5b
print(f"Sequential loops: {count5} operations")
analyze_operations(100)def create_matrix(rows, cols, value=0):
"""Create matrix initialized with value."""
return [[value for _ in range(cols)] for _ in range(rows)]
def matrix_transpose(matrix):
"""Transpose matrix - O(m × n) time, O(m × n) space."""
rows = len(matrix)
cols = len(matrix[0]) if rows > 0 else 0
transposed = [[matrix[j][i] for j in range(rows)] for i in range(cols)]
return transposed
def matrix_add(a, b):
"""Add two matrices - O(m × n)."""
if len(a) != len(b) or len(a[0]) != len(b[0]):
raise ValueError("Matrices must have same dimensions")
rows = len(a)
cols = len(a[0])
result = [[0] * cols for _ in range(rows)]
for i in range(rows):
for j in range(cols):
result[i][j] = a[i][j] + b[i][j]
return result
def matrix_multiply(a, b):
"""
Multiply two matrices - O(n³) for n×n matrices.
a: m × n matrix
b: n × p matrix
result: m × p matrix
"""
rows_a = len(a)
cols_a = len(a[0]) if a else 0
rows_b = len(b)
cols_b = len(b[0]) if b else 0
if cols_a != rows_b:
raise ValueError("Incompatible dimensions for multiplication")
result = [[0] * cols_b for _ in range(rows_a)]
for i in range(rows_a):
for j in range(cols_b):
for k in range(cols_a):
result[i][j] += a[i][k] * b[k][j]
return result
# Usage
A = [[1, 2], [3, 4]]
B = [[5, 6], [7, 8]]
print("A + B =", matrix_add(A, B))
print("A × B =", matrix_multiply(A, B))
print("A^T =", matrix_transpose(A))def matrix_multiply_strassen(a, b):
"""
Strassen's algorithm for matrix multiplication.
Time complexity: O(n^2.81) instead of O(n³)
Better for large matrices (typically n > 64)
"""
def add(x, y):
n = len(x)
return [[x[i][j] + y[i][j] for j in range(n)] for i in range(n)]
def subtract(x, y):
n = len(x)
return [[x[i][j] - y[i][j] for j in range(n)] for i in range(n)]
def strassen_helper(x, y):
n = len(x)
# Base case: use standard multiplication
if n == 1:
return [[x[0][0] * y[0][0]]]
if n == 2:
# Direct multiplication for 2×2
return [[
x[0][0]*y[0][0] + x[0][1]*y[1][0],
x[0][0]*y[0][1] + x[0][1]*y[1][1]
], [
x[1][0]*y[0][0] + x[1][1]*y[1][0],
x[1][0]*y[0][1] + x[1][1]*y[1][1]
]]
# Divide matrices into quadrants
mid = n // 2
# Split A into A11, A12, A21, A22
A11 = [row[:mid] for row in x[:mid]]
A12 = [row[mid:] for row in x[:mid]]
A21 = [row[:mid] for row in x[mid:]]
A22 = [row[mid:] for row in x[mid:]]
# Split B
B11 = [row[:mid] for row in y[:mid]]
B12 = [row[mid:] for row in y[:mid]]
B21 = [row[:mid] for row in y[mid:]]
B22 = [row[mid:] for row in y[mid:]]
# Compute 7 products (Strassen's key insight)
M1 = strassen_helper(add(A11, A22), add(B11, B22))
M2 = strassen_helper(add(A21, A22), B11)
M3 = strassen_helper(A11, subtract(B12, B22))
M4 = strassen_helper(A22, subtract(B21, B11))
M5 = strassen_helper(add(A11, A12), B22)
M6 = strassen_helper(subtract(A21, A11), add(B11, B12))
M7 = strassen_helper(subtract(A12, A22), add(B21, B22))
# Combine results
C11 = add(subtract(add(M1, M4), M5), M7)
C12 = add(M3, M5)
C21 = add(M2, M4)
C22 = add(subtract(add(M1, M3), M2), M6)
# Merge quadrants
result = []
for i in range(mid):
result.append(C11[i] + C12[i])
for i in range(mid):
result.append(C21[i] + C22[i])
return result
# Pad to power of 2 if needed
n = len(a)
if n & (n - 1) != 0: # Not power of 2
size = 1
while size < n:
size *= 2
padded_a = [row + [0] * (size - len(row)) for row in a] + [[0] * size] * (size - n)
padded_b = [row + [0] * (size - len(row)) for row in b] + [[0] * size] * (size - n)
result = strassen_helper(padded_a, padded_b)
return [row[:n] for row in result[:n]]
return strassen_helper(a, b)
# For large matrices, Strassen is faster
# For small matrices (< 64×64), standard multiplication is usually faster due to overheaddef find_path_in_matrix(matrix, target_sum):
"""
Find all paths from top-left to bottom-right with target sum.
Time complexity: O(m × n × 2^max(m,n)) worst case
"""
if not matrix:
return []
rows = len(matrix)
cols = len(matrix[0])
paths = []
current_path = []
def dfs(row, col, current_sum):
"""Depth-first search for paths."""
# Out of bounds or already visited via different path
if row < 0 or row >= rows or col < 0 or col >= cols:
return
current_sum += matrix[row][col]
current_path.append((row, col))
# Reached bottom-right corner
if row == rows - 1 and col == cols - 1:
if current_sum == target_sum:
paths.append(current_path.copy())
else:
# Try moving right or down
dfs(row + 1, col, current_sum)
dfs(row, col + 1, current_sum)
current_path.pop()
dfs(0, 0, 0)
return paths
# Usage
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
paths = find_path_in_matrix(matrix, 25)
print(f"Paths with sum 25: {paths}")
def spiral_matrix(matrix):
"""
Traverse matrix in spiral order.
Time complexity: O(m × n)
Space complexity: O(1) (not counting output)
"""
if not matrix:
return []
rows = len(matrix)
cols = len(matrix[0])
result = []
top, bottom = 0, rows - 1
left, right = 0, cols - 1
while top <= bottom and left <= right:
# Traverse right
for col in range(left, right + 1):
result.append(matrix[top][col])
top += 1
# Traverse down
for row in range(top, bottom + 1):
result.append(matrix[row][right])
right -= 1
# Traverse left
if top <= bottom:
for col in range(right, left - 1, -1):
result.append(matrix[bottom][col])
bottom -= 1
# Traverse up
if left <= right:
for row in range(bottom, top - 1, -1):
result.append(matrix[row][left])
left += 1
return result
# Usage
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print("Spiral:", spiral_matrix(matrix))
# Output: [1, 2, 3, 6, 9, 8, 7, 4, 5]def matrix_multiply_naive(a, b):
"""Standard nested loop order."""
n = len(a)
c = [[0] * n for _ in range(n)]
for i in range(n):
for j in range(n):
for k in range(n):
c[i][j] += a[i][k] * b[k][j]
return c
def matrix_multiply_optimized(a, b):
"""Reordered loops for cache locality."""
n = len(a)
c = [[0] * n for _ in range(n)]
# Different loop order: k-i-j instead of i-j-k
# Improves cache performance
for k in range(n):
for i in range(n):
for j in range(n):
c[i][j] += a[i][k] * b[k][j]
return c
def matrix_multiply_blocked(a, b, block_size=32):
"""
Block matrix multiplication for cache optimization.
Process matrices in blocks to improve cache hit rate.
"""
n = len(a)
c = [[0] * n for _ in range(n)]
for i in range(0, n, block_size):
for j in range(0, n, block_size):
for k in range(0, n, block_size):
# Multiply blocks
for ii in range(i, min(i + block_size, n)):
for jj in range(j, min(j + block_size, n)):
for kk in range(k, min(k + block_size, n)):
c[ii][jj] += a[ii][kk] * b[kk][jj]
return c
# Performance comparison
import time
n = 512
a = [[i+j for j in range(n)] for i in range(n)]
b = [[i*j for j in range(n)] for i in range(n)]
start = time.time()
matrix_multiply_naive(a, b)
naive_time = time.time() - start
start = time.time()
matrix_multiply_optimized(a, b)
optimized_time = time.time() - start
print(f"Naive: {naive_time:.3f}s")
print(f"Optimized: {optimized_time:.3f}s")
print(f"Speedup: {naive_time/optimized_time:.2f}x")def sum_array_standard(arr):
"""Standard loop."""
total = 0
for x in arr:
total += x
return total
def sum_array_unrolled_2(arr):
"""Unroll loop by 2 - process 2 elements per iteration."""
total = 0
i = 0
# Process pairs
while i + 1 < len(arr):
total += arr[i] + arr[i + 1]
i += 2
# Handle remainder
if i < len(arr):
total += arr[i]
return total
def sum_array_unrolled_4(arr):
"""Unroll loop by 4."""
total = 0
i = 0
while i + 3 < len(arr):
total += arr[i] + arr[i + 1] + arr[i + 2] + arr[i + 3]
i += 4
# Handle remainder
while i < len(arr):
total += arr[i]
i += 1
return total
# Performance impact (usually modest in Python)
import timeit
data = list(range(10000))
std_time = timeit.timeit(lambda: sum_array_standard(data), number=1000)
unroll2_time = timeit.timeit(lambda: sum_array_unrolled_2(data), number=1000)
unroll4_time = timeit.timeit(lambda: sum_array_unrolled_4(data), number=1000)
print(f"Standard: {std_time:.4f}s")
print(f"Unroll 2: {unroll2_time:.4f}s")
print(f"Unroll 4: {unroll4_time:.4f}s")def convolve_2d(image, kernel):
"""
2D convolution operation commonly used in image processing.
Time complexity: O((H-Kh+1) × (W-Kw+1) × Kh × Kw)
"""
img_height, img_width = len(image), len(image[0])
ker_height, ker_width = len(kernel), len(kernel[0])
output_height = img_height - ker_height + 1
output_width = img_width - ker_width + 1
output = [[0] * output_width for _ in range(output_height)]
for i in range(output_height):
for j in range(output_width):
# Apply kernel
for ki in range(ker_height):
for kj in range(ker_width):
output[i][j] += image[i + ki][j + kj] * kernel[ki][kj]
return output
# Edge detection example
sobel_x = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
image = [
[10, 20, 30],
[40, 50, 60],
[70, 80, 90]
]
edges = convolve_2d(image, sobel_x)
print("Edges detected:", edges)| Pattern | Complexity | Use Case | Optimization |
|---|---|---|---|
| Fixed nested | O(m×n) | Matrix operations | Loop fusion |
| Triangular | O(n²) | Comparisons | Early termination |
| Geometric | O(n log n) | Divide-and-conquer | Block processing |
| Triple nested | O(n³) | Intensive ops | Strassen/Cache blocking |
| Spiral traversal | O(m×n) | Grid operations | Pointer manipulation |
Explore these advanced topics:
Master nested loops and optimize for real-world performance!
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward