Ojasa Mirai

Ojasa Mirai

Python

Loading...

Learning Level

🟢 Beginner🔵 Advanced
Why Loops?For Loops BasicsWhile Loops BasicsLoop ControlIterating ListsLoop PatternsNested LoopsDebugging LoopsBest Practices
Python/Loops/Nested Loops

Advanced Nested Loops and Matrix Operations

Nested loops power multi-dimensional algorithms. Mastering them requires understanding Big O analysis, matrix operations, and sophisticated optimization techniques.

Understanding Nested Loop Complexity

Big O Analysis of Nested Loops

# O(n) - Linear
def linear_loop(n):
    for i in range(n):
        print(i)

# O(n²) - Nested loops
def quadratic_loop(n):
    for i in range(n):
        for j in range(n):
            print(i, j)

# O(n³) - Triple nested
def cubic_loop(n):
    for i in range(n):
        for j in range(n):
            for k in range(n):
                print(i, j, k)

# O(n log n) - Hybrid
def nlogn_loop(n):
    for i in range(n):
        j = 1
        while j < n:
            print(i, j)
            j *= 2  # Exponential jump

# O(n²) with early termination (still O(n²) worst case)
def optimized_quadratic(n):
    count = 0
    for i in range(n):
        for j in range(i):  # Inner loop depends on i
            count += 1
    return count  # Returns n(n-1)/2 ≈ O(n²)

# O(log n * log n) - Geometric progress
def logarithmic_nested(n):
    i = 1
    while i < n:
        j = 1
        while j < n:
            print(i, j)
            j *= 2
        i *= 2

# Practical complexity comparison
import timeit

def compare_complexities(n=100):
    """Compare actual execution times."""

    operations = {
        'O(n)': lambda: sum(1 for i in range(n)),
        'O(n²)': lambda: sum(1 for i in range(n) for j in range(n)),
        'O(n³)': lambda: sum(1 for i in range(n) for j in range(n) for k in range(n)),
    }

    for name, op in operations.items():
        time_taken = timeit.timeit(op, number=10)
        print(f"{name} with n={n}: {time_taken:.4f}s")

Analyzing Loop Bounds

def analyze_operations(n):
    """Count total operations in various patterns."""

    # Pattern 1: Fixed iterations
    # Operations: n × m = O(n*m)
    count1 = sum(1 for i in range(n) for j in range(100))
    print(f"Fixed inner loop: {count1} operations")

    # Pattern 2: Dependent bounds (triangular)
    # Operations: 1 + 2 + 3 + ... + n = n(n+1)/2 = O(n²)
    count2 = sum(1 for i in range(n) for j in range(i + 1))
    print(f"Triangular loop: {count2} operations = {n*(n+1)//2}")

    # Pattern 3: Halving (geometric)
    # Operations: n + n/2 + n/4 + ... = O(n)
    count3 = 0
    i = n
    while i >= 1:
        count3 += i
        i //= 2
    print(f"Geometric halving: {count3} operations")

    # Pattern 4: Nested geometric
    # Operations: O(n log n)
    count4 = sum(1 for i in range(n) for _ in iter(lambda j=1: j <= n and not (j := j*2), None))

    # Pattern 5: Sequential loops (not nested)
    # Operations: n + m = O(n + m)
    count5a = sum(1 for i in range(n))
    count5b = sum(1 for j in range(n))
    count5 = count5a + count5b
    print(f"Sequential loops: {count5} operations")

analyze_operations(100)

Matrix Operations

Basic Matrix Operations with Loops

def create_matrix(rows, cols, value=0):
    """Create matrix initialized with value."""
    return [[value for _ in range(cols)] for _ in range(rows)]

def matrix_transpose(matrix):
    """Transpose matrix - O(m × n) time, O(m × n) space."""
    rows = len(matrix)
    cols = len(matrix[0]) if rows > 0 else 0

    transposed = [[matrix[j][i] for j in range(rows)] for i in range(cols)]
    return transposed

def matrix_add(a, b):
    """Add two matrices - O(m × n)."""
    if len(a) != len(b) or len(a[0]) != len(b[0]):
        raise ValueError("Matrices must have same dimensions")

    rows = len(a)
    cols = len(a[0])
    result = [[0] * cols for _ in range(rows)]

    for i in range(rows):
        for j in range(cols):
            result[i][j] = a[i][j] + b[i][j]

    return result

def matrix_multiply(a, b):
    """
    Multiply two matrices - O(n³) for n×n matrices.

    a: m × n matrix
    b: n × p matrix
    result: m × p matrix
    """
    rows_a = len(a)
    cols_a = len(a[0]) if a else 0
    rows_b = len(b)
    cols_b = len(b[0]) if b else 0

    if cols_a != rows_b:
        raise ValueError("Incompatible dimensions for multiplication")

    result = [[0] * cols_b for _ in range(rows_a)]

    for i in range(rows_a):
        for j in range(cols_b):
            for k in range(cols_a):
                result[i][j] += a[i][k] * b[k][j]

    return result

# Usage
A = [[1, 2], [3, 4]]
B = [[5, 6], [7, 8]]

print("A + B =", matrix_add(A, B))
print("A × B =", matrix_multiply(A, B))
print("A^T =", matrix_transpose(A))

Optimized Matrix Operations

def matrix_multiply_strassen(a, b):
    """
    Strassen's algorithm for matrix multiplication.

    Time complexity: O(n^2.81) instead of O(n³)
    Better for large matrices (typically n > 64)
    """

    def add(x, y):
        n = len(x)
        return [[x[i][j] + y[i][j] for j in range(n)] for i in range(n)]

    def subtract(x, y):
        n = len(x)
        return [[x[i][j] - y[i][j] for j in range(n)] for i in range(n)]

    def strassen_helper(x, y):
        n = len(x)

        # Base case: use standard multiplication
        if n == 1:
            return [[x[0][0] * y[0][0]]]

        if n == 2:
            # Direct multiplication for 2×2
            return [[
                x[0][0]*y[0][0] + x[0][1]*y[1][0],
                x[0][0]*y[0][1] + x[0][1]*y[1][1]
            ], [
                x[1][0]*y[0][0] + x[1][1]*y[1][0],
                x[1][0]*y[0][1] + x[1][1]*y[1][1]
            ]]

        # Divide matrices into quadrants
        mid = n // 2

        # Split A into A11, A12, A21, A22
        A11 = [row[:mid] for row in x[:mid]]
        A12 = [row[mid:] for row in x[:mid]]
        A21 = [row[:mid] for row in x[mid:]]
        A22 = [row[mid:] for row in x[mid:]]

        # Split B
        B11 = [row[:mid] for row in y[:mid]]
        B12 = [row[mid:] for row in y[:mid]]
        B21 = [row[:mid] for row in y[mid:]]
        B22 = [row[mid:] for row in y[mid:]]

        # Compute 7 products (Strassen's key insight)
        M1 = strassen_helper(add(A11, A22), add(B11, B22))
        M2 = strassen_helper(add(A21, A22), B11)
        M3 = strassen_helper(A11, subtract(B12, B22))
        M4 = strassen_helper(A22, subtract(B21, B11))
        M5 = strassen_helper(add(A11, A12), B22)
        M6 = strassen_helper(subtract(A21, A11), add(B11, B12))
        M7 = strassen_helper(subtract(A12, A22), add(B21, B22))

        # Combine results
        C11 = add(subtract(add(M1, M4), M5), M7)
        C12 = add(M3, M5)
        C21 = add(M2, M4)
        C22 = add(subtract(add(M1, M3), M2), M6)

        # Merge quadrants
        result = []
        for i in range(mid):
            result.append(C11[i] + C12[i])
        for i in range(mid):
            result.append(C21[i] + C22[i])

        return result

    # Pad to power of 2 if needed
    n = len(a)
    if n & (n - 1) != 0:  # Not power of 2
        size = 1
        while size < n:
            size *= 2

        padded_a = [row + [0] * (size - len(row)) for row in a] + [[0] * size] * (size - n)
        padded_b = [row + [0] * (size - len(row)) for row in b] + [[0] * size] * (size - n)

        result = strassen_helper(padded_a, padded_b)
        return [row[:n] for row in result[:n]]

    return strassen_helper(a, b)

# For large matrices, Strassen is faster
# For small matrices (< 64×64), standard multiplication is usually faster due to overhead

Finding Patterns in Matrices

def find_path_in_matrix(matrix, target_sum):
    """
    Find all paths from top-left to bottom-right with target sum.

    Time complexity: O(m × n × 2^max(m,n)) worst case
    """

    if not matrix:
        return []

    rows = len(matrix)
    cols = len(matrix[0])
    paths = []
    current_path = []

    def dfs(row, col, current_sum):
        """Depth-first search for paths."""
        # Out of bounds or already visited via different path
        if row < 0 or row >= rows or col < 0 or col >= cols:
            return

        current_sum += matrix[row][col]
        current_path.append((row, col))

        # Reached bottom-right corner
        if row == rows - 1 and col == cols - 1:
            if current_sum == target_sum:
                paths.append(current_path.copy())
        else:
            # Try moving right or down
            dfs(row + 1, col, current_sum)
            dfs(row, col + 1, current_sum)

        current_path.pop()

    dfs(0, 0, 0)
    return paths

# Usage
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
paths = find_path_in_matrix(matrix, 25)
print(f"Paths with sum 25: {paths}")

def spiral_matrix(matrix):
    """
    Traverse matrix in spiral order.

    Time complexity: O(m × n)
    Space complexity: O(1) (not counting output)
    """

    if not matrix:
        return []

    rows = len(matrix)
    cols = len(matrix[0])
    result = []

    top, bottom = 0, rows - 1
    left, right = 0, cols - 1

    while top <= bottom and left <= right:
        # Traverse right
        for col in range(left, right + 1):
            result.append(matrix[top][col])
        top += 1

        # Traverse down
        for row in range(top, bottom + 1):
            result.append(matrix[row][right])
        right -= 1

        # Traverse left
        if top <= bottom:
            for col in range(right, left - 1, -1):
                result.append(matrix[bottom][col])
            bottom -= 1

        # Traverse up
        if left <= right:
            for row in range(bottom, top - 1, -1):
                result.append(matrix[row][left])
            left += 1

    return result

# Usage
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print("Spiral:", spiral_matrix(matrix))
# Output: [1, 2, 3, 6, 9, 8, 7, 4, 5]

Optimizing Nested Loops

Loop Interchange and Reordering

def matrix_multiply_naive(a, b):
    """Standard nested loop order."""
    n = len(a)
    c = [[0] * n for _ in range(n)]

    for i in range(n):
        for j in range(n):
            for k in range(n):
                c[i][j] += a[i][k] * b[k][j]

    return c

def matrix_multiply_optimized(a, b):
    """Reordered loops for cache locality."""
    n = len(a)
    c = [[0] * n for _ in range(n)]

    # Different loop order: k-i-j instead of i-j-k
    # Improves cache performance
    for k in range(n):
        for i in range(n):
            for j in range(n):
                c[i][j] += a[i][k] * b[k][j]

    return c

def matrix_multiply_blocked(a, b, block_size=32):
    """
    Block matrix multiplication for cache optimization.

    Process matrices in blocks to improve cache hit rate.
    """
    n = len(a)
    c = [[0] * n for _ in range(n)]

    for i in range(0, n, block_size):
        for j in range(0, n, block_size):
            for k in range(0, n, block_size):
                # Multiply blocks
                for ii in range(i, min(i + block_size, n)):
                    for jj in range(j, min(j + block_size, n)):
                        for kk in range(k, min(k + block_size, n)):
                            c[ii][jj] += a[ii][kk] * b[kk][jj]

    return c

# Performance comparison
import time

n = 512
a = [[i+j for j in range(n)] for i in range(n)]
b = [[i*j for j in range(n)] for i in range(n)]

start = time.time()
matrix_multiply_naive(a, b)
naive_time = time.time() - start

start = time.time()
matrix_multiply_optimized(a, b)
optimized_time = time.time() - start

print(f"Naive: {naive_time:.3f}s")
print(f"Optimized: {optimized_time:.3f}s")
print(f"Speedup: {naive_time/optimized_time:.2f}x")

Loop Unrolling

def sum_array_standard(arr):
    """Standard loop."""
    total = 0
    for x in arr:
        total += x
    return total

def sum_array_unrolled_2(arr):
    """Unroll loop by 2 - process 2 elements per iteration."""
    total = 0
    i = 0

    # Process pairs
    while i + 1 < len(arr):
        total += arr[i] + arr[i + 1]
        i += 2

    # Handle remainder
    if i < len(arr):
        total += arr[i]

    return total

def sum_array_unrolled_4(arr):
    """Unroll loop by 4."""
    total = 0
    i = 0

    while i + 3 < len(arr):
        total += arr[i] + arr[i + 1] + arr[i + 2] + arr[i + 3]
        i += 4

    # Handle remainder
    while i < len(arr):
        total += arr[i]
        i += 1

    return total

# Performance impact (usually modest in Python)
import timeit

data = list(range(10000))

std_time = timeit.timeit(lambda: sum_array_standard(data), number=1000)
unroll2_time = timeit.timeit(lambda: sum_array_unrolled_2(data), number=1000)
unroll4_time = timeit.timeit(lambda: sum_array_unrolled_4(data), number=1000)

print(f"Standard: {std_time:.4f}s")
print(f"Unroll 2: {unroll2_time:.4f}s")
print(f"Unroll 4: {unroll4_time:.4f}s")

Advanced Nested Loop Patterns

Convolution (Image Processing)

def convolve_2d(image, kernel):
    """
    2D convolution operation commonly used in image processing.

    Time complexity: O((H-Kh+1) × (W-Kw+1) × Kh × Kw)
    """
    img_height, img_width = len(image), len(image[0])
    ker_height, ker_width = len(kernel), len(kernel[0])

    output_height = img_height - ker_height + 1
    output_width = img_width - ker_width + 1

    output = [[0] * output_width for _ in range(output_height)]

    for i in range(output_height):
        for j in range(output_width):
            # Apply kernel
            for ki in range(ker_height):
                for kj in range(ker_width):
                    output[i][j] += image[i + ki][j + kj] * kernel[ki][kj]

    return output

# Edge detection example
sobel_x = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]

image = [
    [10, 20, 30],
    [40, 50, 60],
    [70, 80, 90]
]

edges = convolve_2d(image, sobel_x)
print("Edges detected:", edges)

Key Takeaways

PatternComplexityUse CaseOptimization
Fixed nestedO(m×n)Matrix operationsLoop fusion
TriangularO(n²)ComparisonsEarly termination
GeometricO(n log n)Divide-and-conquerBlock processing
Triple nestedO(n³)Intensive opsStrassen/Cache blocking
Spiral traversalO(m×n)Grid operationsPointer manipulation

What's Next?

Explore these advanced topics:

  • **Debugging Loops (Advanced)**: Profiling and bottleneck detection
  • **Best Practices (Advanced)**: Pythonic idioms and concurrent loops
  • **Loop Patterns (Advanced)**: Algorithms and dynamic programming
  • **Performance Optimization**: Caching and vectorization
  • **Parallel Processing**: NumPy and multiprocessing

Master nested loops and optimize for real-world performance!


Resources

Python Docs

Ojasa Mirai

Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.

Learn Deep • Build Real • Verify Skills • Launch Forward

Courses

PythonFastapiReactJSCloud

© 2026 Ojasa Mirai. All rights reserved.

TwitterGitHubLinkedIn