Ojasa Mirai

Python

Learning Level

Data Processing Overview CSV Data Handling Pandas Basics DataFrames Data Filtering Aggregation & Grouping Data Cleaning & Wrangling NumPy Arrays Data Visualization Basics

Python/Data Processing/Numpy Arrays

🔢 Advanced NumPy — Scientific Computing Mastery

Master advanced indexing and broadcasting rules, and learn to optimize numerical computations.

🎯 Advanced Indexing

import numpy as np

# 4x5 grid holding 0..19 in row-major order, so arr[r, c] == 5 * r + c.
arr = np.arange(20).reshape(4, 5)

# Fancy indexing (integer arrays)
# The two index arrays are paired element by element, giving one value per pair.
rows = np.array([0, 2, 3])
cols = np.array([1, 3, 4])
result = arr[rows, cols]  # Select (0,1), (2,3), (3,4) -> [1, 13, 19]

# Boolean indexing with 2D conditions
mask = arr > 10  # Boolean array with the same (4, 5) shape as arr
result = arr[mask]  # 1D array of values > 10 (11..19)

# Advanced boolean indexing
# Combine conditions with & (each side parenthesized); keeps the values 6..14.
result = arr[(arr > 5) & (arr < 15)]

# Conditional indexing
# Unlike boolean masking, np.where keeps the (4, 5) shape: values > 10 are
# kept, everything else becomes 0.
result = np.where(arr > 10, arr, 0)  # Replace values

# Using np.ix_ for orthogonal indexing
# Every listed row is combined with every listed column (a cross product),
# instead of pairing the index arrays element by element as above.
rows = np.array([0, 2])
cols = np.array([1, 3, 4])
result = arr[np.ix_(rows, cols)]  # Orthogonal (2, 3) result: [[1, 3, 4], [11, 13, 14]]

📊 Advanced Broadcasting

import numpy as np

# Broadcasting rules
# Axes of length 1 are stretched to match the other operand's length.
a = np.array([[1, 2, 3]])        # (1, 3)
b = np.array([[1], [2], [3]])    # (3, 1)
result = a + b                     # (3, 3) via broadcasting

# Broadcasting examples
# A 1-D operand is combined with a column vector to form a full grid.
x = np.arange(4)                  # (4,)
y = np.arange(3).reshape(3, 1)    # (3, 1)
result = x + y                     # (3, 4)

# Practical: center columns (subtract each column's mean)
# NOTE: this only mean-centers the data; no scaling (e.g. by the standard
# deviation) is applied, despite the variable being named `normalized`.
data = np.array([[1, 10, 100],
                 [2, 20, 200],
                 [3, 30, 300]])

column_means = data.mean(axis=0)   # (3,) -> [2., 20., 200.]
normalized = data - column_means    # Broadcasting: (3, 3) - (3,) applies the means row by row

🔄 Memory-Efficient Operations

import numpy as np

# Avoid copying data
original = np.arange(10)
view = original[2:5]               # View, not copy (basic slicing shares the buffer)
view[0] = 999                       # Modifies original! (original[2] is now 999)

# Check if array owns data
print(view.flags['OWNDATA'])        # False

# Explicit copy
# Taken after the write above, so the copy also contains 999 at index 2.
copy = original.copy()

# In-place operations
# Augmented assignment writes the result back into the existing array.
arr = np.arange(10)
arr *= 2                            # No new array created
arr += 10                           # No new array created

# Memory layout affects performance
# Both arrays hold the same values; only the order in which they are stored differs.
arr_c = np.array([[1, 2], [3, 4]], order='C')  # Row-major (fast row iteration)
arr_f = np.array([[1, 2], [3, 4]], order='F')  # Column-major (fast column iteration)

# Using dtype to reduce memory
# uint8 can only represent 0..255. An out-of-range literal such as 256 raises
# OverflowError on NumPy 2 (older releases silently wrapped it to 0), so the
# example stays within the valid range.
arr = np.array([1, 2, 3, 255], dtype=np.uint8)  # 1 byte per element

🎯 Universal Functions (ufuncs)

import numpy as np

arr = np.array([1, 2, 3, 4, 5])

# Built-in ufuncs
# Each call returns a new array; `result` is rebound every time.
result = np.sqrt(arr)              # Element-wise sqrt
result = np.exp(arr)               # Element-wise exponential
result = np.log(arr)               # Element-wise natural log
result = np.sin(arr)               # Element-wise sine

# Ufunc methods
# The return values below are not stored; assign them if you need the results.
np.add.reduce(arr)                 # Sum (like reduce) -> 15
np.multiply.accumulate(arr)        # Cumulative product -> [1, 2, 6, 24, 120]
np.add.outer(arr, arr)             # Outer sum: out[i, j] = arr[i] + arr[j] (np.multiply.outer gives the outer product)

# Custom ufunc
def custom_func(x, y):
    """Return the sum of the squares of ``x`` and ``y``."""
    return x**2 + y**2


# np.frompyfunc wraps the Python function as a ufunc with 2 inputs and 1 output.
# The wrapped ufunc always produces object-dtype arrays, which are slow and
# break numeric code downstream, so cast the result back to a numeric dtype.
ufunc = np.frompyfunc(custom_func, 2, 1)
result = ufunc(np.array([1, 2, 3]), np.array([4, 5, 6])).astype(np.int64)

📈 Linear Algebra Operations

import numpy as np
from numpy.linalg import matrix_rank, inv, eigvals, qr, svd

# Matrix rank
A = np.array([[1, 2], [3, 4]])
rank = matrix_rank(A)

# Matrix inverse
A_inv = inv(A)
print(np.dot(A, A_inv))            # Identity matrix (up to floating-point round-off)

# Eigenvalues and eigenvectors
# eigvals(A) would return the eigenvalues alone; eig returns them together
# with the eigenvectors (column i of `eigenvectors` pairs with eigenvalues[i]).
eigenvalues, eigenvectors = np.linalg.eig(A)

# QR decomposition: A == Q @ R
Q, R = qr(A)

# Singular Value Decomposition (SVD): A == U @ diag(s) @ Vt
U, s, Vt = svd(A)

# Solve linear system Ax = b
# A is rebound here to a new coefficient matrix; solve() avoids forming the
# inverse explicitly.
A = np.array([[3, 1], [1, 2]])
b = np.array([9, 8])
x = np.linalg.solve(A, b)          # -> [2., 3.]

🎨 Structured Arrays

import numpy as np

# Define structured dtype
# Each record has three named fields: a unicode string of up to 10 characters,
# a 4-byte integer, and a 4-byte float.
dt = np.dtype([('name', 'U10'), ('age', 'i4'), ('salary', 'f4')])

# Create structured array
# Records are given as tuples, one value per field, in field order.
employees = np.array([
    ('Alice', 25, 50000),
    ('Bob', 30, 60000),
    ('Carol', 28, 55000)
], dtype=dt)

# Access fields
# Indexing by field name returns that field for every record.
print(employees['name'])            # All names
print(employees['age'])             # All ages
print(employees[0]['salary'])       # First employee's salary

# Field operations
# In-place update of the 'salary' field; values stay stored as 4-byte floats.
employees['salary'] *= 1.1          # Raise all salaries

🔀 Stacking and Concatenation

import numpy as np

# Two (2, 2) inputs used by every example below.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

# Stack vertically (row-wise)
vstacked = np.vstack([a, b])        # (4, 2): rows of a followed by rows of b

# Stack horizontally (column-wise)
hstacked = np.hstack([a, b])        # (2, 4): columns of a followed by columns of b

# Stack along new axis
# Unlike vstack/hstack, np.stack adds a dimension instead of extending one.
stacked = np.stack([a, b], axis=0)  # (2, 2, 2)
stacked = np.stack([a, b], axis=1)  # (2, 2, 2) different order

# Concatenate
# Joins along an existing axis; with axis=0 the result is (4, 2).
concatenated = np.concatenate([a, b], axis=0)

# Tile (repeat array)
tiled = np.tile(a, (2, 2))          # Repeat 2x2 -> (4, 4)

📊 Real-World Example: Image Processing

import numpy as np

# Simulate image (100x100 RGB)
from scipy.ndimage import convolve, sobel

image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)

# Convert to grayscale
# Weighted sum over the colour axis using the standard luma weights
# (ITU-R BT.601); the result is a float array of shape (100, 100).
grayscale = np.dot(image[..., :3], [0.299, 0.587, 0.114])

# Blur using convolution
# A 3x3 kernel whose entries sum to 1 replaces each pixel by its neighbourhood mean.
kernel = np.ones((3, 3)) / 9
blurred = convolve(grayscale, kernel)

# Edge detection (Sobel)
# sobel() differentiates along a single axis only (the last one by default),
# so combine the derivatives along both axes into a gradient magnitude to
# detect edges in every direction.
grad_rows = sobel(grayscale, axis=0)
grad_cols = sobel(grayscale, axis=1)
edges = np.hypot(grad_rows, grad_cols)

# Normalize to 0-1
# Guard against a perfectly flat edge map, which would otherwise divide by zero.
edge_min = edges.min()
edge_range = edges.max() - edge_min
if edge_range > 0:
    normalized = (edges - edge_min) / edge_range
else:
    normalized = np.zeros_like(edges)

⚡ Performance Optimization

import numpy as np
import timeit

# Vectorized vs loop
def loop_sum(n):
    """Sum the integers ``0 .. n-1`` with an explicit Python-level loop.

    This is the pure-Python baseline that the vectorized version is timed
    against, so the loop is kept deliberately.
    """
    accumulator = 0
    for value in range(n):
        accumulator = accumulator + value
    return accumulator

def vectorized_sum(n):
    """Return the sum of the integers ``0 .. n-1`` computed by NumPy.

    The range is built as int64 explicitly: with the platform default integer
    (32-bit on some platforms/NumPy versions) the sum for large ``n`` would
    overflow and silently wrap around.
    """
    return np.arange(n, dtype=np.int64).sum()

# Timing
# Each function is executed exactly once (number=1), so the figures are a
# single-shot measurement and will vary from run to run.
n = 1000000
time_loop = timeit.timeit(lambda: loop_sum(n), number=1)
time_vec = timeit.timeit(lambda: vectorized_sum(n), number=1)

# Report both timings in seconds and the ratio loop / vectorized.
print(f"Loop: {time_loop:.4f}s")
print(f"Vectorized: {time_vec:.4f}s")
print(f"Speedup: {time_loop/time_vec:.1f}x")

# Use appropriate dtypes
# Same values, half the memory: 4 bytes vs 8 bytes per element.
int32_arr = np.arange(1000000, dtype=np.int32)
int64_arr = np.arange(1000000, dtype=np.int64)

# Contiguous arrays are faster
# order='C' stores rows contiguously, order='F' stores columns contiguously.
arr_c = np.array([[1, 2, 3], [4, 5, 6]], order='C')
arr_f = np.array([[1, 2, 3], [4, 5, 6]], order='F')
print(arr_c.flags['C_CONTIGUOUS'])  # True
print(arr_f.flags['F_CONTIGUOUS'])  # True

🔑 Key Takeaways

✅ Use fancy indexing for advanced selection

✅ Understand broadcasting rules to avoid unnecessary copies

✅ Use in-place operations for memory efficiency

✅ Master ufuncs for element-wise operations

✅ Use linear algebra functions for matrix operations

✅ Use structured arrays for heterogeneous data

✅ Vectorize code for significant performance gains

✅ Choose appropriate dtypes and memory layouts

Continue: Advanced Visualization

Resources

Python Docs

Ojasa Mirai

Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.

Learn Deep • Build Real • Verify Skills • Launch Forward

Courses

Python FastAPI ReactJS Cloud

Resources

Blog & Articles GitHub Projects Video Tutorials

Ecosystem

Ojasa Mirai Site My Growth Learning Portal Community Discord

Twitter GitHub LinkedIn