
"""
Profile modules, optimize imports, and implement advanced caching strategies.

Measure and analyze import performance in detail.
"""
import importlib.util
import sys
import time
from typing import Dict, List, Optional, Tuple
class ImportProfiler:
    """Profile module import times and direct import dependencies."""

    def __init__(self):
        self.import_times: Dict[str, float] = {}
        self.module_sizes: Dict[str, int] = {}
        self.dependency_graph: Dict[str, List[str]] = {}

    def profile_import(self, module_name: str) -> float:
        """Time a cold import of *module_name* and record it.

        The module is evicted from sys.modules BEFORE the timer starts, so
        the measurement covers only the import itself.  (The original code
        started the timer first, folding the eviction cost into the result.)

        Raises:
            ImportError: if the module cannot be imported.
        """
        # Clear from cache to force a fresh import — outside the timed region.
        if module_name in sys.modules:
            del sys.modules[module_name]
        start = time.perf_counter()
        importlib.import_module(module_name)
        elapsed = time.perf_counter() - start
        self.import_times[module_name] = elapsed
        return elapsed

    def profile_all_imports(self, modules: List[str]) -> Dict[str, Optional[float]]:
        """Profile several imports; modules that fail to import map to None."""
        results: Dict[str, Optional[float]] = {}
        for module in modules:
            try:
                results[module] = self.profile_import(module)
            except ImportError:
                # Record the failure instead of aborting the whole run.
                results[module] = None
        return results

    def get_slowest_imports(self, n: int = 10) -> List[Tuple[str, float]]:
        """Return the n slowest recorded imports, slowest first."""
        ranked = sorted(
            self.import_times.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        return ranked[:n]

    def analyze_import_chain(self, module_name: str) -> Dict[str, List[str]]:
        """Parse the module's source and collect its direct imports.

        Returns {module_name: [imported names]}, or {} when the module has no
        parseable source file (builtins report origin "built-in", which the
        original code tried to open as a path and crashed on).
        """
        import ast

        spec = importlib.util.find_spec(module_name)
        if not spec or not spec.origin or not spec.origin.endswith(".py"):
            return {}
        with open(spec.origin) as f:
            tree = ast.parse(f.read())
        imports: Dict[str, List[str]] = {module_name: []}
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports[module_name].extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom) and node.module:
                imports[module_name].append(node.module)
        return imports
# Usage
profiler = ImportProfiler()
modules = ["json", "requests", "numpy", "pandas"]
results = profiler.profile_all_imports(modules)
print("Slowest imports:")
for module, elapsed in profiler.get_slowest_imports(3):
    # Only successfully imported modules appear in import_times,
    # so elapsed is always a float here.
    print(f" {module}: {elapsed*1000:.2f}ms")

# The next section's `import functools` was fused onto the print line above
# by a copy/paste artifact; restored to its own line.
import functools
import hashlib
import pickle
from pathlib import Path
from typing import Callable, Any
class ModuleCache:
    """Two-level (memory + disk) cache keyed by module name.

    NOTE(review): disk entries use pickle, so the cache directory must be
    trusted — never load cache files from an untrusted source.
    """

    def __init__(self, cache_dir: Path = None):
        self.cache_dir = cache_dir or Path(".cache")
        # parents=True so nested user-supplied cache paths also work
        # (the original mkdir failed for them).
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.memory_cache = {}

    def _get_cache_key(self, module_name: str) -> str:
        """Stable, filesystem-safe key (md5 for naming, not security)."""
        return hashlib.md5(module_name.encode()).hexdigest()

    def _get_cache_path(self, module_name: str) -> Path:
        """On-disk cache file path for *module_name*."""
        return self.cache_dir / f"{self._get_cache_key(module_name)}.cache"

    def get(self, module_name: str) -> Any:
        """Return cached data, or None on a miss or unreadable entry."""
        # Memory level first — avoids a disk hit on repeat lookups.
        if module_name in self.memory_cache:
            return self.memory_cache[module_name]
        cache_path = self._get_cache_path(module_name)
        if cache_path.exists():
            try:
                with open(cache_path, "rb") as f:
                    data = pickle.load(f)
            except (OSError, pickle.PickleError, EOFError, AttributeError):
                # Corrupt/unreadable entry: treat as a miss.
                # (Original caught bare Exception, hiding real bugs.)
                return None
            self.memory_cache[module_name] = data
            return data
        return None

    def set(self, module_name: str, data: Any) -> None:
        """Store *data* in both levels; disk writes are best-effort."""
        self.memory_cache[module_name] = data
        cache_path = self._get_cache_path(module_name)
        try:
            with open(cache_path, "wb") as f:
                pickle.dump(data, f)
        except (OSError, pickle.PicklingError):
            # Disk cache is an optimization; keep going on write failure.
            pass

    def clear(self) -> None:
        """Drop the memory cache and delete every on-disk entry."""
        self.memory_cache.clear()
        for cache_file in self.cache_dir.glob("*.cache"):
            cache_file.unlink()
# Usage
cache = ModuleCache()


# Expensive operation
def load_module_metadata(module_name):
    """Return {name, file} metadata for a module, cached across calls."""
    cached = cache.get(module_name)
    # `is not None` rather than truthiness: an empty-but-valid cached value
    # must not force recomputation.
    if cached is not None:
        return cached
    # Expensive computation
    import importlib.util
    spec = importlib.util.find_spec(module_name)
    metadata = {
        "name": module_name,
        "file": spec.origin if spec else None,
    }
    cache.set(module_name, metadata)
    return metadata


# The next section's `from typing import Any` was fused onto the return
# statement above by a copy/paste artifact; restored to its own line.
from typing import Any
class LazyModuleProxy:
    """Stand-in object that imports its target module on first use."""

    def __init__(self, module_name: str):
        self._module_name = module_name
        self._module = None

    def _load(self):
        """Import the real module exactly once and memoize it."""
        if self._module is not None:
            return self._module
        import importlib
        self._module = importlib.import_module(self._module_name)
        return self._module

    def __getattr__(self, name: str) -> Any:
        """Resolve unknown attributes against the (lazily loaded) module."""
        target = self._load()
        return getattr(target, name)

    def __dir__(self):
        """Delegate introspection to the real module."""
        target = self._load()
        return dir(target)
# Usage
# Don't load until needed
numpy_lazy = LazyModuleProxy("numpy")
# Only loads when accessed
array = numpy_lazy.array([1, 2, 3])

# The next section's `import sys` was fused onto the line above by a
# copy/paste artifact; restored to its own line.
import sys
import importlib
from pathlib import Path
class MemoryProfiler:
    """Profile memory usage of modules.

    NOTE: sizes come from sys.getsizeof, which is SHALLOW — it does not
    follow references into the objects a module contains.
    """

    @staticmethod
    def get_module_size(module_name: str) -> int:
        """Shallow size of a loaded module object; 0 if not loaded."""
        module = sys.modules.get(module_name)
        return sys.getsizeof(module) if module else 0

    @staticmethod
    def get_all_module_sizes() -> dict:
        """Shallow sizes of every loaded module, keyed by module name."""
        return {
            name: sys.getsizeof(module)
            for name, module in sys.modules.items()
            if module
        }

    @staticmethod
    def analyze_import_memory(module_name: str) -> dict:
        """Total live-object size before vs. after importing a module."""
        import gc
        gc.collect()
        before = sum(sys.getsizeof(obj) for obj in gc.get_objects())
        importlib.import_module(module_name)
        after = sum(sys.getsizeof(obj) for obj in gc.get_objects())
        return {
            "module": module_name,
            "before": before,
            "after": after,
            "difference": after - before
        }

    @staticmethod
    def report_largest_modules(n: int = 10):
        """Print the n largest loaded modules by shallow size."""
        ranked = sorted(
            MemoryProfiler.get_all_module_sizes().items(),
            key=lambda entry: entry[1],
            reverse=True,
        )
        print(f"Top {n} largest modules:")
        for name, size in ranked[:n]:
            print(f" {name}: {size:,} bytes")
# Usage
MemoryProfiler.report_largest_modules(5)

# The next section's `import py_compile` was fused onto the call above by a
# copy/paste artifact; restored to its own line.
import py_compile
import compileall
from pathlib import Path
class BytecodeOptimizer:
    """Helpers around py_compile/compileall for bytecode generation."""

    @staticmethod
    def compile_module(source_path: str, output_path: str = None):
        """Compile one source file to bytecode; raises PyCompileError on failure."""
        py_compile.compile(source_path, output_path, doraise=True)

    @staticmethod
    def compile_package(package_dir: str, optimize: int = 2):
        """Recompile every module under *package_dir*.

        optimize levels (matching -O / -OO):
          0 = no optimization
          1 = strip assert statements and __debug__ blocks
          2 = additionally strip docstrings
        """
        compileall.compile_dir(
            package_dir,
            optimize=optimize,
            force=True
        )

    @staticmethod
    def get_bytecode_stats(package_dir: str) -> dict:
        """Count and size every .pyc file under *package_dir*."""
        entries = []
        total_size = 0
        for pyc_path in Path(package_dir).rglob("*.pyc"):
            size = pyc_path.stat().st_size
            total_size += size
            entries.append({"path": str(pyc_path), "size": size})
        return {
            "count": len(entries),
            "total_size": total_size,
            "files": entries
        }
# Usage
optimizer = BytecodeOptimizer()
optimizer.compile_package("./mypackage", optimize=2)
stats = optimizer.get_bytecode_stats("./mypackage")
print(f"Compiled {stats['count']} files")

# The next section's header comment was fused onto the print line above by a
# copy/paste artifact; restored to its own line.
# Pattern 1: Selective imports based on platform
import sys

# Pick the OS-specific backend once, at import time.
# NOTE(review): the relative imports assume this module lives inside a
# package; windows_specific / unix_specific are not visible in this file —
# confirm they exist alongside it.
if sys.platform == "win32":
    from . import windows_specific
else:
    from . import unix_specific
# Pattern 2: Conditional heavy imports
def use_numpy():
    """Import numpy on demand and build a small demo array."""
    import numpy
    return numpy.array([1, 2, 3])
# Pattern 3: Deferred imports in functions
def process_data(data):
    """Build a DataFrame, paying the pandas import cost only on first call."""
    import pandas
    frame = pandas.DataFrame(data)
    return frame
# Pattern 4: Module __getattr__ for lazy imports
def __getattr__(name):
    """Import heavy submodules only when first referenced (PEP 562)."""
    if name != "heavy_module":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    from . import heavy_module
    return heavy_module
# Pattern 5: Entry point optimization
def main():
    """Main entry point with minimal imports.

    Sketch: only cheap modules are imported up front; heavy modules would be
    imported inside the command branches chosen from the parsed arguments.
    """
    import sys
    import argparse
    parser = argparse.ArgumentParser()
    # ... only import heavy modules based on arguments


# The next section's `import timeit` was swallowed by the comment above via a
# copy/paste artifact; restored to its own line.
import timeit
from typing import Callable, List, Tuple
class ImportBenchmark:
    """Benchmark first-import vs. cached-import time for a list of modules."""

    def __init__(self, modules: List[str]):
        self.modules = modules
        # module -> {"first_import": float|None, "cached_average": float|None}
        self.results = {}

    def run_benchmark(self, number: int = 3) -> dict:
        """Time each module's cold and cached import.

        Fixes vs. original: the "first import" setup now evicts the module
        from sys.modules, so an already-loaded module measures a real import
        instead of a cache hit; a module that cannot be imported is recorded
        as None instead of aborting the whole benchmark.
        """
        for module in self.modules:
            try:
                # Cold import: evict any cached copy before the timed stmt.
                first = timeit.timeit(
                    f"import {module}",
                    setup=f"import sys; sys.modules.pop({module!r}, None)",
                    number=1
                )
                # Cached import: the setup pre-imports, so the timed stmt
                # only hits sys.modules.
                cached = timeit.timeit(
                    f"import {module}",
                    setup=f"import {module}",
                    number=number
                )
            except ImportError:
                self.results[module] = {
                    "first_import": None,
                    "cached_average": None
                }
                continue
            self.results[module] = {
                "first_import": first,
                "cached_average": cached / number
            }
        return self.results

    def print_report(self):
        """Print a fixed-width benchmark report; missing modules are marked."""
        print("Import Performance Report")
        print("=" * 60)
        print(f"{'Module':<30} {'First':<15} {'Cached':<15}")
        print("-" * 60)
        for module, times in sorted(self.results.items()):
            first = times["first_import"]
            cached = times["cached_average"]
            if first is None:
                print(f"{module:<30} {'not installed':<15}")
                continue
            print(f"{module:<30} {first*1000:>8.2f}ms {cached*1000:>10.4f}ms")
# Usage
benchmark = ImportBenchmark(["json", "requests", "numpy", "pandas"])
benchmark.run_benchmark()
benchmark.print_report()
# (Trailing website navigation text fused onto the last line by the page
# extraction has been removed — it was not code.)
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward