
Python
Explore advanced search algorithms, performance optimization, and handling complex replacements.
def build_bad_char_table(pattern):
"""Build bad character table for Boyer-Moore"""
table = {}
for i, char in enumerate(pattern):
table[char] = len(pattern) - i - 1
return table
def boyer_moore_search(text, pattern):
"""Boyer-Moore string search - fast for large alphabets"""
if not pattern:
return 0
bad_char = build_bad_char_table(pattern)
i = len(pattern) - 1
j = len(pattern) - 1
while i < len(text):
if text[i] == pattern[j]:
if j == 0:
return i
i -= 1
j -= 1
else:
skip = bad_char.get(text[i], len(pattern))
i += len(pattern) - min(j, skip)
j = len(pattern) - 1
return -1
# Test
text = "ABABDABACDABABCABAB"
pattern = "ABABCAB"
print(boyer_moore_search(text, pattern)) # 9def rabin_karp_search(text, pattern, prime=101):
"""Rabin-Karp algorithm - good for multiple patterns"""
pattern_len = len(pattern)
text_len = len(text)
pattern_hash = hash(pattern)
text_hash = hash(text[:pattern_len])
for i in range(text_len - pattern_len + 1):
if text_hash == pattern_hash:
if text[i:i + pattern_len] == pattern:
return i
if i < text_len - pattern_len:
# Rolling hash
text_hash = hash(text[i + 1:i + pattern_len + 1])
return -1
# Multiple pattern search
def find_all_patterns(text, patterns):
"""Find all occurrences of multiple patterns"""
results = {}
for pattern in patterns:
pos = rabin_karp_search(text, pattern)
if pos >= 0:
results[pattern] = pos
return results
text = "AABAACAADAABAABA"
patterns = ["AABA", "AAD"]
print(find_all_patterns(text, patterns))
# {'AABA': 0, 'AAD': 6}import re
# Replace with callback function
text = "Hello 123 World 456"
def replacement_callback(match):
"""Replace number with its word equivalent"""
num = int(match.group())
words = {1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five',
6: 'six'}
return words.get(num, str(num))
result = re.sub(r'\d', replacement_callback, text)
print(result) # Hello one two three World four five six
# Efficient multi-pattern replacement
def multi_replace(text, replacements):
"""Replace multiple patterns efficiently"""
pattern = '|'.join(re.escape(k) for k in replacements.keys())
return re.sub(pattern, lambda m: replacements[m.group()], text)
text = "cat dog cat bird dog"
replacements = {"cat": "feline", "dog": "canine"}
print(multi_replace(text, replacements))
# feline canine feline bird canineimport timeit
import re
text = "The quick brown fox jumps over the lazy dog. " * 100
# Method 1: Multiple find/replace
def method_multiple(t):
t = t.replace("quick", "slow")
t = t.replace("fox", "dog")
return t
# Method 2: Regex with alternation
def method_regex(t):
return re.sub(r"quick|fox", lambda m: {
"quick": "slow", "fox": "dog"
}[m.group()], t)
# Method 3: Single compile
pattern = re.compile(r"quick|fox")
def method_compiled(t):
return pattern.sub(lambda m: {
"quick": "slow", "fox": "dog"
}[m.group()], t)
print("Multiple:", timeit.timeit(method_multiple, setup=lambda: None, number=1000))
print("Regex:", timeit.timeit(method_regex, setup=lambda: None, number=1000))
print("Compiled:", timeit.timeit(method_compiled, setup=lambda: None, number=1000))import re
import unicodedata
def case_insensitive_find(text, pattern, normalize=True):
"""Case-insensitive search with Unicode normalization"""
if normalize:
text = unicodedata.normalize("NFKD", text).lower()
pattern = unicodedata.normalize("NFKD", pattern).lower()
else:
text = text.casefold()
pattern = pattern.casefold()
return text.find(pattern)
# Test with accents
result = case_insensitive_find("Café", "café")
print(result) # 0
# Regex approach
def regex_case_insensitive_find(text, pattern):
"""Using regex with case insensitive flag"""
match = re.search(pattern, text, re.IGNORECASE | re.UNICODE)
return match.start() if match else -1
result = regex_case_insensitive_find("Hello WORLD", "world")
print(result) # 6class TrieNode:
def __init__(self):
self.children = {}
self.is_end = False
self.pattern = None
class AhoCorasick:
"""Aho-Corasick algorithm for multiple pattern matching"""
def __init__(self):
self.root = TrieNode()
def add_pattern(self, pattern):
"""Add pattern to trie"""
node = self.root
for char in pattern:
if char not in node.children:
node.children[char] = TrieNode()
node = node.children[char]
node.is_end = True
node.pattern = pattern
def search(self, text):
"""Find all patterns in text"""
results = []
node = self.root
for i, char in enumerate(text):
while node != self.root and char not in node.children:
node = self.root
if char in node.children:
node = node.children[char]
if node.is_end:
results.append((i - len(node.pattern) + 1, node.pattern))
return results
# Usage
ac = AhoCorasick()
ac.add_pattern("he")
ac.add_pattern("she")
ac.add_pattern("her")
ac.add_pattern("hers")
text = "ushers"
print(ac.search(text))
# [(1, 'she'), (2, 'he'), (3, 'her'), (4, 'hers')]| Concept | Remember |
|---|---|
| Boyer-Moore | Best for long patterns in large texts |
| Rabin-Karp | Good for multiple pattern search |
| Compiled regex | Use re.compile() for repeated searches |
| Callback functions | Enable complex replacement logic |
| Aho-Corasick | Optimal for many simultaneous patterns |
Learn advanced string validation techniques.
Ready to practice? Challenges | Quiz
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward