Ojasa Mirai

Python

Learning Level

String Basics & Creation String Indexing & Slicing Common String Methods String Formatting Essentials Finding & Replacing Text String Testing & Validation Regular Expression Basics Text Splitting & Joining Practical String Projects

Python/String Manipulation/Finding Replacing Text

🔍 Finding & Replacing Text — Algorithms & Optimization

Explore advanced search algorithms, performance optimization, and handling complex replacements.

🎯 Boyer-Moore Algorithm

def build_bad_char_table(pattern):
    """Build bad character table for Boyer-Moore"""
    table = {}
    for i, char in enumerate(pattern):
        table[char] = len(pattern) - i - 1
    return table

def boyer_moore_search(text, pattern):
    """Boyer-Moore string search - fast for large alphabets"""
    if not pattern:
        return 0

    bad_char = build_bad_char_table(pattern)
    i = len(pattern) - 1
    j = len(pattern) - 1

    while i < len(text):
        if text[i] == pattern[j]:
            if j == 0:
                return i
            i -= 1
            j -= 1
        else:
            skip = bad_char.get(text[i], len(pattern))
            i += len(pattern) - min(j, skip)
            j = len(pattern) - 1

    return -1

# Test
text = "ABABDABACDABABCABAB"
pattern = "ABABCAB"
print(boyer_moore_search(text, pattern))  # 9

💡 Rabin-Karp Algorithm

def rabin_karp_search(text, pattern, prime=101):
    """Rabin-Karp algorithm - good for multiple patterns"""
    pattern_len = len(pattern)
    text_len = len(text)

    pattern_hash = hash(pattern)
    text_hash = hash(text[:pattern_len])

    for i in range(text_len - pattern_len + 1):
        if text_hash == pattern_hash:
            if text[i:i + pattern_len] == pattern:
                return i

        if i < text_len - pattern_len:
            # Rolling hash
            text_hash = hash(text[i + 1:i + pattern_len + 1])

    return -1

# Multiple pattern search
def find_all_patterns(text, patterns):
    """Find all occurrences of multiple patterns"""
    results = {}
    for pattern in patterns:
        pos = rabin_karp_search(text, pattern)
        if pos >= 0:
            results[pattern] = pos
    return results

text = "AABAACAADAABAABA"
patterns = ["AABA", "AAD"]
print(find_all_patterns(text, patterns))
# {'AABA': 0, 'AAD': 6}

🎨 Advanced Replacement Strategies

import re

# Replace with callback function
text = "Hello 123 World 456"

def replacement_callback(match):
    """Replace number with its word equivalent"""
    num = int(match.group())
    words = {1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five',
             6: 'six'}
    return words.get(num, str(num))

result = re.sub(r'\d', replacement_callback, text)
print(result)  # Hello one two three World four five six

# Efficient multi-pattern replacement
def multi_replace(text, replacements):
    """Replace multiple patterns efficiently"""
    pattern = '|'.join(re.escape(k) for k in replacements.keys())
    return re.sub(pattern, lambda m: replacements[m.group()], text)

text = "cat dog cat bird dog"
replacements = {"cat": "feline", "dog": "canine"}
print(multi_replace(text, replacements))
# feline canine feline bird canine

📊 Performance Analysis

import timeit
import re

text = "The quick brown fox jumps over the lazy dog. " * 100

# Method 1: Multiple find/replace
def method_multiple(t):
    t = t.replace("quick", "slow")
    t = t.replace("fox", "dog")
    return t

# Method 2: Regex with alternation
def method_regex(t):
    return re.sub(r"quick|fox", lambda m: {
        "quick": "slow", "fox": "dog"
    }[m.group()], t)

# Method 3: Single compile
pattern = re.compile(r"quick|fox")
def method_compiled(t):
    return pattern.sub(lambda m: {
        "quick": "slow", "fox": "dog"
    }[m.group()], t)

print("Multiple:", timeit.timeit(method_multiple, setup=lambda: None, number=1000))
print("Regex:", timeit.timeit(method_regex, setup=lambda: None, number=1000))
print("Compiled:", timeit.timeit(method_compiled, setup=lambda: None, number=1000))

💡 Case-Insensitive Searching

import re
import unicodedata

def case_insensitive_find(text, pattern, normalize=True):
    """Case-insensitive search with Unicode normalization"""
    if normalize:
        text = unicodedata.normalize("NFKD", text).lower()
        pattern = unicodedata.normalize("NFKD", pattern).lower()
    else:
        text = text.casefold()
        pattern = pattern.casefold()

    return text.find(pattern)

# Test with accents
result = case_insensitive_find("Café", "café")
print(result)  # 0

# Regex approach
def regex_case_insensitive_find(text, pattern):
    """Using regex with case insensitive flag"""
    match = re.search(pattern, text, re.IGNORECASE | re.UNICODE)
    return match.start() if match else -1

result = regex_case_insensitive_find("Hello WORLD", "world")
print(result)  # 6

🔑 Trie Structure for Multiple Patterns

class TrieNode:
    def __init__(self):
        self.children = {}
        self.is_end = False
        self.pattern = None

class AhoCorasick:
    """Aho-Corasick algorithm for multiple pattern matching"""

    def __init__(self):
        self.root = TrieNode()

    def add_pattern(self, pattern):
        """Add pattern to trie"""
        node = self.root
        for char in pattern:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_end = True
        node.pattern = pattern

    def search(self, text):
        """Find all patterns in text"""
        results = []
        node = self.root

        for i, char in enumerate(text):
            while node != self.root and char not in node.children:
                node = self.root

            if char in node.children:
                node = node.children[char]

            if node.is_end:
                results.append((i - len(node.pattern) + 1, node.pattern))

        return results

# Usage
ac = AhoCorasick()
ac.add_pattern("he")
ac.add_pattern("she")
ac.add_pattern("her")
ac.add_pattern("hers")

text = "ushers"
print(ac.search(text))
# [(1, 'she'), (2, 'he'), (3, 'her'), (4, 'hers')]

🔑 Key Takeaways

Concept	Remember
Boyer-Moore	Best for long patterns in large texts
Rabin-Karp	Good for multiple pattern search
Compiled regex	Use re.compile() for repeated searches
Callback functions	Enable complex replacement logic
Aho-Corasick	Optimal for many simultaneous patterns

🔗 What's Next?

Learn advanced string validation techniques.

Ready to practice? Challenges | Quiz

Resources

Python Docs

Ojasa Mirai

Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.

Learn Deep • Build Real • Verify Skills • Launch Forward

Courses

Python Fastapi ReactJS Cloud

Resources

Blog & Articles GitHub Projects Video Tutorials

Ecosystem

Ojasa Mirai Site My Growth Learning Portal Community Discord

Twitter GitHub LinkedIn