Ojasa Mirai

Ojasa Mirai

Python

Loading...

Learning Level

🟢 Beginner🔵 Advanced
String Basics & Creation · String Indexing & Slicing · Common String Methods · String Formatting Essentials · Finding & Replacing Text · String Testing & Validation · Regular Expression Basics · Text Splitting & Joining · Practical String Projects
Python/String Manipulation/String Testing Validation

✅ String Testing & Validation — Advanced Techniques

Master Unicode categories, fuzzy matching, and building robust validators.


🎯 Unicode Category Testing

import unicodedata

def test_unicode_category(text):
    """Print one line per character: the character, its category and name.

    The category is the two-letter code from ``unicodedata.category``;
    characters without a Unicode name are reported as ``UNKNOWN``.
    """
    for ch in text:
        print(f"{ch}: {unicodedata.category(ch)} - {unicodedata.name(ch, 'UNKNOWN')}")

# Demo: one line is printed per character; only some of those lines are
# reproduced below.
test_unicode_category("Hello123!你好")
# H: Lu - LATIN CAPITAL LETTER H
# e: Ll - LATIN SMALL LETTER E
# 1: Nd - DIGIT ONE
# !: Po - EXCLAMATION MARK
# 你: Lo - CJK UNIFIED IDEOGRAPH-4F60

# Filter by category
def filter_by_category(text, category):
    """Return the characters of *text* whose Unicode category is *category*.

    *category* is compared for exact equality with the two-letter code
    returned by ``unicodedata.category`` (for example ``"Lu"`` or ``"Nd"``).
    Characters keep their original order.
    """
    kept = []
    for ch in text:
        if unicodedata.category(ch) == category:
            kept.append(ch)
    return "".join(kept)

# Demo: extract one Unicode general category at a time.
text = "Hello123World!你好"
print(filter_by_category(text, "Lu"))   # HW (uppercase letters)
print(filter_by_category(text, "Nd"))   # 123 (decimal digits)
print(filter_by_category(text, "Lo"))   # 你好 ("Letter, other" - here the CJK ideographs)

💡 Fuzzy String Matching

from difflib import SequenceMatcher, get_close_matches
import re

def similarity_ratio(str1, str2):
    """Return the ``difflib`` similarity of the two strings as a float.

    The value comes from ``SequenceMatcher.ratio()`` with no junk filter:
    1.0 means the sequences are identical, 0.0 means nothing in common.
    """
    return SequenceMatcher(None, str1, str2).ratio()

# Simple matching
# The words share 4 of their 5 characters, so ratio() is 2 * 4 / (5 + 5) = 0.8.
str1 = "hello"
str2 = "hallo"
print(f"Similarity: {similarity_ratio(str1, str2):.2%}")
# Similarity: 80.00%

# Find close matches
# Keep the candidate list free of duplicates: get_close_matches() scores every
# entry on its own, so a repeated word can occupy several of the n result slots.
word = "speling"
words = ["spelling", "selling", "smelling"]
matches = get_close_matches(word, words, n=2, cutoff=0.6)
print(matches)  # ['spelling', 'selling']

# Levenshtein distance
def levenshtein_distance(str1, str2):
    """Return the edit distance between the two sequences.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions that turn one sequence into the other.
    Only two rows of the dynamic-programming table are kept at a time.
    """
    # Walk the longer sequence row by row so each row spans the shorter one.
    if len(str1) < len(str2):
        longer, shorter = str2, str1
    else:
        longer, shorter = str1, str2

    # Against an empty sequence every item of the other must be inserted.
    if len(shorter) == 0:
        return len(longer)

    prev = list(range(len(shorter) + 1))
    for row_no, a in enumerate(longer, start=1):
        cur = [row_no]
        for col, b in enumerate(shorter):
            from_above = prev[col + 1] + 1
            from_left = cur[-1] + 1
            from_diagonal = prev[col] + (a != b)  # free when the items match
            cur.append(min(from_above, from_left, from_diagonal))
        prev = cur

    return prev[-1]

# kitten -> sitten -> sittin -> sitting: two substitutions and one insertion.
print(levenshtein_distance("kitten", "sitting"))  # 3

🎨 Advanced Validation

import re
from functools import wraps

# Email validator (RFC 5322 simplified)
def is_valid_email(email):
    """Return True if *email* has the shape ``local@domain.tld``.

    This is a deliberately simplified format check, not a full RFC 5322
    parser: the local part may contain ASCII letters, digits and ``._%+-``,
    the domain ASCII letters, digits, dots and hyphens, and the final label
    must be at least two ASCII letters.

    Args:
        email: The string to check.

    Returns:
        bool: True when the whole string matches the pattern.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch rather than match + '$': '$' also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    return re.fullmatch(pattern, email) is not None

# URL validator
def is_valid_url(url):
    """Return True if *url* looks like an ``http://`` or ``https://`` URL.

    The check is purely syntactic: after the scheme there must be at least
    two characters, the first of which is not whitespace or one of
    ``/ $ . ? #``, and the whole string must not contain whitespace after
    that point.

    Args:
        url: The string to check.

    Returns:
        bool: True when the whole string matches the pattern.
    """
    pattern = r'https?://[^\s/$.?#].[^\s]*'
    # fullmatch rather than match + '$': '$' also matches just before a
    # trailing newline, which would let "https://example.com\n" through.
    return re.fullmatch(pattern, url) is not None

# Strong password validator
def validate_password(password):
    """Check *password* against the strength rules and report every failure.

    Returns a dict with two keys: ``"valid"`` (True when no rule failed)
    and ``"issues"`` (the list of messages for the rules that failed, in
    rule order: length first, then the character-class rules).
    """
    # (regex that must be found somewhere in the password, message if absent)
    required = (
        (r'[a-z]', "Include lowercase letters"),
        (r'[A-Z]', "Include uppercase letters"),
        (r'\d', "Include numbers"),
        (r'[!@#$%^&*(),.?":{}|<>]', "Include special characters"),
    )

    issues = []
    if len(password) < 12:
        issues.append("At least 12 characters required")
    issues.extend(
        message
        for regex, message in required
        if re.search(regex, password) is None
    )

    return {"valid": not issues, "issues": issues}

# Validator decorator
def validate(validator_func):
    """Build a decorator that checks a function's first argument.

    The decorated function is called only when ``validator_func(value)``
    is truthy for its first positional argument; otherwise ``ValueError``
    is raised with the rejected value in the message.
    """
    def apply_check(func):
        @wraps(func)
        def guarded(value, *args, **kwargs):
            if validator_func(value):
                return func(value, *args, **kwargs)
            raise ValueError(f"Invalid input: {value}")
        return guarded
    return apply_check

@validate(is_valid_email)
def send_email(email):
    """Return a confirmation message for *email*.

    The ``validate(is_valid_email)`` decorator runs first and raises
    ``ValueError`` when the address fails ``is_valid_email``, so this body
    only runs for addresses that passed the check. It only builds the
    message string; nothing is actually sent.
    """
    return f"Email sent to {email}"

# Demo: the first call passes validation; the second raises ValueError in the
# decorator, which is caught and printed.
try:
    print(send_email("user@example.com"))  # Email sent to user@example.com
    print(send_email("invalid-email"))  # raises ValueError before printing
except ValueError as e:
    print(e)  # Invalid input: invalid-email

📊 Custom Validation Classes

class StringValidator:
    """Chainable validator that collects error messages for one string.

    Each check method appends a message to ``errors`` when its check fails
    and returns the validator itself, so checks can be chained. Call
    ``is_valid()`` or ``get_errors()`` at the end to read the outcome.
    """

    def __init__(self, value):
        self.errors = []
        self.value = value

    def _fail_if(self, failed, message):
        """Record *message* when *failed* is truthy; always return ``self``."""
        if failed:
            self.errors.append(message)
        return self

    def min_length(self, length):
        """Require the value to have at least *length* characters."""
        too_short = len(self.value) < length
        return self._fail_if(too_short, f"Must be at least {length} characters")

    def max_length(self, length):
        """Require the value to have at most *length* characters."""
        too_long = len(self.value) > length
        return self._fail_if(too_long, f"Must be at most {length} characters")

    def match_pattern(self, pattern, message="Invalid format"):
        """Require ``re.match(pattern, value)`` to succeed, else record *message*."""
        return self._fail_if(re.match(pattern, self.value) is None, message)

    def no_spaces(self):
        """Reject values that contain a space character."""
        return self._fail_if(" " in self.value, "No spaces allowed")

    def is_valid(self):
        """Return True when no check has recorded an error."""
        return not self.errors

    def get_errors(self):
        """Return the list of recorded error messages."""
        return self.errors

# Usage
# Each check returns the validator itself, which is what allows the chaining.
validator = (StringValidator("admin123")
             .min_length(6)
             .max_length(20)
             .no_spaces()
             .match_pattern(r'^[a-zA-Z0-9_]+$', "Only alphanumeric and underscore"))

print(f"Valid: {validator.is_valid()}")  # Valid: True
print(f"Errors: {validator.get_errors()}")  # Errors: []

💡 Phonetic Matching

import re

def metaphone(word):
    """Return a rough phonetic key for *word* (heavily simplified Metaphone).

    Steps, in order: upper-case the word, collapse runs of a repeated
    character to one, drop everything that is not ``A``-``Z``, then apply
    a small set of spelling rewrites one after another.
    """
    # (pattern, replacement) pairs, applied sequentially to the running key.
    rewrites = (
        (r'^KN', 'N'),
        (r'^WR', 'R'),
        (r'TCH', 'CH'),
        (r'DG', 'G'),
        (r'^H[AEIOUWY]', 'H'),
    )

    key = re.sub(r'(.)\1+', r'\1', word.upper())  # collapse repeated characters
    key = re.sub(r'[^A-Z]', '', key)              # keep ASCII letters only
    for regex, substitute in rewrites:
        key = re.sub(regex, substitute, key)
    return key

print(metaphone("knight"))  # NIGHT (the leading "KN" is reduced to "N")
print(metaphone("knight") == metaphone("night"))  # True

🔑 Key Takeaways

| Concept | Remember |
| --- | --- |
| Unicode categories | Use `unicodedata.category()` for multilingual support |
| Fuzzy matching | Use `difflib` for approximate string matching |
| Levenshtein distance | Measure edit distance between strings |
| Validation decorators | Use for clean, reusable validation code |
| Fluent builders | Chain validation methods for readability |

🔗 What's Next?

Learn advanced regular expressions.


Ready to practice? Challenges | Quiz


Resources

Python Docs

Ojasa Mirai

Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.

Learn Deep • Build Real • Verify Skills • Launch Forward

Courses

Python | FastAPI | ReactJS | Cloud

© 2026 Ojasa Mirai. All rights reserved.

Twitter | GitHub | LinkedIn