
Python
Master Unicode categories, fuzzy matching, and robust validator design.
import unicodedata
def test_unicode_category(text):
    """Print the Unicode general category and name of each character.

    One line is written to stdout per character, formatted as
    ``<char>: <category> - <name>``.

    Args:
        text: String whose characters are inspected one by one.

    Returns:
        None. The report is printed, not returned.
    """
    for char in text:
        category = unicodedata.category(char)
        # Characters without an assigned name fall back to "UNKNOWN"
        # instead of raising ValueError.
        name = unicodedata.name(char, "UNKNOWN")
        print(f"{char}: {category} - {name}")


# The "test_" prefix makes pytest collect this helper as a test and then fail
# on the missing "text" fixture; opt out of collection explicitly.
test_unicode_category.__test__ = False
test_unicode_category("Hello123!你好")
# Selected lines of the output:
# H: Lu - LATIN CAPITAL LETTER H
# e: Ll - LATIN SMALL LETTER E
# 1: Nd - DIGIT ONE
# !: Po - EXCLAMATION MARK
# 你: Lo - CJK UNIFIED IDEOGRAPH-4F60
# Filter by category
def filter_by_category(text, category):
    """Return the characters of *text* that belong to a Unicode category.

    Args:
        text: String to filter.
        category: A two-letter general category such as ``"Lu"`` or ``"Nd"``,
            a one-letter major class such as ``"L"`` (every letter category),
            or an iterable of either form.

    Returns:
        A new string holding the matching characters in their original order.
        An empty selector matches nothing.
    """
    if isinstance(category, str):
        prefixes = (category,)
    else:
        prefixes = tuple(category)
    # str.startswith("") is always true, so empty selectors are discarded
    # rather than allowed to match every character.
    prefixes = tuple(prefix for prefix in prefixes if prefix)
    if not prefixes:
        return ""
    return "".join(
        char for char in text
        if unicodedata.category(char).startswith(prefixes)
    )
text = "Hello123World!你好"
print(filter_by_category(text, "Lu"))  # HW (uppercase letters)
print(filter_by_category(text, "Nd"))  # 123 (decimal digits)
print(filter_by_category(text, "Lo"))  # 你好 (other letters, e.g. CJK)

from difflib import SequenceMatcher, get_close_matches
import re
def similarity_ratio(str1, str2):
    """Return difflib's similarity score for two strings.

    The score is ``SequenceMatcher(None, str1, str2).ratio()``: 1.0 for
    identical inputs, 0.0 when nothing matches.
    """
    return SequenceMatcher(None, str1, str2).ratio()
# Simple matching
str1 = "hello"
str2 = "hallo"
print(f"Similarity: {similarity_ratio(str1, str2):.2%}")
# Similarity: 80.00%

# Find close matches
word = "speling"
# Candidates must be unique: get_close_matches scores every entry, so a
# repeated word would occupy more than one of the n result slots.
words = ["spelling", "selling", "smelling"]
matches = get_close_matches(word, words, n=2, cutoff=0.6)
print(matches)  # ['spelling', 'selling']
# Levenshtein distance
def levenshtein_distance(str1, str2):
    """Return the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are kept at a time.
    """
    # Put the shorter string on the inner axis so each row stays small.
    if len(str1) >= len(str2):
        longer, shorter = str1, str2
    else:
        longer, shorter = str2, str1

    if not shorter:
        return len(longer)

    prev = list(range(len(shorter) + 1))
    for row, ch_long in enumerate(longer, start=1):
        cur = [row]
        for col, ch_short in enumerate(shorter, start=1):
            insert_cost = prev[col] + 1
            delete_cost = cur[col - 1] + 1
            # A bool adds 0 for a match and 1 for a mismatch.
            replace_cost = prev[col - 1] + (ch_long != ch_short)
            cur.append(min(insert_cost, delete_cost, replace_cost))
        prev = cur
    return prev[-1]
print(levenshtein_distance("kitten", "sitting"))  # 3

import re
from functools import wraps
# Email validator (RFC 5322 simplified)
def is_valid_email(email):
    """Return True when *email* has the shape ``local@domain.tld``.

    This is a pragmatic check, not a full RFC 5322 parser. The local part may
    contain ASCII letters, digits and ``._%+-``. The domain must be made of
    dot-separated labels that start and end with a letter or digit, followed
    by an alphabetic top-level domain of at least two letters.

    Args:
        email: The string to check.

    Returns:
        True if the whole string matches, False otherwise.
    """
    pattern = (
        r"[a-zA-Z0-9._%+-]+"
        r"@"
        # Each label starts and ends with an alphanumeric, which rules out
        # empty labels ("a@b..com") and leading or trailing hyphens.
        r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+"
        r"[a-zA-Z]{2,}"
    )
    # fullmatch, unlike match with "$", does not tolerate a trailing newline.
    return re.fullmatch(pattern, email) is not None
# URL validator
def is_valid_url(url):
    """Return True when *url* looks like an ``http://`` or ``https://`` URL.

    After the scheme, the first character must not be whitespace or one of
    ``/ $ . ? #``, and it must be followed by at least one more character.
    No whitespace is allowed anywhere in the string.

    Args:
        url: The string to check.

    Returns:
        True if the whole string matches, False otherwise.
    """
    # \S+ replaces the former ".[^\s]*": the bare "." accepted a space or tab
    # as the second character (e.g. "http://a b").
    pattern = r"https?://[^\s/$.?#]\S+"
    # fullmatch, unlike match with "$", does not tolerate a trailing newline.
    return re.fullmatch(pattern, url) is not None
# Strong password validator
def validate_password(password, min_length=12):
    """Check a password against length and character-class rules.

    Args:
        password: The candidate password.
        min_length: Minimum number of characters required (default 12).

    Returns:
        A dict with two keys: ``"valid"`` (True when no rule failed) and
        ``"issues"`` (the list of failure messages, in rule order).
    """
    import string

    issues = []
    if len(password) < min_length:
        issues.append(f"At least {min_length} characters required")
    if not re.search(r'[a-z]', password):
        issues.append("Include lowercase letters")
    if not re.search(r'[A-Z]', password):
        issues.append("Include uppercase letters")
    if not re.search(r'\d', password):
        issues.append("Include numbers")
    # Any ASCII punctuation counts as special. The earlier hand-written set
    # left out common choices such as "-", "_" and ";".
    if not any(char in string.punctuation for char in password):
        issues.append("Include special characters")
    return {
        "valid": not issues,
        "issues": issues,
    }
# Validator decorator
def validate(validator_func):
    """Build a decorator that validates a function's first argument.

    The decorated function is called only when ``validator_func(value)`` is
    truthy for its first positional argument; otherwise ``ValueError`` is
    raised with the message ``Invalid input: <value>``.
    """
    def decorate(func):
        @wraps(func)
        def checked(value, *args, **kwargs):
            if validator_func(value):
                return func(value, *args, **kwargs)
            raise ValueError(f"Invalid input: {value}")
        return checked
    return decorate
@validate(is_valid_email)
def send_email(email):
    """Return a confirmation message for *email*.

    The ``validate(is_valid_email)`` decorator runs first, so this body only
    executes for values the validator accepts.
    """
    message = f"Email sent to {email}"
    return message
try:
    print(send_email("user@example.com"))  # Email sent to user@example.com
    print(send_email("invalid-email"))  # raises ValueError
except ValueError as e:
    print(e)  # Invalid input: invalid-email


class StringValidator:
    """Fluent validator that collects error messages for one value.

    Every check method appends a message to ``errors`` when the check fails
    and returns ``self``, so checks can be chained. Nothing is raised; call
    ``is_valid()`` or ``get_errors()`` afterwards to read the outcome.
    """

    def __init__(self, value):
        """Store the value to validate and start with no errors."""
        self.value = value
        self.errors = []

    def min_length(self, length):
        """Record an error if the value is shorter than *length*."""
        if len(self.value) < length:
            self.errors.append(f"Must be at least {length} characters")
        return self

    def max_length(self, length):
        """Record an error if the value is longer than *length*."""
        if len(self.value) > length:
            self.errors.append(f"Must be at most {length} characters")
        return self

    def match_pattern(self, pattern, message="Invalid format"):
        """Record *message* if *pattern* does not match the value.

        ``re.match`` only anchors at the start of the value; include ``$`` in
        *pattern* when the whole value has to match.
        """
        if not re.match(pattern, self.value):
            self.errors.append(message)
        return self

    def no_spaces(self):
        """Record an error if the value contains a space character."""
        if " " in self.value:
            self.errors.append("No spaces allowed")
        return self

    def is_valid(self):
        """Return True when no check has recorded an error."""
        return not self.errors

    def get_errors(self):
        """Return the recorded error messages, in the order they were added.

        A copy is returned so callers cannot alter the validator's state by
        mutating the result.
        """
        return list(self.errors)
# Usage
validator = (
    StringValidator("admin123")
    .min_length(6)
    .max_length(20)
    .no_spaces()
    .match_pattern(r'^[a-zA-Z0-9_]+$', "Only alphanumeric and underscore")
)
print(f"Valid: {validator.is_valid()}")  # Valid: True
print(f"Errors: {validator.get_errors()}")  # Errors: []

import re
def metaphone(word):
    """Return a simplified Metaphone-style key for *word*.

    This is a small subset of the Metaphone rules, not the full algorithm.
    Steps, in order: upper-case the word, drop everything except A-Z,
    collapse runs of the same letter, then apply the rewrite rules below.

    Args:
        word: The word to encode.

    Returns:
        The upper-case key; an empty string if *word* has no ASCII letters.
    """
    word = word.upper()
    # Strip non-letters before collapsing duplicates, so letters separated
    # only by punctuation ("Ab-bot") are collapsed as well.
    word = re.sub(r'[^A-Z]', '', word)
    word = re.sub(r'(.)\1+', r'\1', word)
    replacements = [
        (r'^KN', 'N'),            # leading KN -> N
        (r'^WR', 'R'),            # leading WR -> R
        (r'TCH', 'CH'),           # TCH -> CH
        (r'DG', 'G'),             # DG -> G
        (r'^H[AEIOUWY]', 'H'),    # leading H + vowel/W/Y -> H
    ]
    for pattern, replacement in replacements:
        word = re.sub(pattern, replacement, word)
    return word
print(metaphone("knight")) # Simplified phonetic
print(metaphone("knight") == metaphone("night"))| Concept | Remember |
|---|---|
| Unicode categories | Use `unicodedata.category()` for multilingual support |
| Fuzzy matching | Use difflib for approximate string matching |
| Levenshtein distance | Measure edit distance between strings |
| Validation decorators | Use for clean, reusable validation code |
| Fluent builders | Chain validation methods for readability |
Learn advanced regular expressions.
Ready to practice? Challenges | Quiz
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward