44import sys
55import threading
66
7+ import scrubadub
78from human_id import generate_id
89from loguru import logger
910
1314_logging_initialized = False
1415_logging_lock = threading .Lock ()
1516
16- # PII Patterns for redaction (pre-compiled for performance)
17- # Note: More specific patterns must come before general ones (e.g., sk-ant- before sk-)
18- _COMPILED_PII_PATTERNS = [
19- # Email addresses
20- (
21- re .compile (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b" ),
22- "[REDACTED_EMAIL]" ,
23- ),
24- # Anthropic API keys (sk-ant-...) - must be before OpenAI pattern
25- (re .compile (r"sk-ant-[a-zA-Z0-9-]{20,}" ), "[REDACTED_API_KEY]" ),
26- # OpenAI API keys (sk-...)
27- (re .compile (r"sk-[a-zA-Z0-9]{20,}" ), "[REDACTED_API_KEY]" ),
28- # Stripe API keys (sk_live_*, sk_test_*, pk_live_*, pk_test_*, rk_live_*, rk_test_*)
29- (re .compile (r"[spr]k_(live|test)_[a-zA-Z0-9]{20,}" ), "[REDACTED_API_KEY]" ),
30- # Authorization Bearer tokens
31- (re .compile (r"Bearer\s+[a-zA-Z0-9._\-]{20,}" ), "[REDACTED_BEARER_TOKEN]" ),
32- # Generic project/API keys (common formats: xxx_key_*, api_key=*, apikey=*)
33- (re .compile (r"(?i)(api[_-]?key|project[_-]?key|secret[_-]?key)[=:\s]+['\"]?[a-zA-Z0-9_\-]{16,}['\"]?" ), "[REDACTED_KEY]" ),
34- ]
17+
class _LogScrubber:
    """Redact sensitive data from log text.

    Redaction runs in up to two stages:

    1. General PII is cleaned with a ``scrubadub.Scrubber`` (only when both
       ``enabled`` and ``use_default_pii`` are set in the redaction config).
    2. All configured secret patterns are applied in one ``re.sub`` call.
       Each pattern is wrapped in its own named group (``p0``, ``p1``, ...)
       and the groups are joined into a single alternation; the name of the
       group that matched selects the placeholder to substitute.

    Because the patterns are alternatives of one regex, earlier patterns win
    when several could match at the same position, so more specific patterns
    should be listed before general ones.
    """

    # Matches a global inline-flag prefix such as "(?i)" or "(?im)" at the
    # very start of a pattern.
    _LEADING_GLOBAL_FLAGS = re.compile(r"\(\?([aiLmsux]+)\)")

    def __init__(self):
        """Read the redaction config and build the scrubbers it asks for."""
        config = global_config.logging.redaction
        self.enabled = config.enabled
        self.use_default_pii = config.use_default_pii
        self.patterns = config.patterns

        # General-purpose PII scrubber; stays None when not requested.
        self.scrubber = None
        if self.enabled and self.use_default_pii:
            self.scrubber = scrubadub.Scrubber()

        # Combined secrets regex and its group-name -> placeholder lookup.
        self.combined_regex = None
        self.placeholder_map = {}

        if self.enabled and self.patterns:
            self._compile_patterns()

    @classmethod
    def _scope_leading_flags(cls, regex: str) -> str:
        """Turn a leading global flag prefix into a scoped flag group.

        ``(?i)foo`` becomes ``(?i:foo)``. A global inline flag is only legal
        at the very start of an expression, so a pattern carrying one cannot
        be embedded in a larger alternation as-is: newer Python versions
        reject it, older ones apply the flag to every alternative. The scoped
        form keeps the flag local to the pattern it came from. Patterns
        without such a prefix are returned unchanged.
        """
        prefix = cls._LEADING_GLOBAL_FLAGS.match(regex)
        if prefix is None:
            return regex
        return f"(?{prefix.group(1)}:{regex[prefix.end():]})"

    def _compile_patterns(self):
        """Compile ``self.patterns`` into ``self.combined_regex``.

        Each entry of ``self.patterns`` must expose ``regex`` and
        ``placeholder`` attributes. Populates ``self.placeholder_map`` with
        one entry per pattern, keyed by the generated group name.

        Raises:
            re.error: if the combined expression is not a valid regex.
        """
        regex_parts = []
        for i, p in enumerate(self.patterns):
            group_name = f"p{i}"
            body = self._scope_leading_flags(p.regex)
            regex_parts.append(f"(?P<{group_name}>{body})")
            self.placeholder_map[group_name] = p.placeholder

        self.combined_regex = re.compile("|".join(regex_parts))

    def _redact_callback(self, match):
        """Return the placeholder for whichever pattern group matched.

        Used as the replacement callback for ``re.sub``. ``match.lastgroup``
        is the name of the outer ``pN`` group because that group closes last;
        ``"[REDACTED]"`` is the fallback when no name maps to a placeholder.
        """
        group_name = match.lastgroup
        return self.placeholder_map.get(group_name, "[REDACTED]")

    def scrub(self, text: str) -> str:
        """Return ``text`` with PII and configured secrets redacted.

        ``text`` is returned unchanged when redaction is disabled or the
        text is empty.
        """
        if not self.enabled or not text:
            return text

        # 1. General PII via scrubadub, when configured.
        if self.scrubber:
            text = self.scrubber.clean(text)

        # 2. Configured secret patterns, all in one substitution pass.
        if self.combined_regex:
            text = self.combined_regex.sub(self._redact_callback, text)

        return text
70+
71+ # Initialize the singleton scrubber
72+ _SCRUBBER = _LogScrubber ()
3573
3674
3775def scrub_sensitive_data (record ):
3876 """
3977 Patch function to scrub sensitive data from the log record.
4078 Modifies record["message"], record["exception"], and any string values in record["extra"] in place.
4179 """
80+ if not _SCRUBBER .enabled :
81+ return
82+
4283 # Scrub main message
43- message = record ["message" ]
44- for pattern , placeholder in _COMPILED_PII_PATTERNS :
45- message = pattern .sub (placeholder , message )
46- record ["message" ] = message
84+ record ["message" ] = _SCRUBBER .scrub (record ["message" ])
4785
4886 # Scrub exception if present
4987 exception = record .get ("exception" )
5088 if exception :
5189 type_ , value , tb = exception
5290 value_str = str (value )
53- redacted = False
54- for pattern , placeholder in _COMPILED_PII_PATTERNS :
55- if pattern .search (value_str ):
56- value_str = pattern .sub (placeholder , value_str )
57- redacted = True
91+ scrubbed_value_str = _SCRUBBER .scrub (value_str )
5892
59- if redacted :
93+ if scrubbed_value_str != value_str :
6094 # Re-instantiate the exception with the redacted message to preserve loguru formatting
6195 try :
6296 # Most standard exceptions accept a single string argument
63- new_value = type_ (value_str )
97+ new_value = type_ (scrubbed_value_str )
6498 except Exception :
6599 # Fallback to a generic Exception if type instantiation fails
66- new_value = Exception (value_str )
100+ new_value = Exception (scrubbed_value_str )
67101
68102 # Preserve traceback and context metadata
69103 new_value .__traceback__ = tb
@@ -72,6 +106,13 @@ def scrub_sensitive_data(record):
72106
73107 record ["exception" ] = (type_ , new_value , tb )
74108
109+ # Scrub extra context if present
110+ extra = record .get ("extra" )
111+ if extra :
112+ for key , val in extra .items ():
113+ if isinstance (val , str ):
114+ extra [key ] = _SCRUBBER .scrub (val )
115+
75116
76117def _should_show_location (level : str ) -> bool :
77118 """Determine if location should be shown for given log level"""
0 commit comments