44from dataclasses import dataclass
55from pathlib import Path
66
7+ # A forward declaration is needed for the type hint in this file
8+ class PrivlogConfig : ...
9+
710LOG_FUNCS = {"debug" , "info" , "warning" , "error" , "critical" , "exception" }
811
912# High-confidence = ERROR
@@ -118,8 +121,9 @@ def _get_expr_sensitivity(expr: ast.AST) -> str | None:
118121
119122
120123class _Visitor (ast .NodeVisitor ):
121- def __init__ (self , path : str ) -> None :
124+ def __init__ (self , path : str , config : PrivlogConfig ) -> None :
122125 self .path = path
126+ self .config = config
123127 self .findings : list [AstFinding ] = []
124128
125129 def _add_finding (self , node : ast .Call , code : str , message : str , severity : str ) -> None :
@@ -141,16 +145,18 @@ def _add_finding(self, node: ast.Call, code: str, message: str, severity: str) -
141145 def visit_Call (self , node : ast .Call ) -> None :
142146 is_log = _is_logging_call (node )
143147 is_print = _is_print_call (node )
148+
149+ # Determine if it's a custom wrapper call
150+ func_name = node .func .id if isinstance (node .func , ast .Name ) else ""
151+ is_custom_wrapper = func_name in self .config .custom_wrappers
144152
145- if not is_log and not is_print :
153+ if not is_log and not is_print and not is_custom_wrapper :
146154 self .generic_visit (node )
147155 return
148156
149157 # Check 1: Direct sensitive identifiers in formatted strings/args
150- if node .args :
158+ if node .args and ( is_log or is_print ) :
151159 args_to_check : list [ast .AST ] = []
152- # For print calls, all arguments are checked directly.
153- # For log calls, only format arguments are checked.
154160 if is_print :
155161 args_to_check .extend (node .args )
156162
@@ -162,13 +168,11 @@ def visit_Call(self, node: ast.Call) -> None:
162168 elif (isinstance (first_arg , ast .Call ) and isinstance (first_arg .func , ast .Attribute ) and first_arg .func .attr == "format" ):
163169 args_to_check .extend (first_arg .args )
164170 args_to_check .extend (kw .value for kw in first_arg .keywords )
165- # Case 1c: %-formatting (for logs only, print doesn't use this pattern )
171+ # Case 1c: %-formatting (for logs only)
166172 elif (is_log and len (node .args ) > 1 and isinstance (first_arg , ast .Constant ) and isinstance (first_arg .value , str ) and "%" in first_arg .value ):
167173 if len (node .args ) == 2 and isinstance (node .args [1 ], (ast .Tuple , ast .Dict )):
168174 if isinstance (node .args [1 ], ast .Tuple ):
169175 args_to_check .extend (node .args [1 ].elts )
170- elif isinstance (node .args [1 ], ast .Dict ):
171- args_to_check .extend (node .args [1 ].values )
172176 else :
173177 args_to_check .extend (node .args [1 :])
174178
@@ -182,36 +186,39 @@ def visit_Call(self, node: ast.Call) -> None:
182186
183187 # Check 2: Heuristic checks for dictionary/object logging
184188 if is_log :
185- # LM2201: Use of 'extra' keyword
186189 for keyword in node .keywords :
187190 if keyword .arg == 'extra' :
188- self ._add_finding (node , "LM2201" , "Logging with 'extra' parameter can hide sensitive data. Review manually." , "WARNING" )
191+ self ._add_finding (node , "LM2201" , "Logging with 'extra' can hide sensitive data. Review manually." , "WARNING" )
189192 break
190193
191- # LM2202/LM2203/LM2302/LM2303: Serialized objects
194+ # Check 3: Custom wrapper checks
195+ if is_custom_wrapper :
196+ wrapper_rules = self .config .custom_wrappers [func_name ]
197+ for kw in node .keywords :
198+ if kw .arg in wrapper_rules :
199+ severity = wrapper_rules [kw .arg ]
200+ self ._add_finding (node , "LM2401" , f"Sensitive argument '{ kw .arg } ' passed to custom wrapper '{ func_name } '." , severity )
201+
202+ # Common heuristic checks for all call types
192203 for arg in node .args :
193204 if isinstance (arg , ast .Call ) and isinstance (arg .func , ast .Attribute ):
194- # json.dumps(foo)
195205 if isinstance (arg .func .value , ast .Name ) and arg .func .value .id == 'json' and arg .func .attr == 'dumps' :
196206 code = "LM2302" if is_print else "LM2202"
197- self ._add_finding (node , code , "Potentially sensitive object serialized as JSON. Review manually." , "WARNING" )
198- break
199- # foo.to_dict()
207+ self ._add_finding (node , code , "Object serialized as JSON may be sensitive. Review manually." , "WARNING" )
200208 if arg .func .attr == 'to_dict' :
201209 code = "LM2303" if is_print else "LM2203"
202- self ._add_finding (node , code , "Object converted to dict can hide sensitive data. Review manually." , "WARNING" )
203- break
210+ self ._add_finding (node , code , "Object converted to dict may be sensitive. Review manually." , "WARNING" )
204211
205212 self .generic_visit (node )
206213
207214
208- def run_ast_checks (root : Path ) -> list [AstFinding ]:
215+ def run_ast_checks (root : Path , config : PrivlogConfig ) -> list [AstFinding ]:
209216 findings : list [AstFinding ] = []
210217 for py in root .rglob ("*.py" ):
211218 try :
212219 text = py .read_text (encoding = "utf-8" , errors = "replace" )
213220 tree = ast .parse (text )
214- v = _Visitor (str (py ))
221+ v = _Visitor (str (py ), config )
215222 v .visit (tree )
216223 findings .extend (v .findings )
217224 except SyntaxError :
0 commit comments