Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/**
* Provides a reusable data-flow configuration for tracking class instances
* through global data-flow with full path support.
*
* This module is designed for quality queries that check whether instances
* of certain classes reach operations that require a specific interface
* (e.g., `__contains__`, `__iter__`, `__hash__`).
*
* The configuration uses two flow states:
* - `TrackingClass`: tracking a reference to the class itself
* - `TrackingInstance`: tracking an instance of the class
*
* At instantiation points (e.g., `cls()`), the state transitions from
* `TrackingClass` to `TrackingInstance`. Sinks are only matched in the
* `TrackingInstance` state.
*/

private import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.ApiGraphs

/** A flow state for tracking class references and their instances. */
abstract class ClassInstanceFlowState extends string {
bindingset[this]
ClassInstanceFlowState() { any() }
}

/** A state signifying that the tracked value is a reference to the class itself. */
class TrackingClass extends ClassInstanceFlowState {
TrackingClass() { this = "TrackingClass" }
}

/** A state signifying that the tracked value is an instance of the class. */
class TrackingInstance extends ClassInstanceFlowState {
TrackingInstance() { this = "TrackingInstance" }
}

/**
* Signature module for parameterizing `ClassInstanceFlow` per query.
*/
signature module ClassInstanceFlowSig {
/** Holds if `cls` is a class whose instances should be tracked to sinks. */
predicate isRelevantClass(Class cls);

/** Holds if `sink` is a location where reaching instances indicate a violation. */
predicate isInstanceSink(DataFlow::Node sink);

/**
* Holds if an `isinstance` check against `checkedType` should act as a barrier,
* suppressing alerts when the instance has been verified to have the expected interface.
*/
predicate isGuardType(DataFlow::Node checkedType);
}

/**
* Constructs a global data-flow configuration for tracking instances of
* relevant classes from their definition to violation sinks.
*/
module ClassInstanceFlow<ClassInstanceFlowSig Sig> {
/**
* Holds if `guard` is an `isinstance` call checking `node` against a type
* that should suppress the alert.
*/
private predicate isinstanceGuard(DataFlow::GuardNode guard, ControlFlowNode node, boolean branch) {
exists(DataFlow::CallCfgNode isinstance_call |
isinstance_call = API::builtin("isinstance").getACall() and
isinstance_call.getArg(0).asCfgNode() = node and
(
Sig::isGuardType(isinstance_call.getArg(1))
or
// Also handle tuples of types: isinstance(x, (T1, T2))
Sig::isGuardType(DataFlow::exprNode(isinstance_call.getArg(1).asExpr().(Tuple).getAnElt()))
) and
guard = isinstance_call.asCfgNode() and
branch = true
)
}

private module Config implements DataFlow::StateConfigSig {
class FlowState = ClassInstanceFlowState;

predicate isSource(DataFlow::Node source, FlowState state) {
exists(ClassExpr ce |
Sig::isRelevantClass(ce.getInnerScope()) and
source.asExpr() = ce and
state instanceof TrackingClass
)
}

predicate isSink(DataFlow::Node sink, FlowState state) {
Sig::isInstanceSink(sink) and
state instanceof TrackingInstance
}

predicate isBarrier(DataFlow::Node node) {
node = DataFlow::BarrierGuard<isinstanceGuard/3>::getABarrierNode()
}

predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
// Instantiation: class reference at the call function position
// flows to the call result as an instance.
stateFrom instanceof TrackingClass and
stateTo instanceof TrackingInstance and
exists(CallNode call |
nodeFrom.asCfgNode() = call.getFunction() and
nodeTo.asCfgNode() = call and
// Exclude decorator applications, where the result is a proxy
// rather than a typical instance.
not call.getNode() = any(FunctionExpr fe).getADecoratorCall()
)
}
}

module Flow = DataFlow::GlobalWithState<Config>;
}
54 changes: 38 additions & 16 deletions python/ql/src/Expressions/ContainsNonContainer.ql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @name Membership test with a non-container
* @description A membership test, such as 'item in sequence', with a non-container on the right hand side will raise a 'TypeError'.
* @kind problem
* @kind path-problem
* @tags quality
* reliability
* correctness
Expand All @@ -12,25 +12,47 @@
*/

import python
private import LegacyPointsTo
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.dataflow.new.internal.ClassInstanceFlow
private import semmle.python.ApiGraphs

predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) {
exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() |
predicate rhs_in_expr(Expr rhs, Compare cmp) {
exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs |
op instanceof In or op instanceof NotIn
)
}

module ContainsNonContainerSig implements ClassInstanceFlowSig {
predicate isRelevantClass(Class cls) {
not DuckTyping::isContainer(cls) and
not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and
not exists(CallNode setattr_call |
setattr_call.getFunction().(NameNode).getId() = "setattr" and
setattr_call.getArg(0).(NameNode).getId() = cls.getName() and
setattr_call.getScope() = cls.getScope()
)
}

predicate isInstanceSink(DataFlow::Node sink) { rhs_in_expr(sink.asExpr(), _) }

predicate isGuardType(DataFlow::Node checkedType) {
checkedType =
API::builtin(["list", "tuple", "set", "frozenset", "dict", "str", "bytes", "bytearray"])
.getAValueReachableFromSource()
}
}

module ContainsNonContainerFlow = ClassInstanceFlow<ContainsNonContainerSig>;

import ContainsNonContainerFlow::Flow::PathGraph

from
ControlFlowNodeWithPointsTo non_seq, Compare cmp, Value v, ClassValue cls, ControlFlowNode origin
ContainsNonContainerFlow::Flow::PathNode source, ContainsNonContainerFlow::Flow::PathNode sink,
ClassExpr ce
where
rhs_in_expr(non_seq, cmp) and
non_seq.pointsTo(_, v, origin) and
v.getClass() = cls and
not Types::failedInference(cls, _) and
not cls.hasAttribute("__contains__") and
not cls.hasAttribute("__iter__") and
not cls.hasAttribute("__getitem__") and
not cls = ClassValue::nonetype() and
not cls = Value::named("types.MappingProxyType")
select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin,
"target", cls, cls.getName()
ContainsNonContainerFlow::Flow::flowPath(source, sink) and
source.getNode().asExpr() = ce
select sink.getNode(), source, sink,
"This test may raise an Exception as the $@ may be of non-container class $@.", source.getNode(),
"target", ce.getInnerScope(), ce.getInnerScope().getName()
Original file line number Diff line number Diff line change
@@ -1,2 +1,22 @@
| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter |
| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter |
edges
| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | provenance | |
| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | provenance | |
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | provenance | |
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | |
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | |
| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | provenance | Config |
| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | expressions_test.py:88:5:88:7 | ControlFlowNode for seq | provenance | |
nodes
| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | semmle.label | ControlFlowNode for ClassExpr |
| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter |
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter |
| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | semmle.label | ControlFlowNode for XIter() |
| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
subpaths
#select
| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
Original file line number Diff line number Diff line change
Expand Up @@ -279,3 +279,62 @@ def local():
def apply(f):
pass
apply(foo)([1])

# Class used as a decorator: the runtime value at attribute access is the
# function's return value, not the decorator class instance.
class cached_property(object):
def __init__(self, func):
self.func = func
def __get__(self, obj, cls):
val = self.func(obj)
setattr(obj, self.func.__name__, val)
return val

class MyForm(object):
@cached_property
def changed_data(self):
return [1, 2, 3]

def test_decorator_class(form):
f = MyForm()
# OK: cached_property is a descriptor; the actual runtime value is a list.
if "name" in f.changed_data:
pass

# Class with dynamically added methods via setattr: we cannot statically
# determine its full interface, so we should not flag it.
class DynamicProxy(object):
def __init__(self, args):
self._args = args

for method_name in ["__contains__", "__iter__", "__len__"]:
def wrapper(self, *args, __method_name=method_name):
pass
setattr(DynamicProxy, method_name, wrapper)

def test_dynamic_methods():
proxy = DynamicProxy(())
# OK: __contains__ is added dynamically via setattr.
if "name" in proxy:
pass

# isinstance guard should suppress non-container warning
def guarded_contains(x):
obj = XIter()
if isinstance(obj, dict):
if x in obj: # OK: guarded by isinstance
pass

def guarded_contains_tuple(x):
obj = XIter()
if isinstance(obj, (list, dict, set)):
if x in obj: # OK: guarded by isinstance with tuple of types
pass

# Negated isinstance guard: early return when NOT a container
def guarded_contains_negated(x):
obj = XIter()
if not isinstance(obj, dict):
return
if x in obj: # OK: guarded by negated isinstance + early return
pass
Loading