Skip to content

Commit ef7565d

Browse files
committed
Update hypothesis tests
1 parent d9be396 commit ef7565d

9 files changed

Lines changed: 146 additions & 76 deletions

File tree

docker/Dockerfile-fuzz

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,8 @@
1-
FROM fedora:42 AS fuzz
1+
FROM danielflook/python-minifier-build:python3.14-2025-08-21 AS fuzz
22

3-
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
4-
5-
RUN <<EOF
6-
dnf install -y \
7-
python3 \
8-
python3-pip
9-
dnf clean all && rm -rf /var/cache/dnf/*
10-
EOF
11-
12-
RUN pip install --no-cache-dir hypothesis[cli] hypofuzz
3+
RUN pip3 install --no-cache-dir --root-user-action=ignore 'hypothesis[cli]' hypofuzz
134

145
COPY fuzz.sh /fuzz.sh
15-
16-
WORKDIR /tmp/work
176
ENTRYPOINT ["/fuzz.sh"]
187

198
EXPOSE 9999/tcp

docker/fuzz.sh

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,26 @@
11
#!/usr/bin/env bash
22

3-
pip install .
3+
pip3 install --root-user-action=ignore .
44

5-
exec hypothesis fuzz hypo_test
5+
# Check if --timeout is specified
6+
timeout_duration=""
7+
args=()
8+
while [[ $# -gt 0 ]]; do
9+
case $1 in
10+
--timeout)
11+
timeout_duration="$2"
12+
shift 2
13+
;;
14+
*)
15+
args+=("$1")
16+
shift
17+
;;
18+
esac
19+
done
20+
21+
# Run with timeout if specified, otherwise run normally
22+
if [[ -n "$timeout_duration" ]]; then
23+
exec timeout "$timeout_duration" hypothesis fuzz hypo_test "${args[@]}"
24+
else
25+
exec hypothesis fuzz hypo_test "${args[@]}"
26+
fi

hypo_test/README.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@ The hypothesis strategies in this directory generate an AST that python can pars
44
It does not take care to generate semantically valid programs.
55
Failure cases should shrink into valid programs, though.
66

7+
pytest hypo_test --hypothesis-show-statistics --hypothesis-explain
8+
79
TODO:
810
Assignment targets: (in comprehensions too)
911

hypo_test/expressions.py

Lines changed: 91 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import ast
1+
import python_minifier.ast_compat as ast
22
import keyword
33
import math
44
import string
@@ -25,16 +25,16 @@
2525

2626
comparison_operators = sampled_from(
2727
[
28-
ast.Eq(),
28+
ast.Eq(), # Most common comparison
2929
ast.NotEq(),
30-
ast.Lt(),
31-
ast.LtE(),
30+
ast.Lt(), # Simple ordering
3231
ast.Gt(),
32+
ast.LtE(),
3333
ast.GtE(),
34-
ast.Is(),
35-
ast.IsNot(),
36-
ast.In(),
37-
ast.NotIn()
34+
ast.In(), # Membership tests
35+
ast.NotIn(),
36+
ast.Is(), # Identity tests (less common)
37+
ast.IsNot()
3838
]
3939
)
4040

@@ -45,26 +45,45 @@
4545
def Num(draw) -> ast.AST:
4646
def to_node(n) -> ast.AST:
4747
if isinstance(n, int):
48-
return ast.Num(n) if n >= 0 else ast.UnaryOp(ast.USub(), ast.Num(abs(n)))
48+
return ast.Constant(value=n) if n >= 0 else ast.UnaryOp(ast.USub(), ast.Constant(value=abs(n)))
4949
elif isinstance(n, float):
50-
return ast.Num(n) if math.copysign(1.0, n) > 0.0 else ast.UnaryOp(ast.USub(), ast.Num(abs(n)))
50+
return ast.Constant(value=n) if math.copysign(1.0, n) > 0.0 else ast.UnaryOp(ast.USub(), ast.Constant(value=abs(n)))
5151
elif isinstance(n, complex):
5252
node = ast.parse(str(n), mode='eval')
5353
return node.body
5454

5555
raise ValueError(n)
5656

57-
return to_node(draw(integers() | floats(allow_nan=False) | complex_numbers(allow_infinity=True, allow_nan=False)))
57+
return to_node(draw(one_of(
58+
integers(), # Shrinks to 0
59+
floats(allow_nan=False), # Shrinks to 0.0
60+
complex_numbers(allow_infinity=True, allow_nan=False) # Most complex
61+
)))
5862

5963

6064
@composite
61-
def Str(draw) -> ast.Str:
62-
return ast.Str(''.join(draw(lists(characters(), min_size=0, max_size=3))))
65+
def Str(draw) -> ast.Constant:
66+
# Choose between simple and complex strings for better shrinking
67+
use_simple = draw(booleans())
68+
69+
if use_simple:
70+
# Simple ASCII strings that shrink well
71+
s = draw(text(string.ascii_letters + string.digits + ' ', min_size=0, max_size=3))
72+
else:
73+
# Complex unicode for thorough testing
74+
# Only filter out surrogates which are invalid in Python strings
75+
safe_chars = characters(
76+
blacklist_categories=['Cs'], # No surrogates
77+
max_codepoint=0xFFFF # Stay within BMP for simplicity
78+
)
79+
s = ''.join(draw(lists(safe_chars, min_size=0, max_size=3)))
80+
81+
return ast.Constant(value=s)
6382

6483

6584
@composite
66-
def Bytes(draw) -> ast.Bytes:
67-
return ast.Bytes(draw(binary(max_size=3)))
85+
def Bytes(draw) -> ast.Constant:
86+
return ast.Constant(value=draw(binary(max_size=3)))
6887

6988

7089
@composite
@@ -88,40 +107,57 @@ def Set(draw, expression) -> ast.Set:
88107
@composite
89108
def Dict(draw, expression) -> ast.Dict:
90109
d = draw(dictionaries(expression, expression, min_size=0, max_size=3))
91-
return ast.Dict(keys=list(d.keys()), values=list(d.values()))
110+
items = list(d.items()) # Get items as pairs to maintain key-value relationships
111+
return ast.Dict(keys=[k for k, v in items], values=[v for k, v in items])
92112

93113

94114
@composite
95-
def NameConstant(draw) -> ast.NameConstant:
96-
return ast.NameConstant(draw(sampled_from([None, True, False])))
115+
def NameConstant(draw) -> ast.Constant:
116+
return ast.Constant(value=draw(sampled_from([None, False, True])))
97117

98118

99119
# endregion
100120

101121
@composite
102122
def name(draw) -> SearchStrategy:
103-
other_id_start = [chr(i) for i in [0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C]]
104-
other_id_continue = [chr(i) for i in [0x00B7, 0x0387, 0x19DA] + list(range(1369, 1371 + 1))]
105-
106-
xid_start = draw(characters(whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'], whitelist_characters=['_'] + other_id_start, blacklist_characters=' '))
107-
xid_continue = draw(
108-
lists(
109-
characters(whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'], whitelist_characters=['_'] + other_id_start + other_id_continue, blacklist_characters=' '),
110-
min_size=0,
111-
max_size=2
123+
# Choose between simple and complex, but in a way that shrinks to simple
124+
use_unicode = draw(booleans())
125+
126+
if not use_unicode:
127+
# Simple ASCII names (will be the shrunk case)
128+
first = draw(sampled_from(string.ascii_letters + '_'))
129+
rest = draw(text(string.ascii_letters + string.digits + '_', min_size=0, max_size=2))
130+
n = first + rest
131+
else:
132+
# Complex unicode names (for thorough testing)
133+
other_id_start = [chr(i) for i in [0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C]]
134+
other_id_continue = [chr(i) for i in [0x00B7, 0x0387, 0x19DA] + list(range(1369, 1371 + 1))]
135+
136+
xid_start = draw(characters(whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'],
137+
whitelist_characters=['_'] + other_id_start,
138+
blacklist_characters=' '))
139+
xid_continue = draw(
140+
lists(
141+
characters(whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'],
142+
whitelist_characters=['_'] + other_id_start + other_id_continue,
143+
blacklist_characters=' '),
144+
min_size=0,
145+
max_size=2
146+
)
112147
)
113-
)
148+
n = xid_start + ''.join(xid_continue)
149+
n = unicodedata.normalize('NFKC', n)
114150

115-
n = xid_start + ''.join(xid_continue)
151+
# Handle keywords by prefixing with underscore
152+
if n in keyword.kwlist:
153+
return '_' + n
116154

117-
normalised = unicodedata.normalize('NFKC', n)
118-
assume(normalised not in keyword.kwlist)
119-
assume(' ' not in normalised)
120-
try:
121-
ast.parse(normalised, mode='eval')
122-
except Exception:
155+
# Validate it's a proper identifier
156+
if not n.isidentifier():
157+
# Shouldn't happen with our generation, but just in case
123158
assume(False)
124-
return normalised
159+
160+
return n
125161

126162

127163
@composite
@@ -131,7 +167,7 @@ def Name(draw, ctx=ast.Load) -> ast.Name:
131167

132168
@composite
133169
def UnaryOp(draw, expression) -> ast.UnaryOp:
134-
op = draw(sampled_from([ast.USub(), ast.UAdd(), ast.Not(), ast.Invert()]))
170+
op = draw(sampled_from([ast.UAdd(), ast.USub(), ast.Not(), ast.Invert()]))
135171
l = draw(expression)
136172
return ast.UnaryOp(op, l)
137173

@@ -152,20 +188,19 @@ def BinOp(draw, expression) -> ast.BinOp:
152188
op = draw(
153189
sampled_from(
154190
[
155-
ast.Add(),
191+
ast.Add(), # Most common arithmetic
156192
ast.Sub(),
157193
ast.Mult(),
158194
ast.Div(),
195+
ast.Mod(), # Common operations
159196
ast.FloorDiv(),
160-
ast.Mod(),
161-
ast.Pow(),
162-
ast.LShift(),
163-
ast.RShift(),
197+
ast.Pow(), # Less common
198+
ast.BitAnd(), # Bitwise operations
164199
ast.BitOr(),
165200
ast.BitXor(),
166-
ast.BitOr(),
167-
ast.BitAnd(),
168-
ast.MatMult()
201+
ast.LShift(),
202+
ast.RShift(),
203+
ast.MatMult() # Least common (matrix mult)
169204
]
170205
)
171206
)
@@ -209,7 +244,8 @@ def IfExp(draw, expression) -> ast.IfExp:
209244
@composite
210245
def Attribute(draw, expression) -> ast.Attribute:
211246
value = draw(expression)
212-
attr = draw(text(alphabet=string.ascii_letters, min_size=1, max_size=3).filter(lambda n: n not in keyword.kwlist))
247+
# Use our improved name strategy for attributes too
248+
attr = draw(name())
213249
return ast.Attribute(value, attr, ast.Load())
214250

215251
@composite
@@ -229,21 +265,21 @@ def Await(draw, expression) -> ast.Await:
229265

230266
@composite
231267
def Index(draw, expression) -> ast.Index:
232-
return ast.Index(draw(Ellipsis() | expression))
268+
return ast.Index(draw(one_of(Ellipsis(), expression)))
233269

234270

235271
@composite
236272
def Slice(draw, expression) -> ast.Slice:
237273
return ast.Slice(
238274
lower=draw(expression),
239275
upper=draw(expression),
240-
step=draw(none() | expression)
276+
step=draw(one_of(none(), expression))
241277
)
242278

243279

244280
@composite
245-
def Ellipsis(draw) -> ast.Ellipsis:
246-
return ast.Ellipsis()
281+
def Ellipsis(draw) -> ast.Constant:
282+
return ast.Constant(value=...)
247283

248284

249285
@composite
@@ -266,7 +302,7 @@ def ExtSlice(draw, expression) -> ast.ExtSlice:
266302
def Subscript(draw, expression, ctx=ast.Load) -> ast.Subscript:
267303
return ast.Subscript(
268304
value=draw(expression),
269-
slice=draw(Index(expression) | Slice(expression) | ExtSlice(expression)),
305+
slice=draw(one_of(Index(expression), Slice(expression), ExtSlice(expression))),
270306
ctx=ctx()
271307
)
272308

@@ -275,7 +311,7 @@ def Subscript(draw, expression, ctx=ast.Load) -> ast.Subscript:
275311
def arg(draw, allow_annotation=True) -> ast.arg:
276312

277313
if allow_annotation:
278-
annotation = draw(none() | expression())
314+
annotation = draw(one_of(none(), expression()))
279315
else:
280316
annotation = None
281317

@@ -293,10 +329,10 @@ def arguments(draw, for_lambda=False) -> ast.arguments:
293329
args = draw(lists(arg(allow_annotation), max_size=2))
294330
posonlyargs = draw(lists(arg(allow_annotation), max_size=2))
295331
kwonlyargs = draw(lists(arg(allow_annotation), max_size=2))
296-
vararg = draw(none() | arg(allow_annotation))
297-
kwarg = draw(none() | arg(allow_annotation))
332+
vararg = draw(one_of(none(), arg(allow_annotation)))
333+
kwarg = draw(one_of(none(), arg(allow_annotation)))
298334
defaults = []
299-
kw_defaults = draw(lists(none() | expression(), max_size=len(kwonlyargs), min_size=len(kwonlyargs)))
335+
kw_defaults = draw(lists(one_of(none(), expression()), max_size=len(kwonlyargs), min_size=len(kwonlyargs)))
300336
return ast.arguments(
301337
posonlyargs=posonlyargs,
302338
args=args,

hypo_test/folding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import ast
1+
import python_minifier.ast_compat as ast
22

33
from hypothesis.strategies import SearchStrategy, composite, lists, recursive, sampled_from
44

hypo_test/module.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import ast
1+
import python_minifier.ast_compat as ast
22

33
from hypothesis import assume
44
from hypothesis.strategies import SearchStrategy, booleans, composite, integers, lists, none, one_of, recursive, sampled_from

hypo_test/patterns.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import ast
1+
import python_minifier.ast_compat as ast
22
import keyword
33
import string
44

hypo_test/test_it.py

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1-
import ast
1+
import python_minifier.ast_compat as ast
22

33
from datetime import timedelta
44

55
from hypothesis import HealthCheck, Verbosity, given, note, settings
6+
from hypothesis.strategies import booleans
67

78
from python_minifier.ast_compare import compare_ast
89
from python_minifier.ast_printer import print_ast
@@ -15,6 +16,7 @@
1516
from .folding import FoldableExpression
1617
from .module import Module, TypeAlias
1718
from .patterns import Pattern
19+
from .strings import StringExpression
1820

1921

2022
@given(node=Expression())
@@ -66,7 +68,7 @@ def test_pattern(node):
6668

6769

6870
@given(node=FoldableExpression())
69-
@settings(report_multiple_bugs=False, deadline=timedelta(seconds=1), max_examples=1000, suppress_health_check=[HealthCheck.too_slow]) # verbosity=Verbosity.verbose
71+
@settings(report_multiple_bugs=False, deadline=timedelta(seconds=1), max_examples=100, suppress_health_check=[HealthCheck.too_slow]) # verbosity=Verbosity.verbose
7072
def test_folding(node):
7173
assert isinstance(node, ast.AST)
7274
note(print_ast(node))
@@ -122,3 +124,23 @@ def test_function_type_param(node):
122124
code = printer(module)
123125
note(code)
124126
compare_ast(module, ast.parse(code, 'test_function_type_param'))
127+
128+
129+
@given(node=StringExpression())
130+
@settings(report_multiple_bugs=False, deadline=timedelta(seconds=10), max_examples=100, suppress_health_check=[HealthCheck.too_slow]) # verbosity=Verbosity.verbose
131+
def test_string_expression(node):
132+
assert isinstance(node, ast.Expression)
133+
134+
note(ast.dump(node))
135+
printer = ExpressionPrinter()
136+
code = printer(node)
137+
note(code)
138+
compare_ast(node, ast.parse(code, 'test_string_expression', 'eval'))
139+
140+
141+
@given(value=booleans())
142+
@settings(report_multiple_bugs=False, deadline=timedelta(seconds=1), max_examples=100)
143+
def test_boolean_failure(value):
144+
"""Simple test that draws a boolean and asserts it to observe failure behavior."""
145+
note(f"Generated boolean value: {value}")
146+
assert value, "This test will fail when value is False"

0 commit comments

Comments
 (0)