1- import ast
1+ import python_minifier . ast_compat as ast
22import keyword
33import math
44import string
2525
# Strategy yielding one comparison-operator node per draw.
# The list is ordered from the most common operators to the least common;
# Hypothesis documents `sampled_from` as shrinking towards earlier elements,
# so failing examples should reduce to the simpler operators first.
comparison_operators = sampled_from(
    [
        ast.Eq(),  # Most common comparison
        ast.NotEq(),
        ast.Lt(),  # Simple ordering
        ast.Gt(),
        ast.LtE(),
        ast.GtE(),
        ast.In(),  # Membership tests
        ast.NotIn(),
        ast.Is(),  # Identity tests (less common)
        ast.IsNot(),
    ]
)
4040
def Num(draw) -> ast.AST:
    """Draw an AST expression for a numeric literal (int, float or complex).

    Negative ints and negatively-signed floats (including ``-0.0``) are built
    as ``UnaryOp(USub(), Constant(abs(n)))`` instead of a ``Constant`` holding
    a negative value. Complex values are produced by parsing ``str(n)`` and
    returning the body of the resulting expression.
    """

    def to_node(n) -> ast.AST:
        """Convert a drawn Python number into the AST node representing it."""
        if isinstance(n, int):
            if n >= 0:
                return ast.Constant(value=n)
            return ast.UnaryOp(ast.USub(), ast.Constant(value=abs(n)))

        if isinstance(n, float):
            # copysign distinguishes -0.0 from 0.0, which a plain `n >= 0` cannot.
            if math.copysign(1.0, n) > 0.0:
                return ast.Constant(value=n)
            return ast.UnaryOp(ast.USub(), ast.Constant(value=abs(n)))

        if isinstance(n, complex):
            node = ast.parse(str(n), mode='eval')
            return node.body

        raise ValueError(n)

    return to_node(draw(one_of(
        integers(),  # Shrinks to 0
        floats(allow_nan=False),  # Shrinks to 0.0
        complex_numbers(allow_infinity=True, allow_nan=False)  # Most complex
    )))
5862
5963
@composite
def Str(draw) -> ast.Constant:
    """Draw a string-literal node holding a short generated string.

    A drawn boolean selects between two generators: a "simple" one limited to
    ASCII letters, digits and space, and a "complex" one drawing up to three
    Basic Multilingual Plane characters with surrogates excluded.
    """
    # Choose between simple and complex strings for better shrinking
    use_simple = draw(booleans())

    if use_simple:
        # Simple ASCII strings that shrink well
        simple_alphabet = string.ascii_letters + string.digits + ' '
        s = draw(text(simple_alphabet, min_size=0, max_size=3))
    else:
        # Complex unicode for thorough testing.
        # Only surrogates (category 'Cs') are filtered out.
        safe_chars = characters(
            blacklist_categories=['Cs'],  # No surrogates
            max_codepoint=0xFFFF  # Stay within BMP for simplicity
        )
        s = ''.join(draw(lists(safe_chars, min_size=0, max_size=3)))

    return ast.Constant(value=s)
6382
6483
@composite
def Bytes(draw) -> ast.Constant:
    """Draw a bytes-literal node holding at most three generated bytes."""
    return ast.Constant(value=draw(binary(max_size=3)))
6887
6988
7089@composite
@@ -88,40 +107,57 @@ def Set(draw, expression) -> ast.Set:
@composite
def Dict(draw, expression) -> ast.Dict:
    """Draw a dict-display node with up to three key/value expression pairs.

    Args:
        expression: strategy used to generate both the key and value nodes.
    """
    d = draw(dictionaries(expression, expression, min_size=0, max_size=3))
    # Walk the items once so each key stays aligned with its own value.
    items = list(d.items())
    return ast.Dict(
        keys=[key for key, _ in items],
        values=[value for _, value in items],
    )
92112
93113
@composite
def NameConstant(draw) -> ast.Constant:
    """Draw a constant node whose value is one of ``None``, ``False`` or ``True``."""
    return ast.Constant(value=draw(sampled_from([None, False, True])))
97117
98118
99119# endregion
100120
@composite
def name(draw) -> str:
    """Draw a valid Python identifier that is not a keyword.

    A drawn boolean selects between a plain ASCII identifier and a Unicode
    identifier. Both are built from one start character plus up to two
    continuation characters; the Unicode form is then NFKC-normalised, which
    may change its length. A result that collides with a keyword is prefixed
    with ``_``; anything that is not a valid identifier is rejected through
    ``assume``.
    """
    # Choose between simple and complex, but in a way that shrinks to simple
    use_unicode = draw(booleans())

    if not use_unicode:
        # Simple ASCII names (will be the shrunk case)
        first = draw(sampled_from(string.ascii_letters + '_'))
        rest = draw(text(string.ascii_letters + string.digits + '_', min_size=0, max_size=2))
        n = first + rest
    else:
        # Complex unicode names (for thorough testing)
        other_id_start = [chr(i) for i in [0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C]]
        other_id_continue = [chr(i) for i in [0x00B7, 0x0387, 0x19DA] + list(range(1369, 1371 + 1))]

        xid_start = draw(
            characters(
                whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'],
                whitelist_characters=['_'] + other_id_start,
                blacklist_characters=' '
            )
        )
        xid_continue = draw(
            lists(
                characters(
                    whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'],
                    whitelist_characters=['_'] + other_id_start + other_id_continue,
                    blacklist_characters=' '
                ),
                min_size=0,
                max_size=2
            )
        )
        n = xid_start + ''.join(xid_continue)
        n = unicodedata.normalize('NFKC', n)

    # Handle keywords by prefixing with underscore
    if n in keyword.kwlist:
        return '_' + n

    # Normalisation can yield text that is no longer an identifier; discard
    # such examples instead of returning them.
    assume(n.isidentifier())

    return n
125161
126162
127163@composite
@@ -131,7 +167,7 @@ def Name(draw, ctx=ast.Load) -> ast.Name:
131167
@composite
def UnaryOp(draw, expression) -> ast.UnaryOp:
    """Draw a unary-operation node applied to a generated operand.

    Args:
        expression: strategy used to generate the operand node.
    """
    op = draw(sampled_from([ast.UAdd(), ast.USub(), ast.Not(), ast.Invert()]))
    operand = draw(expression)
    return ast.UnaryOp(op, operand)
137173
@@ -152,20 +188,19 @@ def BinOp(draw, expression) -> ast.BinOp:
152188 op = draw (
153189 sampled_from (
154190 [
155- ast .Add (),
191+ ast .Add (), # Most common arithmetic
156192 ast .Sub (),
157193 ast .Mult (),
158194 ast .Div (),
195+ ast .Mod (), # Common operations
159196 ast .FloorDiv (),
160- ast .Mod (),
161- ast .Pow (),
162- ast .LShift (),
163- ast .RShift (),
197+ ast .Pow (), # Less common
198+ ast .BitAnd (), # Bitwise operations
164199 ast .BitOr (),
165200 ast .BitXor (),
166- ast .BitOr (),
167- ast .BitAnd (),
168- ast .MatMult ()
201+ ast .LShift (),
202+ ast .RShift (),
203+ ast .MatMult () # Least common (matrix mult)
169204 ]
170205 )
171206 )
@@ -209,7 +244,8 @@ def IfExp(draw, expression) -> ast.IfExp:
@composite
def Attribute(draw, expression) -> ast.Attribute:
    """Draw an attribute-access node in load context.

    Args:
        expression: strategy used to generate the object being accessed.
    """
    value = draw(expression)
    # Attribute names come from the shared identifier strategy.
    attr = draw(name())
    return ast.Attribute(value, attr, ast.Load())
214250
215251@composite
@@ -229,21 +265,21 @@ def Await(draw, expression) -> ast.Await:
229265
@composite
def Index(draw, expression) -> ast.Index:
    """Draw an ``Index`` node wrapping either an ellipsis or a generated expression.

    Args:
        expression: strategy used to generate the non-ellipsis index value.
    """
    return ast.Index(draw(one_of(Ellipsis(), expression)))
233269
234270
@composite
def Slice(draw, expression) -> ast.Slice:
    """Draw a slice node; lower and upper are always present, step is optional.

    Args:
        expression: strategy used to generate the bound and step nodes.
    """
    return ast.Slice(
        lower=draw(expression),
        upper=draw(expression),
        step=draw(one_of(none(), expression))
    )
242278
243279
@composite
def Ellipsis(draw) -> ast.Constant:
    """Produce a constant node whose value is the ``...`` singleton."""
    # NOTE(review): `draw` is never called here; some Hypothesis versions may
    # flag composite functions that do not draw - confirm against the pinned
    # version before changing this.
    return ast.Constant(value=...)
247283
248284
249285@composite
@@ -266,7 +302,7 @@ def ExtSlice(draw, expression) -> ast.ExtSlice:
def Subscript(draw, expression, ctx=ast.Load) -> ast.Subscript:
    """Draw a subscript node over a generated value.

    Args:
        expression: strategy used to generate the subscripted value and the
            expressions inside the slice.
        ctx: expression-context class; it is instantiated once per draw.
    """
    return ast.Subscript(
        value=draw(expression),
        slice=draw(one_of(Index(expression), Slice(expression), ExtSlice(expression))),
        ctx=ctx()
    )
272308
@@ -275,7 +311,7 @@ def Subscript(draw, expression, ctx=ast.Load) -> ast.Subscript:
275311def arg (draw , allow_annotation = True ) -> ast .arg :
276312
277313 if allow_annotation :
278- annotation = draw (none () | expression ())
314+ annotation = draw (one_of ( none (), expression () ))
279315 else :
280316 annotation = None
281317
@@ -293,10 +329,10 @@ def arguments(draw, for_lambda=False) -> ast.arguments:
293329 args = draw (lists (arg (allow_annotation ), max_size = 2 ))
294330 posonlyargs = draw (lists (arg (allow_annotation ), max_size = 2 ))
295331 kwonlyargs = draw (lists (arg (allow_annotation ), max_size = 2 ))
296- vararg = draw (none () | arg (allow_annotation ))
297- kwarg = draw (none () | arg (allow_annotation ))
332+ vararg = draw (one_of ( none (), arg (allow_annotation ) ))
333+ kwarg = draw (one_of ( none (), arg (allow_annotation ) ))
298334 defaults = []
299- kw_defaults = draw (lists (none () | expression (), max_size = len (kwonlyargs ), min_size = len (kwonlyargs )))
335+ kw_defaults = draw (lists (one_of ( none (), expression () ), max_size = len (kwonlyargs ), min_size = len (kwonlyargs )))
300336 return ast .arguments (
301337 posonlyargs = posonlyargs ,
302338 args = args ,
0 commit comments