Skip to content

Commit d4caf3e

Browse files
authored
Merge pull request #131 from dflook/windows-test
Better unicode support
2 parents b5e9446 + cdcadc8 commit d4caf3e

15 files changed

Lines changed: 608 additions & 24 deletions
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Markdownlint configuration for corpus test reports
2+
# Disable line length rule since tables naturally exceed 80 characters
3+
MD013: false

.github/github_sucks

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
3+

.github/workflows/test.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,47 @@ jobs:
3030
image: danielflook/python-minifier-build:${{ matrix.python }}-2024-09-15
3131
run: |
3232
tox -r -e $(echo "${{ matrix.python }}" | tr -d .)
33+
34+
test-windows:
35+
name: Test Windows
36+
runs-on: windows-2022
37+
strategy:
38+
fail-fast: false
39+
matrix:
40+
python-version: ['2.7', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
41+
steps:
42+
- name: Checkout
43+
uses: actions/checkout@v4.2.2
44+
with:
45+
fetch-depth: 1
46+
show-progress: false
47+
persist-credentials: false
48+
49+
- name: Set up Python
50+
if: ${{ matrix.python-version != '2.7' }}
51+
uses: actions/setup-python@v5
52+
with:
53+
python-version: ${{ matrix.python-version }}
54+
55+
- name: Set up Python
56+
if: ${{ matrix.python-version == '2.7' }}
57+
uses: LizardByte/actions/actions/setup_python@eddc8fc8b27048e25040e37e3585bd3ef9a968ed # master
58+
with:
59+
python-version: ${{ matrix.python-version }}
60+
61+
- name: Set version statically
62+
shell: powershell
63+
run: |
64+
$content = Get-Content setup.py
65+
$content = $content -replace "setup_requires=.*", "version='0.0.0',"
66+
$content = $content -replace "use_scm_version=.*", ""
67+
Set-Content setup.py $content
68+
69+
- name: Install tox
70+
run: |
71+
python -m pip install --upgrade pip
72+
pip install tox
73+
74+
- name: Run tests
75+
run: |
76+
tox -c tox-windows.ini -r -e ${{ matrix.python-version }}

.github/workflows/test_corpus.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,4 +144,11 @@ jobs:
144144
volumes: |
145145
/corpus-results:/corpus-results
146146
run: |
147-
python3.13 workflow/corpus_test/generate_report.py /corpus-results ${{ inputs.ref }} ${{ steps.ref.outputs.commit }} ${{ inputs.base-ref }} ${{ steps.base-ref.outputs.commit }} >> $GITHUB_STEP_SUMMARY
147+
python3.13 workflow/corpus_test/generate_report.py /corpus-results ${{ inputs.ref }} ${{ steps.ref.outputs.commit }} ${{ inputs.base-ref }} ${{ steps.base-ref.outputs.commit }} | tee -a $GITHUB_STEP_SUMMARY > report.md
148+
149+
- name: Lint Report
150+
uses: DavidAnson/markdownlint-cli2-action@05f32210e84442804257b2a6f20b273450ec8265 # v19
151+
continue-on-error: true
152+
with:
153+
config: '.config/corpus_report.markdownlint.yaml'
154+
globs: 'report.md'

corpus_test/generate_report.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,33 @@
99

1010
ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', True)
1111

12+
def is_recursion_error(python_version: str, result: Result) -> bool:
    """
    Tell whether a corpus result records a recursion error.

    :param python_version: Python version the result was produced with, e.g. '3.13'
    :param result: The result whose ``outcome`` string is inspected
    :return: True if the outcome is 'RecursionError', or, for 2.7/3.3/3.4, an
             'Exception: maximum recursion depth exceeded...' outcome
    """
    outcome = result.outcome
    if outcome == 'RecursionError':
        return True

    # In 2.7, 3.3 and 3.4 the recursion error is raised as an Exception, so it
    # shows up as a generic exception outcome with a recognisable message.
    legacy_version = python_version in ('2.7', '3.3', '3.4')
    return legacy_version and outcome.startswith('Exception: maximum recursion depth exceeded')
24+
25+
def is_syntax_error(python_version: str, result: Result) -> bool:
    """
    Tell whether a corpus result should be treated as a syntax error.

    :param python_version: Python version the result was produced with, e.g. '3.13'
    :param result: The result whose ``outcome`` string is inspected
    :return: True if the outcome is 'SyntaxError', or the version-specific
             exception outcome reported for source containing null bytes
    """
    outcome = result.outcome
    if outcome == 'SyntaxError':
        return True

    # Source containing null bytes surfaces as a generic exception outcome, and
    # the message differs between 2.7 and every other version.
    if python_version == '2.7':
        return outcome == 'Exception: compile() expected string without null bytes'
    return outcome == 'Exception: source code string cannot contain null bytes'
1239

1340
@dataclass
1441
class ResultSet:
@@ -45,11 +72,11 @@ def add(self, result: Result):
4572
if result.original_size < result.minified_size:
4673
self.larger_than_original_count += 1
4774

48-
if result.outcome == 'RecursionError':
75+
if is_recursion_error(self.python_version, result):
4976
self.recursion_error_count += 1
5077
elif result.outcome == 'UnstableMinification':
5178
self.unstable_minification_count += 1
52-
elif result.outcome.startswith('Exception'):
79+
elif result.outcome.startswith('Exception') and not is_syntax_error(self.python_version, result):
5380
self.exception_count += 1
5481

5582
@property
@@ -74,13 +101,13 @@ def larger_than_original(self) -> Iterable[Result]:
74101
def recursion_error(self) -> Iterable[Result]:
    """Yield every entry whose outcome is_recursion_error() accepts for this set's Python version"""
    for entry in self.entries.values():
        if not is_recursion_error(self.python_version, entry):
            continue
        yield entry
79106

80107
def exception(self) -> Iterable[Result]:
    """Yield every entry with an 'Exception...' outcome that is neither a syntax error nor a recursion error"""
    for entry in self.entries.values():
        if not entry.outcome.startswith('Exception'):
            continue

        # Some exception outcomes are really syntax or recursion errors for
        # this Python version; those are reported elsewhere, not here.
        if is_syntax_error(self.python_version, entry):
            continue
        if is_recursion_error(self.python_version, entry):
            continue

        yield entry
85112

86113
def unstable_minification(self) -> Iterable[Result]:
@@ -184,7 +211,7 @@ def format_difference(compare: Iterable[Result], base: Iterable[Result]) -> str:
184211
return s
185212

186213

187-
def report_larger_than_original(results_dir: str, python_versions: str, minifier_sha: str) -> str:
214+
def report_larger_than_original(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
188215
yield '''
189216
## Larger than original
190217
@@ -203,7 +230,7 @@ def report_larger_than_original(results_dir: str, python_versions: str, minifier
203230
yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'
204231

205232

206-
def report_unstable(results_dir: str, python_versions: str, minifier_sha: str) -> str:
233+
def report_unstable(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
207234
yield '''
208235
## Unstable
209236
@@ -222,7 +249,7 @@ def report_unstable(results_dir: str, python_versions: str, minifier_sha: str) -
222249
yield f'| {entry.corpus_entry} | {python_version} | {entry.original_size} |'
223250

224251

225-
def report_exceptions(results_dir: str, python_versions: str, minifier_sha: str) -> str:
252+
def report_exceptions(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
226253
yield '''
227254
## Exceptions
228255
@@ -244,10 +271,10 @@ def report_exceptions(results_dir: str, python_versions: str, minifier_sha: str)
244271
yield f'| {entry.corpus_entry} | {python_version} | {entry.outcome} |'
245272

246273
if not exceptions_found:
247-
yield ' None | | |'
274+
yield '| None | | |'
248275

249276

250-
def report_larger_than_base(results_dir: str, python_versions: str, minifier_sha: str, base_sha: str) -> str:
277+
def report_larger_than_base(results_dir: str, python_versions: list[str], minifier_sha: str, base_sha: str) -> str:
251278
yield '''
252279
## Top 10 Larger than base
253280
@@ -277,7 +304,7 @@ def report_larger_than_base(results_dir: str, python_versions: str, minifier_sha
277304
yield '| N/A | N/A | N/A |'
278305

279306

280-
def report_slowest(results_dir: str, python_versions: str, minifier_sha: str) -> str:
307+
def report_slowest(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
281308
yield '''
282309
## Top 10 Slowest
283310
@@ -360,15 +387,15 @@ def report(results_dir: str, minifier_ref: str, minifier_sha: str, base_ref: str
360387
f'| {format_difference(summary.larger_than_original(), base_summary.larger_than_original())} ' +
361388
f'| {format_difference(summary.recursion_error(), base_summary.recursion_error())} ' +
362389
f'| {format_difference(summary.unstable_minification(), base_summary.unstable_minification())} ' +
363-
f'| {format_difference(summary.exception(), base_summary.exception())} '
390+
f'| {format_difference(summary.exception(), base_summary.exception())} |'
364391
)
365392

366393
if ENHANCED_REPORT:
367394
yield from report_larger_than_original(results_dir, ['3.13'], minifier_sha)
368395
yield from report_larger_than_base(results_dir, ['3.13'], minifier_sha, base_sha)
369396
yield from report_slowest(results_dir, ['3.13'], minifier_sha)
370397
yield from report_unstable(results_dir, ['2.7', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'], minifier_sha)
371-
yield from report_exceptions(results_dir, ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'], minifier_sha)
398+
yield from report_exceptions(results_dir, ['2.7', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'], minifier_sha)
372399

373400

374401
def main():

corpus_test/generate_results.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ def minify_corpus_entry(corpus_path, corpus_entry):
5050
# Source is too deep
5151
result.outcome = 'RecursionError'
5252

53+
except ValueError:
54+
# Source is not valid Python
55+
result.outcome = 'ValueError'
56+
5357
except SyntaxError:
5458
# Source not valid for this version of Python
5559
result.outcome = 'SyntaxError'

src/python_minifier/__init__.pyi

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import ast
22

3-
from typing import Any, AnyStr, List, Optional, Text, Union
3+
from typing import Any, List, Optional, Text, Union
44

55
from .transforms.remove_annotations_options import RemoveAnnotationsOptions as RemoveAnnotationsOptions
66

@@ -10,7 +10,7 @@ class UnstableMinification(RuntimeError):
1010

1111

1212
def minify(
13-
source: AnyStr,
13+
source: Union[str, bytes],
1414
filename: Optional[str] = ...,
1515
remove_annotations: Union[bool, RemoveAnnotationsOptions] = ...,
1616
remove_pass: bool = ...,
@@ -36,7 +36,7 @@ def unparse(module: ast.Module) -> Text: ...
3636

3737

3838
def awslambda(
39-
source: AnyStr,
39+
source: Union[str, bytes],
4040
filename: Optional[Text] = ...,
4141
entrypoint: Optional[Text] = ...
4242
) -> Text: ...

src/python_minifier/__main__.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,27 @@
77
from python_minifier import minify
88
from python_minifier.transforms.remove_annotations_options import RemoveAnnotationsOptions
99

10+
# Open files as UTF-8 on every supported interpreter. The builtin open() only
# accepts an encoding argument on Python 3, so Python 2.7 goes through codecs.
if sys.version_info[0] != 2:
    def open_utf8(filename, mode):
        """Open filename in the given mode with UTF-8 encoding, using the builtin open()."""
        return open(filename, mode, encoding='utf-8')
else:
    import codecs

    def open_utf8(filename, mode):
        """Open filename in the given mode with UTF-8 encoding, using codecs.open()."""
        return codecs.open(filename, mode, encoding='utf-8')
18+
19+
def safe_stdout_write(text):
    """
    Write text to stdout, falling back when stdout's encoding cannot represent it.

    The text is first written normally. If that raises UnicodeEncodeError:

    - on Python 2, the UTF-8 encoded bytes are written to sys.stdout;
    - on Python 3 with a binary ``sys.stdout.buffer``, the UTF-8 encoded bytes
      are written to that buffer;
    - on Python 3 without a ``buffer`` attribute (stdout replaced by a
      text-only stream), the text is written with unencodable characters
      replaced by backslash escapes, because such a stream cannot accept bytes.

    :param text: The text to write
    """
    try:
        sys.stdout.write(text)
        return
    except UnicodeEncodeError:
        pass

    if sys.version_info[0] < 3:
        # Python 2 file objects accept byte strings directly
        sys.stdout.write(text.encode('utf-8'))
        return

    raw = getattr(sys.stdout, 'buffer', None)
    if raw is not None:
        # Flush text already queued in the text layer so that writing to the
        # underlying binary buffer cannot reorder the output.
        sys.stdout.flush()
        raw.write(text.encode('utf-8'))
        return

    # Text-only stream: writing bytes would raise TypeError, so degrade to
    # backslash escapes in whatever encoding the stream reports.
    encoding = getattr(sys.stdout, 'encoding', None) or 'ascii'
    sys.stdout.write(text.encode(encoding, 'backslashreplace').decode(encoding))
30+
1031

1132
if sys.version_info >= (3, 8):
1233
from importlib import metadata
@@ -53,10 +74,10 @@ def main():
5374
source = sys.stdin.buffer.read() if sys.version_info >= (3, 0) else sys.stdin.read()
5475
minified = do_minify(source, 'stdin', args)
5576
if args.output:
56-
with open(args.output, 'w') as f:
77+
with open_utf8(args.output, 'w') as f:
5778
f.write(minified)
5879
else:
59-
sys.stdout.write(minified)
80+
safe_stdout_write(minified)
6081

6182
else:
6283
# minify source paths
@@ -70,13 +91,13 @@ def main():
7091
minified = do_minify(source, path, args)
7192

7293
if args.in_place:
73-
with open(path, 'w') as f:
94+
with open_utf8(path, 'w') as f:
7495
f.write(minified)
7596
elif args.output:
76-
with open(args.output, 'w') as f:
97+
with open_utf8(args.output, 'w') as f:
7798
f.write(minified)
7899
else:
79-
sys.stdout.write(minified)
100+
safe_stdout_write(minified)
80101

81102

82103
def parse_args():

src/python_minifier/module_printer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,15 @@ def __call__(self, module):
2828
assert isinstance(module, ast.Module)
2929

3030
self.visit_Module(module)
31-
return str(self.printer).rstrip('\n' + self.indent_char + ';')
31+
# On Python 2.7, preserve unicode strings to avoid encoding issues
32+
code = unicode(self.printer) if sys.version_info[0] < 3 else str(self.printer)
33+
return code.rstrip('\n' + self.indent_char + ';')
3234

3335
@property
def code(self):
    """Return the printed code with trailing newlines, indent characters and semicolons stripped."""
    # On Python 2.7, keep the result as unicode to avoid encoding issues
    if sys.version_info[0] < 3:
        text = unicode(self.printer)  # noqa: F821 - name only exists on Python 2
    else:
        text = str(self.printer)

    trailing_chars = '\n' + self.indent_char + ';'
    return text.rstrip(trailing_chars)
3640

3741
# region Simple Statements
3842

src/python_minifier/token_printer.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,22 @@ def __init__(self, prefer_single_line=False, allow_invalid_num_warnings=False):
9191
self._prefer_single_line = prefer_single_line
9292
self._allow_invalid_num_warnings = allow_invalid_num_warnings
9393

94-
self._code = ''
94+
# Initialize as unicode string on Python 2.7 to handle Unicode content
95+
if sys.version_info[0] < 3:
96+
self._code = u''
97+
else:
98+
self._code = ''
9599
self.indent = 0
96100
self.unicode_literals = False
97101
self.previous_token = TokenTypes.NoToken
98102

99103
def __str__(self):
100104
"""Return the output code."""
101105
return self._code
106+
107+
def __unicode__(self):
108+
"""Return the output code as unicode (for Python 2.7 compatibility)."""
109+
return self._code
102110

103111
def identifier(self, name):
104112
"""Add an identifier to the output code."""

0 commit comments

Comments
 (0)