-
-
Notifications
You must be signed in to change notification settings - Fork 52
Expand file tree
/
Copy pathgenerate_results.py
More file actions
151 lines (107 loc) · 4.86 KB
/
generate_results.py
File metadata and controls
151 lines (107 loc) · 4.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import argparse
import datetime
import gzip
import os
import sys
import time
import logging
import python_minifier
from result import Result, ResultWriter
# RE is the exception type used to recognise "source nested too deeply".
# Where the interpreter provides RecursionError, RE is that class.
try:
    RE = RecursionError
except NameError:
    # Python 2: there is no RecursionError builtin, so bind RE to a private
    # placeholder exception class. Nothing raises it, so an `except RE`
    # clause simply never matches there.
    RE = type('RE', (Exception,), {})
def minify_corpus_entry(corpus_path, corpus_entry):
    """
    Run python-minifier over one corpus entry and describe what happened

    The entry is read from ``<corpus_entry>.py.gz`` (gzip) when that file
    exists in the corpus directory, otherwise from a plain file named
    ``<corpus_entry>``. The source is read as bytes in both cases.

    The returned Result has its ``outcome`` set to one of 'Minified',
    'RecursionError', 'SyntaxError', 'UnstableMinification' or
    'Exception: <message>'. ``time`` is only updated for the 'Minified' and
    'UnstableMinification' outcomes, and ``minified_size`` only for
    'Minified'.

    :param str corpus_path: Path to the corpus
    :param str corpus_entry: A file in the corpus
    :rtype: Result
    """
    compressed_path = os.path.join(corpus_path, corpus_entry + '.py.gz')

    # Pick the reader once, then share a single read path.
    if os.path.isfile(compressed_path):
        opener, source_path = gzip.open, compressed_path
    else:
        opener, source_path = open, os.path.join(corpus_path, corpus_entry)

    with opener(source_path, 'rb') as source_file:
        source = source_file.read()

    result = Result(corpus_entry, len(source), 0, 0, '')

    started = time.time()
    try:
        minified = python_minifier.minify(source, filename=corpus_entry)
        result.time = time.time() - started
        result.minified_size = len(minified)
        result.outcome = 'Minified'
    except RE:
        # Source is too deep
        result.outcome = 'RecursionError'
    except SyntaxError:
        # Source not valid for this version of Python
        result.outcome = 'SyntaxError'
    except python_minifier.UnstableMinification:
        # Minification does not equal original source
        result.time = time.time() - started
        result.outcome = 'UnstableMinification'
    except Exception as exception:
        # Any other failure is recorded rather than aborting the corpus run.
        result.outcome = 'Exception: ' + str(exception)

    return result
def corpus_test(corpus_path, results_path, sha, regenerate_results):
    """
    Test the minifier on the entire corpus

    The results are written to a csv file in the results directory.
    The name of the file is results_<python_version>_<sha>.csv
    A log file with the same stem and a .log extension is written next to it.

    If the results file already exists and regenerate_results is True, it is
    deleted before testing starts. Otherwise any entry the ResultWriter
    reports as already present is skipped and only the remaining entries
    are tested.

    :param str corpus_path: Path to the corpus
    :param str results_path: Path to the results directory
    :param str sha: The python-minifier sha we are testing
    :param bool regenerate_results: Regenerate results even if they are present
    """
    python_version = '.'.join([str(s) for s in sys.version_info[:2]])

    log_path = 'results_' + python_version + '_' + sha + '.log'
    print('Logging in GitHub Actions is absolute garbage. Logs are going to ' + log_path)

    logging.basicConfig(filename=os.path.join(results_path, log_path), level=logging.DEBUG)

    # Entry names are the file names without the '.py.gz' suffix. Only strip
    # the suffix when it is really there: minify_corpus_entry falls back to
    # opening a plain file by its full name, so other names must stay intact.
    suffix = '.py.gz'
    corpus_entries = [
        entry[:-len(suffix)] if entry.endswith(suffix) else entry
        for entry in os.listdir(corpus_path)
    ]

    results_file_path = os.path.join(results_path, 'results_' + python_version + '_' + sha + '.csv')

    if os.path.isfile(results_file_path):
        logging.info('Results file already exists: %s', results_file_path)
        if regenerate_results:
            os.remove(results_file_path)

    total_entries = len(corpus_entries)
    logging.info('Testing python-minifier on %d entries', total_entries)
    tested_entries = 0

    start_time = time.time()
    next_checkpoint = time.time() + 60

    with ResultWriter(results_file_path) as result_writer:
        logging.info('%d results already present', len(result_writer))

        for entry in corpus_entries:
            if entry in result_writer:
                continue

            logging.debug(entry)

            result = minify_corpus_entry(corpus_path, entry)
            result_writer.write(result)
            tested_entries += 1

            sys.stdout.flush()

            # Emit a progress line at most once a minute.
            if time.time() > next_checkpoint:
                completed = len(result_writer)
                # Multiply by a float first so this is true division on
                # Python 2 as well (int / int would always give 0 there).
                percent = 100.0 * completed / total_entries
                # Estimate from this run only: tested_entries excludes
                # results that were already in the file.
                time_per_entry = (time.time() - start_time) / tested_entries
                entries_remaining = total_entries - completed
                time_remaining = int(entries_remaining * time_per_entry)
                logging.info('Tested %d/%d entries (%d%%) %s seconds remaining', completed, total_entries, percent, time_remaining)
                next_checkpoint = time.time() + 60

    logging.info('Finished')
def bool_parse(value):
    """
    Parse a command line flag value into a bool

    Returns True when the value is the word 'true', ignoring letter case and
    surrounding whitespace (so 'true', 'True' and ' TRUE ' all count).
    Every other value, including 'false' and the empty string, is False.

    :param str value: The raw argument text
    :rtype: bool
    """
    return str(value).strip().lower() == 'true'
def main():
    """
    Command line entry point

    Parses the corpus directory, results directory, python-minifier sha and
    the optional regenerate flag, then runs corpus_test with them.
    """
    parser = argparse.ArgumentParser(description='Test python-minifier on a corpus of Python files.')
    # NOTE: these positionals are required, so argparse never uses the
    # defaults on corpus_dir and results_dir. They are kept as written.
    parser.add_argument('corpus_dir', type=str, help='Path to corpus directory', default='corpus')
    parser.add_argument('results_dir', type=str, help='Path to results directory', default='results')
    parser.add_argument('minifier_sha', type=str, help='The python-minifier sha we are testing')
    # nargs='?' makes this trailing positional optional, so the 'false'
    # default actually applies when it is omitted. argparse passes a string
    # default through `type`, so args.regenerate_results is always a bool.
    parser.add_argument('regenerate_results', type=bool_parse, nargs='?', help='Regenerate results even if they are present', default='false')

    args = parser.parse_args()

    corpus_test(args.corpus_dir, args.results_dir, args.minifier_sha, args.regenerate_results)


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()