|
17 | 17 |
|
18 | 18 | ./check-submodule.py compute-ci-metrics -p LOCAL_DIR -l TREE_SITTER_LANGUAGE |
19 | 19 |
|
| 20 | +To compare metrics and retrieve the structural JSON of differences |
| 21 | +in addition to the files containing the minimal tests: |
20 | 22 |
|
21 | | -To compare metrics and retrieve minimal tests: |
22 | | -
|
23 | | -1. Install deepdiff: pip install deepdiff |
| 23 | +1. Install json-diff from here: https://github.com/Luni-4/json-diff/releases |
| 24 | +2. Install json-minimal-tests from here: https://github.com/Luni-4/json-minimal-tests/releases |
24 | 25 |
|
25 | 26 | ./check-submodule.py compare-metrics -l TREE_SITTER_LANGUAGE |
| 27 | +
|
| 28 | +NOTE: Add the paths of the software above to the PATH environment variable! |
26 | 29 | """ |
27 | 30 |
|
28 | 31 | import argparse |
29 | | -import asyncio |
30 | | -import json |
31 | | -import math |
32 | 32 | import pathlib |
33 | | -import re |
34 | 33 | import subprocess |
35 | 34 | import sys |
36 | 35 | import typing as T |
37 | 36 |
|
38 | | -import deepdiff |
39 | | - |
40 | 37 | # The /tmp directory will be used as workdir |
41 | 38 | WORKDIR = pathlib.Path("/tmp") |
42 | 39 | # Suffix for the directory containing the old metrics |
|
69 | 66 | "tree-sitter-python": ["*.py"], |
70 | 67 | } |
71 | 68 |
|
72 | | - |
73 | | -class JsonDiff: |
74 | | - def __init__( |
75 | | - self, |
76 | | - old_metrics: T.List[pathlib.Path], |
77 | | - new_metrics: T.List[pathlib.Path], |
78 | | - compare_dir: pathlib.Path, |
79 | | - max_workers: int, |
80 | | - ): |
81 | | - self.compare_dir = compare_dir |
82 | | - self.max_workers = max_workers |
83 | | - |
84 | | - # Max number of file paths in a sublist |
85 | | - n = math.ceil(len(old_metrics) / max_workers) |
86 | | - |
87 | | - # Assign a certain number of filepaths to each worker |
88 | | - self.workers_filepaths = [ |
89 | | - zip(old_metrics[i * n : (i + 1) * n], new_metrics[i * n : (i + 1) * n]) |
90 | | - for i in range((len(old_metrics) + n - 1) // n) |
91 | | - ] |
92 | | - |
93 | | - # Run asynchronous comparisons between json files. |
94 | | - async def diff(self): |
95 | | - # Save minimal tests in the chosen directory. |
96 | | - def _worker(worker_list: T.List[pathlib.Path]): |
97 | | - for old_filename, new_filename in worker_list: |
98 | | - |
99 | | - # Compute minimal tests |
100 | | - compute_minimal_tests(old_filename, new_filename, self.compare_dir) |
101 | | - |
102 | | - # Define the max number of coroutines used to compare json files |
103 | | - await asyncio.gather( |
104 | | - *(_worker(worker_filepaths) for worker_filepaths in self.workers_filepaths) |
105 | | - ) |
106 | | - |
107 | | - |
108 | 69 | # Run a subprocess. |
109 | 70 | def run_subprocess(cmd: str, *args: T.Union[str, pathlib.Path]) -> None: |
110 | 71 | subprocess.run([cmd, *args]) |
@@ -138,106 +99,6 @@ def run_rca( |
138 | 99 | ) |
139 | 100 |
|
140 | 101 |
|
141 | | -# Find the difference between the two json metric files. |
142 | | -def get_json_diff( |
143 | | - first_file: pathlib.Path, second_file: pathlib.Path |
144 | | -) -> T.Tuple[T.Dict[str, T.Any], T.Dict[str, T.Any]]: |
145 | | - with open(first_file, "r") as input_file: |
146 | | - t1 = json.load(input_file) |
147 | | - |
148 | | - with open(second_file, "r") as input_file: |
149 | | - t2 = json.load(input_file) |
150 | | - |
151 | | - diff = deepdiff.DeepDiff(t1, t2, ignore_order=True) |
152 | | - |
153 | | - return (t1, diff) |
154 | | - |
155 | | - |
156 | | -# Save the filename and the list of code spans associated to the differences |
157 | | -# in a dictionary. |
158 | | -def get_metrics_diff_span( |
159 | | - first_json: T.Dict[str, T.Any], diff: T.Dict[str, T.Any] |
160 | | -) -> T.Dict[str, T.List[T.Tuple[int, int]]]: |
161 | | - # Search for this pattern in the differences object |
162 | | - prog = re.compile(r"\['spaces'\]\[\d+\]") |
163 | | - |
164 | | - output = {"name": first_json["name"], "spaces_spans": []} |
165 | | - |
166 | | - for value in diff["values_changed"]: |
167 | | - val = "".join(prog.findall(value)) |
168 | | - # Subtracting one because file lines are indexed from 0 |
169 | | - start_line = eval(f'first_json{val}["start_line"]') - 1 |
170 | | - end_line = eval(f'first_json{val}["end_line"]') |
171 | | - output["spaces_spans"].append((start_line, end_line)) |
172 | | - |
173 | | - # Print the path of the repository file containing the differences |
174 | | - print(first_json["name"]) |
175 | | - |
176 | | - return output |
177 | | - |
178 | | - |
179 | | -# Dump minimal tests code in an output file. |
180 | | -def dump_minimal_tests( |
181 | | - code_spans_object: T.Dict[str, T.List[T.Tuple[int, int]]], |
182 | | - new_filename: pathlib.Path, |
183 | | - compare_dir: pathlib.Path, |
184 | | -) -> None: |
185 | | - # Remove duplicates from the list of spans |
186 | | - spans_list = dict.fromkeys(code_spans_object["spaces_spans"]) |
187 | | - |
188 | | - # Get filename |
189 | | - filename = code_spans_object["name"] |
190 | | - |
191 | | - # Read code spans from the input source code |
192 | | - with open(filename, "r", encoding="utf-8", errors="ignore") as input_file: |
193 | | - # Decode only utf-8 source code files |
194 | | - lines = input_file.readlines() |
195 | | - |
196 | | - # Write spans to output file |
197 | | - output_path = compare_dir / new_filename.stem |
198 | | - with open(output_path, "w") as output_file: |
199 | | - for span in spans_list: |
200 | | - output_file.write("Minimal test:\n") |
201 | | - output_file.write("".join(lines[span[0] : span[1]]) + "\n") |
202 | | - |
203 | | - |
204 | | -# Compute minimal tests. |
205 | | -def compute_minimal_tests( |
206 | | - old_filename: pathlib.Path, new_filename: pathlib.Path, compare_dir: pathlib.Path |
207 | | -) -> None: |
208 | | - # Find the difference between the two json files with the aim of |
209 | | - # getting some minimal tests |
210 | | - first_json, diff = get_json_diff(old_filename, new_filename) |
211 | | - |
212 | | - # If two json files are identical, return |
213 | | - if not diff: |
214 | | - return |
215 | | - |
216 | | - # Retrieve the code spans associated to the differences |
217 | | - code_spans_object = get_metrics_diff_span(first_json, diff) |
218 | | - |
219 | | - # Dump the minimal tests retrieved from code spans to a file with the |
220 | | - # same extension as the analyzed source code |
221 | | - dump_minimal_tests(code_spans_object, new_filename, compare_dir) |
222 | | - |
223 | | - |
224 | | -# Save json files of differences and minimal tests in the chosen directory |
225 | | -# concurrently. |
226 | | -def save_diff_files( |
227 | | - old_dir: pathlib.Path, new_dir: pathlib.Path, compare_dir: pathlib.Path |
228 | | -) -> None: |
229 | | - # Get all metric files in old and new directories |
230 | | - old_paths = sorted(pathlib.Path(old_dir).glob("*.json")) |
231 | | - new_paths = sorted(pathlib.Path(new_dir).glob("*.json")) |
232 | | - |
233 | | - # Create a new coroutine handler |
234 | | - json_diff = JsonDiff(old_paths, new_paths, compare_dir, 4) |
235 | | - |
236 | | - # Find the differences between json files and save the results in a |
237 | | - # chosen directory asynchronously |
238 | | - asyncio.run(json_diff.diff()) |
239 | | - |
240 | | - |
241 | 102 | # Compute continuous integration metrics before and after a |
242 | 103 | # tree-sitter-language update. |
243 | 104 | def compute_ci_metrics(args: argparse.Namespace) -> None: |
@@ -342,8 +203,13 @@ def compare_metrics(args: argparse.Namespace) -> None: |
342 | 203 | # Create compare directory |
343 | 204 | compare_dir.mkdir(parents=True, exist_ok=True) |
344 | 205 |
|
345 | | - # Save files of differences and minimal tests in the chosen directory |
346 | | - save_diff_files(old_dir, new_dir, compare_dir) |
| 206 | + # Get JSON of differences |
| 207 | + print("\nSave JSON of differences in", compare_dir) |
| 208 | + run_subprocess("json-diff-cli", "--raw-json", "-o", compare_dir, old_dir, new_dir) |
| 209 | + |
| 210 | + # Get minimal tests |
| 211 | + print("\nSave minimal tests in", compare_dir) |
| 212 | + run_subprocess("json-minimal-tests", "-o", compare_dir, old_dir, new_dir) |
347 | 213 |
|
348 | 214 |
|
349 | 215 | def main() -> None: |
|
0 commit comments