Skip to content

Commit 586d15e

Browse files
vdusek and claude authored
ci: Sort generated model classes alphabetically with topological ordering (#727)
## Summary

- Add alphabetical class sorting (with topological ordering to respect inheritance) to `postprocess_generated_models.py`, so that regeneration from a reordered OpenAPI spec produces minimal diffs.
- Make `add_docs_group_decorators` idempotent so the script can be safely re-run.
- Apply the new sorting to `_models.py`.

## Note

- This is a workaround until [datamodel-code-generator#3087](https://github.com/koxudaxi/datamodel-code-generator/issues/3087) is implemented (if ever).

---

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d52bc8d commit 586d15e

4 files changed

Lines changed: 3094 additions & 2458 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ max-branches = 18
176176
addopts = "-r a --verbose"
177177
asyncio_default_fixture_loop_scope = "function"
178178
asyncio_mode = "auto"
179+
pythonpath = ["."]
179180
timeout = 1800
180181

181182
[tool.ty.environment]

scripts/postprocess_generated_models.py

Lines changed: 110 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,19 @@ class alongside the canonical `ErrorType(StrEnum)`. This script removes the dupl
1010
rewires references to use `ErrorType`.
1111
- Missing @docs_group decorator: Adds `@docs_group('Models')` to all model classes for API
1212
reference documentation grouping, along with the required import.
13+
- Class sorting: Sorts class definitions alphabetically (with topological ordering to respect inheritance
14+
dependencies), so that regeneration from a reordered OpenAPI spec produces minimal diffs.
1315
"""
1416

1517
from __future__ import annotations
1618

19+
import heapq
1720
import re
21+
from collections import defaultdict
1822
from pathlib import Path
1923

2024
MODELS_PATH = Path(__file__).resolve().parent.parent / 'src' / 'apify_client' / '_models.py'
25+
DOCS_GROUP_DECORATOR = "@docs_group('Models')"
2126

2227
# Map of camelCase discriminator values to their snake_case equivalents.
2328
# Add new entries here as needed when the OpenAPI spec introduces new discriminators.
@@ -54,26 +59,118 @@ def deduplicate_error_type_enum(content: str) -> str:
5459

5560

5661
def add_docs_group_decorators(content: str) -> str:
    """Add `@docs_group('Models')` decorator to all model classes and the required import.

    This function is idempotent — it skips the import and decorators if they already exist.

    The `docs_group` import is inserted once, directly after the first top-level `from pydantic import ...`
    statement. Both the single-line form and the parenthesized multi-line form are supported, so the new import
    is never injected into the middle of a parenthesized import list.

    Args:
        content: Full source text of the generated models module.

    Returns:
        The source text with the import and the decorators added where they were missing.
    """
    docs_import = 'from apify_client._docs import docs_group'

    # Add the import after the existing pydantic import (only if not already present).
    if docs_import not in content:
        content = re.sub(
            # Anchored at line start so indented imports or text inside docstrings are not matched.
            # The first alternative consumes a whole parenthesized import, the second a single-line one.
            r'^(from pydantic import (?:\([^)]*\)|[^\n]+)\n)',
            rf'\1\n{docs_import}\n',
            content,
            count=1,  # A second pydantic import must not produce a duplicate docs_group import.
            flags=re.MULTILINE,
        )

    # Add the decorator before every top-level class definition not already preceded by it.
    result: list[str] = []
    for line in content.split('\n'):
        if line.startswith('class ') and (not result or result[-1] != DOCS_GROUP_DECORATOR):
            result.append(DOCS_GROUP_DECORATOR)
        result.append(line)
    return '\n'.join(result)
81+
82+
83+
def sort_classes(content: str) -> str:
    """Sort class definitions alphabetically while respecting inheritance order.

    Uses topological sorting so that base classes always appear before their subclasses, with alphabetical ordering as
    the tie-breaker. This makes the output deterministic regardless of the order in the OpenAPI spec, which keeps diffs
    minimal across regenerations.

    Only the class statement's base-class expression creates an ordering constraint — field type annotations are lazy
    strings thanks to `from __future__ import annotations` and don't require forward declaration.

    The input is returned unchanged when it cannot be sorted safely: no decorated class block is present, a block
    does not contain a recognizable class statement, two blocks define the same class name, or the base-class
    references form a cycle.

    Args:
        content: Full source text of the models module, with every class preceded by the docs-group decorator.

    Returns:
        The source text with its class blocks reordered, or `content` itself when sorting is not possible.
    """
    lines = content.split('\n')

    # Find where class blocks start (first decorator line). Without one there is nothing to reorder.
    header_end = next((i for i, line in enumerate(lines) if line == DOCS_GROUP_DECORATOR), None)
    if header_end is None:
        return content

    # Strip trailing blank lines from the header; spacing is re-added when the output is assembled.
    header_lines = lines[:header_end]
    while header_lines and not header_lines[-1].strip():
        header_lines.pop()
    header = '\n'.join(header_lines)

    # Split the remainder into class blocks. Each block starts with the decorator on its own line; the
    # zero-width lookahead keeps the decorator attached to the block it introduces.
    rest = '\n'.join(lines[header_end:])
    decorator_escaped = re.escape(DOCS_GROUP_DECORATOR)
    raw_blocks = re.split(rf'(?=^{decorator_escaped}$)', rest, flags=re.MULTILINE)
    blocks = [b.strip() for b in raw_blocks if b.strip()]

    # Parse each block: extract the class name and the identifiers used in its base-class expression.
    class_blocks: dict[str, str] = {}
    class_deps: dict[str, set[str]] = {}

    for block in blocks:
        # The base list is optional so that `class Foo:` is handled like a class without dependencies.
        match = re.search(r'^class\s+(\w+)\s*(?:\(([^)]*)\))?\s*:', block, re.MULTILINE)
        if not match:
            continue
        class_name = match.group(1)
        base_expr = match.group(2) or ''

        # Collect every identifier (any case, any length); names that are not classes defined in this
        # file are discarded below when intersecting with the known class names.
        class_blocks[class_name] = block
        class_deps[class_name] = set(re.findall(r'\b[A-Za-z_]\w*', base_expr))

    if len(class_blocks) != len(blocks):
        # Some blocks didn't match the class regex (or names collided) — fall back to avoid data loss.
        return content

    all_names = set(class_blocks)

    # Build the dependency graph (only in-file references matter).
    in_degree: dict[str, int] = {}
    reverse: dict[str, set[str]] = defaultdict(set)

    for name, refs in class_deps.items():
        local_deps = (refs & all_names) - {name}
        in_degree[name] = len(local_deps)
        for dep in local_deps:
            reverse[dep].add(name)

    # Kahn's algorithm with a min-heap for alphabetical tie-breaking.
    heap = [name for name, degree in in_degree.items() if degree == 0]
    heapq.heapify(heap)

    sorted_names: list[str] = []
    while heap:
        name = heapq.heappop(heap)
        sorted_names.append(name)
        for dependent in reverse[name]:
            in_degree[dependent] -= 1
            if in_degree[dependent] == 0:
                heapq.heappush(heap, dependent)

    if len(sorted_names) != len(class_blocks):
        # Cycle detected — fall back to the original order to avoid data loss.
        return content

    body = '\n\n\n'.join(class_blocks[name] for name in sorted_names)
    # Only separate header and body when there is a header; otherwise the file would start with blank lines.
    prefix = f'{header}\n\n\n' if header else ''
    return f'{prefix}{body}\n'
70166

71167

72168
def main() -> None:
73169
content = MODELS_PATH.read_text()
74170
fixed = fix_discriminators(content)
75171
fixed = deduplicate_error_type_enum(fixed)
76172
fixed = add_docs_group_decorators(fixed)
173+
fixed = sort_classes(fixed)
77174

78175
if fixed != content:
79176
MODELS_PATH.write_text(fixed)

0 commit comments

Comments
 (0)