Skip to content

Commit 3d4ce3f

Browse files
vdusek and claude authored
chore: replace custom sort postprocess with keep_model_order config (#734)
## Summary

- Enables `keep_model_order = true` in the `[tool.datamodel-codegen]` config.
- Removes the custom `sort_classes` postprocess step and its 16 unit tests.
- Verified by regenerating `src/apify_client/_models.py` — zero diff vs. the previous custom-sort output.

`datamodel-code-generator`'s built-in `--keep-model-order` already does a topological sort with class-name ordering, which is equivalent to what our custom script was doing. Discussed with the maintainer in koxudaxi/datamodel-code-generator#3087.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1a2b642 commit 3d4ce3f

3 files changed

Lines changed: 1 addition & 361 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ aliases = "datamodel_codegen_aliases.json"
229229
formatters = ["ruff-check", "ruff-format"]
230230
custom_file_header = "# generated by datamodel-codegen"
231231
disable_timestamp = true
232+
keep_model_order = true
232233

233234
[tool.uv]
234235
# Minimal defense against supply-chain attacks.

scripts/postprocess_generated_models.py

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,11 @@ class alongside the canonical `ErrorType(StrEnum)`. This script removes the dupl
1010
rewires references to use `ErrorType`.
1111
- Missing @docs_group decorator: Adds `@docs_group('Models')` to all model classes for API
1212
reference documentation grouping, along with the required import.
13-
- Class sorting: Sorts class definitions alphabetically (with topological ordering to respect inheritance
14-
dependencies), so that regeneration from a reordered OpenAPI spec produces minimal diffs.
1513
"""
1614

1715
from __future__ import annotations
1816

19-
import heapq
2017
import re
21-
from collections import defaultdict
2218
from pathlib import Path
2319

2420
MODELS_PATH = Path(__file__).resolve().parent.parent / 'src' / 'apify_client' / '_models.py'
@@ -80,97 +76,11 @@ def add_docs_group_decorators(content: str) -> str:
8076
return '\n'.join(result)
8177

8278

83-
def sort_classes(content: str) -> str:
    """Sort class definitions alphabetically while respecting inheritance order.

    Uses topological sorting (Kahn's algorithm) so that base classes always appear before their subclasses, with
    alphabetical ordering as the tie-breaker. This makes the output deterministic regardless of the order in the
    OpenAPI spec, which keeps diffs minimal across regenerations.

    Only the class statement's base-class expression creates an ordering constraint — field type annotations are lazy
    strings thanks to `from __future__ import annotations` and don't require forward declaration.

    Args:
        content: Full text of the generated models module. Every class block is expected to start with a line equal
            to `DOCS_GROUP_DECORATOR`; everything before the first such line is treated as the file header.

    Returns:
        The text with the class blocks reordered, separated by two blank lines and terminated by a single newline.
        `content` is returned unchanged when there is no decorator line at all, when a block has no
        `class Name(Bases):` statement, when two blocks declare the same class name, or when the base-class graph
        contains a cycle.
    """
    lines = content.split('\n')

    # Find where class blocks start (first @docs_group decorator).
    try:
        header_end = lines.index(DOCS_GROUP_DECORATOR)
    except ValueError:
        # No decorated class blocks at all: there is nothing to reorder, and rebuilding the text
        # would only prepend blank lines to it.
        return content

    # Strip trailing blank lines from the header; we re-add spacing later.
    header_lines = lines[:header_end]
    while header_lines and not header_lines[-1].strip():
        header_lines.pop()
    header = '\n'.join(header_lines)

    # Split the remainder into class blocks.
    # Each block starts with the decorator on its own line; the lookahead keeps the decorator in its block.
    rest = '\n'.join(lines[header_end:])
    decorator_escaped = re.escape(DOCS_GROUP_DECORATOR)
    raw_blocks = re.split(rf'(?=^{decorator_escaped}$)', rest, flags=re.MULTILINE)
    blocks = [block.strip() for block in raw_blocks if block.strip()]

    # Parse each block: extract class name and base-class dependencies.
    class_blocks: dict[str, str] = {}
    class_deps: dict[str, set[str]] = {}

    for block in blocks:
        match = re.search(r'^class\s+(\w+)\(([^)]+)\):', block, re.MULTILINE)
        if not match:
            # A block without a recognizable class statement — fall back to avoid data loss.
            return content
        class_name, base_expr = match.groups()

        # Collect every identifier in the base-class expression, including one-letter names;
        # names that are not classes of this file are filtered out when the graph is built.
        class_blocks[class_name] = block
        class_deps[class_name] = set(re.findall(r'\b[A-Za-z_]\w*\b', base_expr))

    if len(class_blocks) != len(blocks):
        # Two blocks declared the same class name — fall back to avoid data loss.
        return content

    all_names = set(class_blocks)

    # Build the dependency graph (only in-file references matter).
    in_degree: dict[str, int] = {}
    reverse: dict[str, set[str]] = defaultdict(set)

    for name, refs in class_deps.items():
        local_deps = (refs & all_names) - {name}
        in_degree[name] = len(local_deps)
        for dep in local_deps:
            reverse[dep].add(name)

    # Kahn's algorithm with a min-heap for alphabetical tie-breaking.
    heap = [name for name, degree in in_degree.items() if degree == 0]
    heapq.heapify(heap)

    sorted_names: list[str] = []
    while heap:
        name = heapq.heappop(heap)
        sorted_names.append(name)
        # `.get` avoids inserting empty entries into the defaultdict for classes nobody inherits from.
        for dependent in reverse.get(name, ()):
            in_degree[dependent] -= 1
            if in_degree[dependent] == 0:
                heapq.heappush(heap, dependent)

    if len(sorted_names) != len(class_blocks):
        # Cycle detected — fall back to the original order to avoid data loss.
        return content

    sorted_blocks = [class_blocks[name] for name in sorted_names]
    # An empty header must not contribute a separator, otherwise the file would start with blank lines.
    parts = [header, *sorted_blocks] if header else sorted_blocks
    return '\n\n\n'.join(parts) + '\n'
166-
167-
16879
def main() -> None:
16980
content = MODELS_PATH.read_text()
17081
fixed = fix_discriminators(content)
17182
fixed = deduplicate_error_type_enum(fixed)
17283
fixed = add_docs_group_decorators(fixed)
173-
fixed = sort_classes(fixed)
17484

17585
if fixed != content:
17686
MODELS_PATH.write_text(fixed)

0 commit comments

Comments
 (0)