|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import pathlib |
| 4 | +from collections.abc import Iterable, Sequence |
| 5 | + |
# Two levels up from this file: the parent of the directory that contains it.
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
# U+2014, built from its code point rather than written literally
# (presumably so this file does not trip its own check; confirm with author).
EM_DASH = chr(0x2014)
# Names that cause a path to be skipped when they are its FIRST component
# relative to the scanned root.
ROOT_SKIP_DIRS = {
    ".git",
    ".venv",
    ".uv_cache",
    ".uv-cache",
    ".uv_tools",
    ".uv-tools",
    ".cache",
    "node_modules",
    ".next",
}
# Directory names that cause a file to be skipped at any depth.
RECURSIVE_SKIP_DIRS = {"__pycache__", ".pytest_cache"}
# Relative path prefixes (as tuples of components) that are skipped.
SKIP_PATH_PREFIXES = {
    ("docs", ".next"),
    ("docs", "node_modules"),
}
# File suffixes that are skipped; compared against the lowercased suffix.
SKIP_SUFFIXES = {
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".ico",
    ".mp4",
    ".mov",
    ".mp3",
    ".woff",
    ".woff2",
    ".ttf",
    ".otf",
    ".eot",
    ".pdf",
    ".zip",
    ".tar",
    ".gz",
    ".bz2",
    ".7z",
    ".ckpt",
    ".bin",
    ".pyc",
    ".pyo",
    ".db",
}
| 51 | + |
| 52 | + |
def iter_text_files(root: pathlib.Path) -> Iterable[pathlib.Path]:
    """Yield the files under *root* that should be scanned.

    A file is skipped when the first component of its path relative to
    *root* is in ``ROOT_SKIP_DIRS``, when that relative path starts with one
    of ``SKIP_PATH_PREFIXES``, when any of its parent directories is named in
    ``RECURSIVE_SKIP_DIRS``, or when its lowercased suffix is in
    ``SKIP_SUFFIXES``.

    Skipped directories are pruned during the walk rather than being fully
    traversed and filtered afterwards, so large trees such as ``.git`` or
    ``node_modules`` are never entered. Directory and file names are visited
    in sorted order so the output order is stable between runs.

    Args:
        root: Directory to walk.

    Yields:
        Paths (joined onto *root*) for which ``Path.is_file()`` is true and
        which passed every filter above.
    """
    import os  # Local import: only this function needs it.

    def location_skipped(parts: tuple[str, ...]) -> bool:
        # True when the root-level or path-prefix rules exclude these parts.
        return parts[0] in ROOT_SKIP_DIRS or any(
            parts[: len(prefix)] == prefix for prefix in SKIP_PATH_PREFIXES
        )

    # os.walk does not descend into directory symlinks by default, and it
    # ignores directory listing errors unless an onerror callback is given.
    for dirpath, dirnames, filenames in os.walk(root):
        dir_parts = pathlib.Path(dirpath).relative_to(root).parts
        # Assigning to dirnames[:] prunes the walk in place. Ancestors were
        # already vetted when they were kept, so only the new name needs the
        # RECURSIVE_SKIP_DIRS check.
        dirnames[:] = sorted(
            name
            for name in dirnames
            if name not in RECURSIVE_SKIP_DIRS
            and not location_skipped(dir_parts + (name,))
        )
        for name in sorted(filenames):
            rel_parts = dir_parts + (name,)
            # The location rules also apply to the file's own name, e.g. a
            # root-level file literally named ".git".
            if location_skipped(rel_parts):
                continue
            candidate = root.joinpath(*rel_parts)
            if candidate.suffix.lower() in SKIP_SUFFIXES:
                continue
            # Cheap name checks run first; is_file() costs a stat call and
            # filters out sockets, FIFOs and broken symlinks.
            if not candidate.is_file():
                continue
            yield candidate
| 69 | + |
| 70 | + |
def find_em_dashes(path: pathlib.Path) -> Sequence[tuple[int, str]]:
    """Return ``(line_number, line)`` pairs for lines of *path* with an em dash.

    The file is decoded as UTF-8 and undecodable bytes are dropped. Line
    numbers are 1-based and count newline-terminated lines only, so they
    match what editors and grep report.

    Args:
        path: File to inspect.

    Returns:
        A list of ``(line_number, line)`` tuples, with the line terminator
        removed. The list is empty when the file has no em dash or cannot be
        read.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # Best effort: an unreadable file is treated as having no findings.
        return []
    if EM_DASH not in text:
        # Most files are clean; skip the per-line scan entirely.
        return []
    # Text-mode reading has already translated "\r\n" and "\r" to "\n".
    # Splitting on "\n" alone avoids str.splitlines(), which also breaks on
    # form feed, vertical tab, U+2028 and similar characters and would shift
    # the reported line numbers.
    return [
        (lineno, line)
        for lineno, line in enumerate(text.split("\n"), start=1)
        if EM_DASH in line
    ]
| 81 | + |
| 82 | + |
def main() -> int:
    """Scan the repository for em dashes and report every occurrence.

    Returns:
        ``1`` after printing one ``path:line: snippet`` entry per offending
        line, or ``0`` when nothing was found.
    """
    # One (relative path, line number, stripped line) entry per offending line.
    hits = [
        (file_path.relative_to(REPO_ROOT), line_number, content.strip())
        for file_path in iter_text_files(REPO_ROOT)
        for line_number, content in find_em_dashes(file_path)
    ]
    if not hits:
        print("AI writing check passed (no em dash found).")
        return 0

    print(
        f"AI writing check failed: {EM_DASH!r} (em dash) detected in the repository"
    )
    for rel_path, lineno, snippet in hits:
        print(f"{rel_path}:{lineno}: {snippet}")
    print("Please remove the em dash or explain why it is acceptable.")
    return 1
| 98 | + |
| 99 | + |
# Run the check when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
0 commit comments