Skip to content

Commit 6ef2486

Browse files
committed
add whitespace checking script for pydocs
1 parent 6af9ce3 commit 6ef2486

1 file changed

Lines changed: 210 additions & 0 deletions

File tree

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to identify docstring formatting issues in Python files.
4+
5+
Checks for:
6+
1. Bullet lists (lines starting with *, -, +) without blank line before them
7+
2. Numbered lists (lines starting with digits and .) without blank line before them
8+
3. Code blocks (lines starting with >>>) without blank line before them
9+
4. reStructuredText directives (lines starting with ..) without blank line before them
10+
11+
These are common reStructuredText/Sphinx formatting issues that can cause
12+
documentation to render incorrectly.
13+
14+
The script attempts to avoid false positives by:
15+
- Skipping content inside literal blocks (after :: markers)
16+
- Ignoring items that follow Sphinx field markers (:param:, :Example:, etc.)
17+
- Handling Python interactive session output (lines between >>> prompts)
18+
- Recognizing indented continuations
19+
20+
Known limitations:
21+
- May flag some valid trailing >>> prompts in code examples
22+
- Line numbers are approximate (offset from docstring start)
23+
- Some complex nested structures may not be handled perfectly
24+
25+
Usage:
26+
python check_docstring_formatting.py [directory]
27+
28+
If no directory is specified, defaults to ../python relative to this script.
29+
"""
30+
31+
import os
32+
import re
33+
import ast
34+
import sys
35+
from pathlib import Path
36+
37+
38+
def get_docstrings_from_file(filepath):
    """Extract all docstrings from a Python file with their line numbers.

    Args:
        filepath: Path (str or ``Path``) of the Python source file to read.

    Returns:
        A list of ``(line_num, docstring, node_type)`` tuples, one per
        module, class, function or async function that has a docstring.
        ``line_num`` is the line of the docstring statement as reported by
        the AST, ``docstring`` is the raw (uncleaned) text and ``node_type``
        is the AST class name (e.g. ``"FunctionDef"``).  An empty list is
        returned, and a message is written to stderr, when the file cannot
        be read or parsed.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading {filepath}: {e}", file=sys.stderr)
        return []

    try:
        tree = ast.parse(content, filename=str(filepath))
    except (SyntaxError, ValueError, RecursionError) as e:
        print(f"Error parsing {filepath}: {e}", file=sys.stderr)
        return []

    # Only these node types can carry a docstring.
    docstring_owners = (
        ast.FunctionDef,
        ast.AsyncFunctionDef,
        ast.ClassDef,
        ast.Module,
    )

    docstrings = []
    for node in ast.walk(tree):
        if not isinstance(node, docstring_owners):
            continue

        # clean=False keeps the original indentation and line layout so that
        # line offsets inside the docstring still map onto file lines.
        docstring = ast.get_docstring(node, clean=False)
        if not docstring:
            continue

        # get_docstring() only returns text when the first body statement is
        # the docstring expression, so body[0] exists here.  Using its lineno
        # for modules too keeps the position right when a shebang or comments
        # precede the module docstring.
        docstrings.append((node.body[0].lineno, docstring, type(node).__name__))

    return docstrings
72+
73+
74+
# Markers that should be preceded by a blank line, paired with the label
# used in the reported issue.  Compiled once rather than on every line.
_BLOCK_START_PATTERNS = [
    (re.compile(r'^\s*[\*\-\+]\s+'), 'bullet list item'),
    (re.compile(r'^\s*\d+\.\s+'), 'numbered list item'),
    (re.compile(r'^\s*>>>'), 'code block'),
    (re.compile(r'^\s*\.\.\s+'), 'reStructuredText directive'),
]

# Sphinx field markers (":param x:", ":Example:", ...) that may be followed
# directly by a list or code block.
_SPHINX_FIELD_PATTERN = re.compile(r'^\s*:[A-Za-z_][A-Za-z0-9_]*:')


def _leading_indent(text):
    """Return the number of leading whitespace characters in *text*."""
    return len(text) - len(text.lstrip())


def check_docstring_formatting(docstring):
    """
    Check for formatting issues in a docstring.

    A line is reported when it starts a bullet list, numbered list, ``>>>``
    code block or ``..`` directive and the line directly above it is not
    blank, unless one of these exemptions applies:

    - the line is inside a literal block opened by a line ending in ``::``
    - the previous line is itself one of the markers above
    - the previous line is a Sphinx field such as ``:param x:``
    - the line is indented deeper than the previous line
    - the line is a ``>>>`` prompt and an earlier ``>>>`` prompt already
      appeared in the same paragraph (so the lines in between are ``...``
      continuations or interpreter output)

    Wrapped list items are not tracked: a list item that follows the
    continuation line of a previous item can still be reported.

    Args:
        docstring: The raw docstring text.

    Returns:
        A list of ``(line_offset, issue_description)`` tuples, where
        ``line_offset`` is the 1-based line number inside the docstring.
    """
    issues = []
    lines = docstring.split('\n')

    # Literal-block state: set by a line ending in "::", cleared on dedent.
    in_literal_block = False
    literal_block_indent = 0

    # True once a ">>>" prompt has been seen in the current paragraph; reset
    # by every blank line.
    in_doctest = False

    # The first line sits on the opening quotes and has no predecessor.
    for i in range(1, len(lines)):
        line = lines[i]
        prev_line = lines[i - 1]
        stripped = line.strip()
        current_indent = _leading_indent(line)

        if prev_line.rstrip().endswith('::'):
            in_literal_block = True
            literal_block_indent = _leading_indent(prev_line)

        # A non-blank line at or left of the "::" line ends the literal block.
        if in_literal_block and stripped and current_indent <= literal_block_indent:
            in_literal_block = False

        if not stripped:
            in_doctest = False
            continue

        if in_literal_block:
            continue

        # Only the first matching pattern counts (one issue per line).
        description = next(
            (label for pattern, label in _BLOCK_START_PATTERNS if pattern.match(line)),
            None,
        )
        if description is None:
            continue

        continues_doctest = False
        if description == 'code block':
            continues_doctest = in_doctest
            in_doctest = True

        if not prev_line.strip():
            continue

        follows_marker = any(
            pattern.match(prev_line) for pattern, _ in _BLOCK_START_PATTERNS
        )
        follows_field = _SPHINX_FIELD_PATTERN.match(prev_line) is not None
        is_indented_continuation = current_indent > _leading_indent(prev_line)

        if not (follows_marker or follows_field
                or is_indented_continuation or continues_doctest):
            issues.append((i + 1, f"{description} without blank line before it"))

    return issues
153+
154+
155+
def find_python_files(root_dir):
    """Find all Python files in the given directory.

    Args:
        root_dir: Directory (str or ``Path``) to search recursively.

    Returns:
        A list of ``Path`` objects for every regular file below ``root_dir``
        whose name matches ``*.py``.  Directories that happen to match the
        pattern are left out because they cannot be opened as source files.
    """
    root = Path(root_dir)
    return [path for path in root.rglob('*.py') if path.is_file()]
159+
160+
161+
def main():
    """Check every Python file under the target directory and print a report.

    The target directory is ``sys.argv[1]`` when given, otherwise the
    ``python`` directory next to this script's parent directory.  The process
    exits with status 1 if that directory does not exist.

    Returns:
        0 when no issues were found, 1 otherwise (suitable for ``sys.exit``).
    """
    # Default to checking the python directory relative to this script
    script_dir = Path(__file__).parent
    python_dir = script_dir.parent / 'python'

    if len(sys.argv) > 1:
        python_dir = Path(sys.argv[1])

    if not python_dir.exists():
        print(f"Error: Directory {python_dir} does not exist", file=sys.stderr)
        sys.exit(1)

    print(f"Checking Python files in: {python_dir}")
    print("=" * 80)

    files_with_issues = 0
    total_issues = 0

    for py_file in sorted(find_python_files(python_dir)):
        file_issues = []

        for doc_line_num, docstring, node_type in get_docstrings_from_file(py_file):
            for line_offset, issue_desc in check_docstring_formatting(docstring):
                # line_offset is 1-based within the docstring and the
                # docstring's first line sits on doc_line_num itself, hence
                # the -1.  Still approximate: escape sequences or line
                # continuations inside the literal can shift the mapping.
                abs_line = doc_line_num + line_offset - 1
                file_issues.append((abs_line, issue_desc, node_type))

        if not file_issues:
            continue

        files_with_issues += 1
        total_issues += len(file_issues)

        # Show the path relative to the parent of python_dir so the checked
        # directory's own name stays visible in the output.
        rel_path = py_file.relative_to(python_dir.parent)
        print(f"\n{rel_path}:")

        for line_num, issue_desc, node_type in sorted(file_issues):
            print(f"  Line ~{line_num} ({node_type}): {issue_desc}")

    print("\n" + "=" * 80)
    print(f"Summary: Found {total_issues} issues in {files_with_issues} files")

    return 0 if total_issues == 0 else 1
207+
208+
209+
if __name__ == '__main__':
    # Run the checker only when executed as a script; main()'s return value
    # becomes the process exit status.
    raise SystemExit(main())

0 commit comments

Comments
 (0)