|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Script to identify docstring formatting issues in Python files. |
| 4 | +
|
| 5 | +Checks for: |
| 6 | +1. Bullet lists (lines starting with *, -, +) without blank line before them |
| 7 | +2. Numbered lists (lines starting with digits and .) without blank line before them |
| 8 | +3. Code blocks (lines starting with >>>) without blank line before them |
| 9 | +4. reStructuredText directives (lines starting with ..) without blank line before them |
| 10 | +
|
| 11 | +These are common reStructuredText/Sphinx formatting issues that can cause |
| 12 | +documentation to render incorrectly. |
| 13 | +
|
| 14 | +The script attempts to avoid false positives by: |
| 15 | +- Skipping content inside literal blocks (after :: markers) |
| 16 | +- Ignoring items that follow Sphinx field markers (:param:, :Example:, etc.) |
| 17 | +- Handling Python interactive session output (lines between >>> prompts) |
| 18 | +- Recognizing indented continuations |
| 19 | +
|
| 20 | +Known limitations: |
| 21 | +- May flag some valid trailing >>> prompts in code examples |
| 22 | +- Line numbers are approximate (offset from docstring start) |
| 23 | +- Some complex nested structures may not be handled perfectly |
| 24 | +
|
| 25 | +Usage: |
| 26 | + python check_docstring_formatting.py [directory] |
| 27 | +
|
| 28 | +If no directory is specified, defaults to ../python relative to this script. |
| 29 | +""" |
| 30 | + |
| 31 | +import os |
| 32 | +import re |
| 33 | +import ast |
| 34 | +import sys |
| 35 | +from pathlib import Path |
| 36 | + |
| 37 | + |
def get_docstrings_from_file(filepath):
    """Extract all docstrings from a Python file with their line numbers.

    Args:
        filepath: Path of the Python source file to inspect.

    Returns:
        A list of ``(line_num, docstring, node_type)`` tuples, one for each
        module, class, function or async function that has a non-empty
        docstring. ``line_num`` is the 1-based file line on which the
        docstring literal starts, ``docstring`` is the raw text (not
        dedented) and ``node_type`` is the AST class name such as
        ``'FunctionDef'``. When the file cannot be read or parsed, a message
        is written to stderr and an empty list is returned.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        tree = ast.parse(content, filename=str(filepath))
    except (OSError, SyntaxError, ValueError, RecursionError) as e:
        # ValueError also covers UnicodeDecodeError (files that are not UTF-8).
        # Every skipped file is reported so it does not silently look clean.
        print(f"Error parsing {filepath}: {e}", file=sys.stderr)
        return []

    # Only these node kinds can carry a docstring.
    docstring_nodes = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)

    docstrings = []
    for node in ast.walk(tree):
        if not isinstance(node, docstring_nodes):
            continue
        docstring = ast.get_docstring(node, clean=False)
        if docstring:
            # A docstring is by definition the first statement of the body,
            # so its position is body[0].lineno for modules as well (a module
            # docstring need not be on line 1, e.g. after a shebang line).
            docstrings.append((node.body[0].lineno, docstring, type(node).__name__))

    return docstrings
| 72 | + |
| 73 | + |
def check_docstring_formatting(docstring):
    """
    Check for formatting issues in a docstring.

    A bullet item, numbered item, ``>>>`` prompt or ``..`` directive is
    reported when the line directly above it is non-blank, unless one of
    these exemptions applies:

    - the line is inside a literal block opened by a line ending in ``::``
    - the previous line is itself a list item, prompt or directive
    - the previous line is a Sphinx field marker such as ``:param x:``
    - the line is indented deeper than the previous line
    - the line is a ``>>>`` prompt continuing a doctest session, i.e. an
      earlier ``>>>`` appeared with no blank line in between

    The first docstring line is never checked.

    Returns a list of (line_offset, issue_description) tuples, where
    line_offset is the 1-based index of the offending line within the
    docstring.
    """
    issues = []
    lines = docstring.split('\n')

    # Patterns that should have a blank line before them
    patterns = [
        (re.compile(r'^\s*[\*\-\+]\s+'), 'bullet list item'),
        (re.compile(r'^\s*\d+\.\s+'), 'numbered list item'),
        (re.compile(r'^\s*>>>'), 'code block'),
        (re.compile(r'^\s*\.\.\s+'), 'reStructuredText directive'),
    ]

    # Sphinx field patterns that can contain code blocks or lists
    sphinx_field_pattern = re.compile(r'^\s*:[A-Za-z_][A-Za-z0-9_]*:')

    # Track if we're in a literal block (started by ::)
    in_literal_block = False
    literal_block_indent = 0

    # True while a doctest session is open: a ">>>" prompt has been seen and
    # no blank line has ended it yet. Output and "..." continuation lines
    # keep the session open, so later prompts in the session are legitimate.
    in_doctest = lines[0].lstrip().startswith('>>>')

    # Start at 1: the first line is always part of the docstring opening.
    for i in range(1, len(lines)):
        line = lines[i]
        prev_line = lines[i - 1]
        stripped = line.strip()
        current_indent = len(line) - len(line.lstrip())

        # A previous line ending with :: opens a literal block
        if prev_line.rstrip().endswith('::'):
            in_literal_block = True
            literal_block_indent = len(prev_line) - len(prev_line.lstrip())

        # A non-blank line dedented to the marker's level closes it again
        if in_literal_block and stripped and current_indent <= literal_block_indent:
            in_literal_block = False

        # Literal block content is never checked, and blank lines match no
        # pattern; both also terminate any open doctest session.
        if in_literal_block or not stripped:
            in_doctest = False
            continue

        # Only the first matching pattern is considered (one issue per line)
        description = next((desc for rx, desc in patterns if rx.match(line)), None)

        if description is not None and prev_line.strip():
            prev_indent = len(prev_line) - len(prev_line.lstrip())
            exempt = (
                # previous line is also a list item / prompt / directive
                any(rx.match(prev_line) for rx, _ in patterns)
                # previous line is a Sphinx field (:Example:, :param:, ...)
                or sphinx_field_pattern.match(prev_line)
                # deeper indent than the previous line: likely a continuation
                or current_indent > prev_indent
                # prompt following output or "..." lines of the same session
                or (description == 'code block' and in_doctest)
            )
            if not exempt:
                issues.append((i + 1, f"{description} without blank line before it"))

        if stripped.startswith('>>>'):
            in_doctest = True

    return issues
| 153 | + |
| 154 | + |
def find_python_files(root_dir):
    """Return a list of every ``*.py`` path found recursively under root_dir."""
    matches = Path(root_dir).rglob('*.py')
    return [*matches]
| 159 | + |
| 160 | + |
def main():
    """Scan a directory tree for docstring formatting issues and print a report.

    The directory is taken from the first command-line argument. Without one
    it defaults to the ``python`` directory that sits beside the directory
    containing this script.

    Returns:
        0 when no issues were found, 1 otherwise. Calls ``sys.exit(1)``
        directly when the directory does not exist.
    """
    if len(sys.argv) > 1:
        python_dir = Path(sys.argv[1])
    else:
        # Default to checking the python directory relative to this script
        python_dir = Path(__file__).parent.parent / 'python'

    if not python_dir.exists():
        print(f"Error: Directory {python_dir} does not exist", file=sys.stderr)
        sys.exit(1)

    print(f"Checking Python files in: {python_dir}")
    print("=" * 80)

    files_with_issues = 0
    total_issues = 0

    for py_file in sorted(find_python_files(python_dir)):
        file_issues = []

        for doc_line_num, docstring, node_type in get_docstrings_from_file(py_file):
            for line_offset, issue_desc in check_docstring_formatting(docstring):
                # doc_line_num is the line the docstring starts on and
                # line_offset counts from 1 at that same line, so subtract
                # one to avoid counting the first docstring line twice.
                abs_line = doc_line_num + line_offset - 1
                file_issues.append((abs_line, issue_desc, node_type))

        if not file_issues:
            continue

        files_with_issues += 1
        total_issues += len(file_issues)

        # Make path relative to python_dir's parent for cleaner output
        rel_path = py_file.relative_to(python_dir.parent)
        print(f"\n{rel_path}:")

        for line_num, issue_desc, node_type in sorted(file_issues):
            print(f"  Line ~{line_num} ({node_type}): {issue_desc}")

    print("\n" + "=" * 80)
    print(f"Summary: Found {total_issues} issues in {files_with_issues} files")

    return 0 if total_issues == 0 else 1
| 207 | + |
| 208 | + |
# Run the checker only when executed as a script; main()'s return value
# (0 = no issues, 1 = issues found) becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
0 commit comments