|
| 1 | +"""Shared utilities for build scripts. |
| 2 | +
|
| 3 | +This module provides common functions for loading spreadsheets, |
| 4 | +validating data, and injecting content into HTML templates. |
| 5 | +""" |
| 6 | +from pathlib import Path |
| 7 | +from typing import Any, Dict, List, Optional |
| 8 | +import openpyxl |
| 9 | + |
| 10 | + |
def load_spreadsheet(filepath: Path) -> List[Dict[str, Any]]:
    """Load Excel spreadsheet and return list of row dictionaries.

    Only the workbook's active sheet is read. The first row supplies the
    column headers; each later row containing at least one non-empty cell
    becomes one dictionary.

    Args:
        filepath: Path to the .xlsx file

    Returns:
        List of dictionaries, one per row, with column headers as keys.
        Empty cells are converted to empty strings.

    Raises:
        FileNotFoundError: If the spreadsheet doesn't exist
        openpyxl.utils.exceptions.InvalidFileException: If file is not valid xlsx
        ValueError: If any header cell in the first row is empty
    """
    wb = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
    # A read-only workbook must be closed explicitly, so release it on every
    # exit path -- including the header ValueError raised below.
    try:
        sheet = wb.active

        # Get headers from first row
        headers = [cell.value for cell in sheet[1]]

        # Validate headers - no None values allowed
        if None in headers:
            raise ValueError(f"Spreadsheet has empty header cells: {headers}")

        rows = []
        for row in sheet.iter_rows(min_row=2, values_only=True):
            # Skip completely empty rows
            if all(cell is None for cell in row):
                continue

            # Convert None to empty string for consistency
            rows.append({
                header: '' if value is None else value
                for header, value in zip(headers, row)
            })
        return rows
    finally:
        wb.close()
| 52 | + |
| 53 | + |
def inject_content(template_path: Path, output_path: Path,
                   replacements: Dict[str, str]) -> None:
    """Inject generated content into template at marker locations.

    Markers in the template should be HTML comments like: <!-- MARKER_NAME -->

    Every marker is checked against the original template text, and all
    substitutions are made in a single pass. Injected HTML is therefore
    never re-scanned: marker-like text inside one replacement is left as is
    rather than being replaced by another entry. Every occurrence of a
    marker in the template is replaced.

    Args:
        template_path: Path to the template HTML file
        output_path: Path where the generated HTML will be written
        replacements: Dictionary mapping marker names to HTML content

    Raises:
        FileNotFoundError: If template doesn't exist
        ValueError: If a marker is not found in the template
    """
    import re  # function-scope so the module's import block is untouched

    content = template_path.read_text(encoding='utf-8')

    # Validate everything up front so nothing is written when a marker is
    # missing, and so validation sees the template rather than injected HTML.
    html_by_pattern = {}
    for marker, html in replacements.items():
        pattern = f'<!-- {marker} -->'
        if pattern not in content:
            raise ValueError(
                f"Marker '{pattern}' not found in template {template_path}"
            )
        html_by_pattern[pattern] = html

    # An empty alternation would match at every position, so skip the
    # substitution entirely when there is nothing to replace.
    if html_by_pattern:
        # Longest pattern first so a marker that is a prefix of another
        # cannot shadow it in the alternation.
        alternation = '|'.join(
            re.escape(pattern)
            for pattern in sorted(html_by_pattern, key=len, reverse=True)
        )
        # A callable replacement inserts the HTML literally (no backslash
        # or group-reference processing).
        content = re.sub(
            alternation,
            lambda match: html_by_pattern[match.group(0)],
            content,
        )

    output_path.write_text(content, encoding='utf-8')
| 80 | + |
| 81 | + |
def validate_required_fields(row: Dict[str, Any], required: List[str],
                             row_num: int) -> List[str]:
    """Report every required field that is absent or blank in a row.

    A field counts as missing when its key is not in ``row``, its value is
    ``None``, or its value is a string made up only of whitespace. Other
    values (including ``0`` and ``False``) count as present.

    Args:
        row: Dictionary of field values from a spreadsheet row
        required: List of required field names
        row_num: Row number (for error messages), 1-indexed from data rows

    Returns:
        List of error messages in the order of ``required`` (empty if all
        fields valid)
    """
    def _is_blank(value: Any) -> bool:
        # None covers both an absent key (via dict.get) and an explicit None.
        if value is None:
            return True
        return isinstance(value, str) and not value.strip()

    return [
        f"Row {row_num}: Missing required field '{name}'"
        for name in required
        if _is_blank(row.get(name))
    ]
| 100 | + |
| 101 | + |
def validate_url_format(url: str) -> bool:
    """Check if a string looks like a valid URL.

    Only the scheme prefix is inspected; the rest of the string is not
    parsed. Leading and trailing whitespace is ignored, and the scheme is
    compared case-insensitively because URI schemes are case-insensitive
    (so ``HTTPS://example.com`` is accepted).

    Args:
        url: String to validate

    Returns:
        True if URL starts with http:// or https:// (in any letter case),
        False otherwise, including for empty or non-string input
    """
    if not isinstance(url, str):
        return False
    # An empty or whitespace-only string falls through to False here.
    return url.strip().lower().startswith(('http://', 'https://'))
| 115 | + |
| 116 | + |
def check_file_exists(filepath: Path, base_dir: Path) -> Optional[str]:
    """Report whether a file referenced by name is present under a directory.

    The name is converted to a string and stripped of surrounding
    whitespace before being joined onto ``base_dir``.

    Args:
        filepath: Filename (not full path) referenced in spreadsheet
        base_dir: Directory where the file should exist

    Returns:
        Error message if file doesn't exist, None if it exists. A falsy or
        whitespace-only ``filepath`` also yields None.
    """
    name = str(filepath).strip() if filepath else ''
    if not name:
        # Empty is OK for optional fields
        return None

    candidate = base_dir / name
    return None if candidate.exists() else f"File not found: {candidate}"
0 commit comments