44import json
55import re
66import shutil
7- from datetime import datetime , timezone
7+ from datetime import UTC , datetime
88from pathlib import Path
9- from typing import TypedDict
9+ from typing import Any
1010
1111from jinja2 import Environment , FileSystemLoader
12- from readme_parser import parse_readme , parse_sponsors
13-
14-
15- class StarData (TypedDict ):
16- stars : int
17- owner : str
18- last_commit_at : str
19- fetched_at : str
20-
12+ from readme_parser import ParsedGroup , ParsedSection , parse_readme , parse_sponsors
2113
2214GITHUB_REPO_URL_RE = re .compile (r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$" )
2315
@@ -46,7 +38,7 @@ def extract_github_repo(url: str) -> str | None:
4638 return m .group (1 ) if m else None
4739
4840
49- def load_stars (path : Path ) -> dict [str , StarData ]:
41+ def load_stars (path : Path ) -> dict [str , dict ]:
5042 """Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
5143 if path .exists ():
5244 try :
@@ -76,68 +68,55 @@ def sort_key(entry: dict) -> tuple[int, int, int, str]:
7668
7769
def extract_entries(
    categories: list[ParsedSection],
    groups: list[ParsedGroup],
) -> list[dict]:
    """Flatten categories into individual library entries for table display.

    Entries appearing in multiple categories are merged into a single entry
    with lists of categories and groups.

    Args:
        categories: Parsed sections; each is read for its ``name`` and its
            ``entries`` (each entry is read for ``name``, ``url``,
            ``description``, ``subcategory`` and ``also_see``).
        groups: Parsed groups; each is read for its ``name`` and the ``name``
            of every item in its ``categories``.

    Returns:
        One dict per distinct ``(url, name)`` pair, in first-seen order.
        ``description`` and ``also_see`` come from the first occurrence;
        ``categories``, ``groups`` and ``subcategories`` accumulate across all
        occurrences without duplicates. ``stars``, ``owner`` and
        ``last_commit_at`` are initialised to ``None`` here.
    """
    # Reverse index: category name -> name of the group that lists it.
    cat_to_group = {
        cat["name"]: group["name"]
        for group in groups
        for cat in group["categories"]
    }

    seen: dict[tuple[str, str], dict[str, Any]] = {}  # (url, name) -> entry
    entries: list[dict[str, Any]] = []
    for cat in categories:
        cat_name = cat["name"]
        # Categories not listed under any group fall back to "Other".
        group_name = cat_to_group.get(cat_name, "Other")
        for entry in cat["entries"]:
            key = (entry["url"], entry["name"])
            existing = seen.get(key)
            if existing is None:
                # First sighting: create the record with empty membership
                # lists so the merge logic below handles new and repeated
                # entries through one code path.
                existing = {
                    "name": entry["name"],
                    "url": entry["url"],
                    "description": entry["description"],
                    "categories": [],
                    "groups": [],
                    "subcategories": [],
                    "stars": None,
                    "owner": None,
                    "last_commit_at": None,
                    "source_type": detect_source_type(entry["url"]),
                    "also_see": entry["also_see"],
                }
                seen[key] = existing
                entries.append(existing)

            if cat_name not in existing["categories"]:
                existing["categories"].append(cat_name)
            if group_name not in existing["groups"]:
                existing["groups"].append(group_name)

            subcat = entry["subcategory"]
            if subcat:
                # Scope the subcategory by its category so identically named
                # subcategories in different categories stay distinct.
                scoped = f"{cat_name} > {subcat}"
                if not any(s["value"] == scoped for s in existing["subcategories"]):
                    existing["subcategories"].append({"name": subcat, "value": scoped})
    return entries
127114
128115
129- def format_stars_short (stars : int ) -> str :
130- """Format star count as compact string like '230k'."""
131- if stars >= 1000 :
132- return f"{ stars // 1000 } k"
133- return str (stars )
134-
135-
136- def build (repo_root : str ) -> None :
116+ def build (repo_root : Path ) -> None :
137117 """Main build: parse README, render single-page HTML via Jinja2 templates."""
138- repo = Path (repo_root )
139- website = repo / "website"
140- readme_text = (repo / "README.md" ).read_text (encoding = "utf-8" )
118+ website = repo_root / "website"
119+ readme_text = (repo_root / "README.md" ).read_text (encoding = "utf-8" )
141120
142121 subtitle = ""
143122 for line in readme_text .split ("\n " ):
@@ -156,7 +135,10 @@ def build(repo_root: str) -> None:
156135 stars_data = load_stars (website / "data" / "github_stars.json" )
157136
158137 repo_self = stars_data .get ("vinta/awesome-python" , {})
159- repo_stars = format_stars_short (repo_self ["stars" ]) if "stars" in repo_self else None
138+ repo_stars = None
139+ if "stars" in repo_self :
140+ stars_val = repo_self ["stars" ]
141+ repo_stars = f"{ stars_val // 1000 } k" if stars_val >= 1000 else str (stars_val )
160142
161143 for entry in entries :
162144 repo_key = extract_github_repo (entry ["url" ])
@@ -189,7 +171,7 @@ def build(repo_root: str) -> None:
189171 total_entries = total_entries ,
190172 total_categories = len (categories ),
191173 repo_stars = repo_stars ,
192- build_date = datetime .now (timezone . utc ).strftime ("%B %d, %Y" ),
174+ build_date = datetime .now (UTC ).strftime ("%B %d, %Y" ),
193175 sponsors = sponsors ,
194176 ),
195177 encoding = "utf-8" ,
@@ -208,4 +190,4 @@ def build(repo_root: str) -> None:
208190
209191
if __name__ == "__main__":
    # The repository root is two levels above this script; build() takes it
    # as a Path.
    build(Path(__file__).parent.parent)
0 commit comments