Skip to content

Commit cfcc564

Browse files
authored
Merge pull request #3076 from vinta/chore/code-cleanup
chore: simplify website/ Python and polish sponsors section
2 parents a4b7fc6 + 35aee20 commit cfcc564

7 files changed

Lines changed: 116 additions & 190 deletions

File tree

website/build.py

Lines changed: 38 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -4,20 +4,12 @@
44
import json
55
import re
66
import shutil
7-
from datetime import datetime, timezone
7+
from datetime import UTC, datetime
88
from pathlib import Path
9-
from typing import TypedDict
9+
from typing import Any
1010

1111
from jinja2 import Environment, FileSystemLoader
12-
from readme_parser import parse_readme, parse_sponsors
13-
14-
15-
class StarData(TypedDict):
16-
stars: int
17-
owner: str
18-
last_commit_at: str
19-
fetched_at: str
20-
12+
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors
2113

2214
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
2315

@@ -46,7 +38,7 @@ def extract_github_repo(url: str) -> str | None:
4638
return m.group(1) if m else None
4739

4840

49-
def load_stars(path: Path) -> dict[str, StarData]:
41+
def load_stars(path: Path) -> dict[str, dict]:
5042
"""Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
5143
if path.exists():
5244
try:
@@ -76,68 +68,55 @@ def sort_key(entry: dict) -> tuple[int, int, int, str]:
7668

7769

7870
def extract_entries(
79-
categories: list[dict],
80-
groups: list[dict],
71+
categories: list[ParsedSection],
72+
groups: list[ParsedGroup],
8173
) -> list[dict]:
8274
"""Flatten categories into individual library entries for table display.
8375
8476
Entries appearing in multiple categories are merged into a single entry
8577
with lists of categories and groups.
8678
"""
87-
cat_to_group: dict[str, str] = {}
88-
for group in groups:
89-
for cat in group["categories"]:
90-
cat_to_group[cat["name"]] = group["name"]
79+
cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]}
9180

92-
seen: dict[tuple[str, str], dict] = {} # (url, name) -> entry
93-
entries: list[dict] = []
81+
seen: dict[tuple[str, str], dict[str, Any]] = {} # (url, name) -> entry
82+
entries: list[dict[str, Any]] = []
9483
for cat in categories:
9584
group_name = cat_to_group.get(cat["name"], "Other")
9685
for entry in cat["entries"]:
97-
url = entry["url"]
98-
key = (url, entry["name"])
99-
if key in seen:
100-
existing = seen[key]
101-
if cat["name"] not in existing["categories"]:
102-
existing["categories"].append(cat["name"])
103-
if group_name not in existing["groups"]:
104-
existing["groups"].append(group_name)
105-
subcat = entry["subcategory"]
106-
if subcat:
107-
scoped = f"{cat['name']} > {subcat}"
108-
if not any(s["value"] == scoped for s in existing["subcategories"]):
109-
existing["subcategories"].append({"name": subcat, "value": scoped})
110-
else:
111-
merged = {
86+
key = (entry["url"], entry["name"])
87+
existing: dict[str, Any] | None = seen.get(key)
88+
if existing is None:
89+
existing = {
11290
"name": entry["name"],
113-
"url": url,
91+
"url": entry["url"],
11492
"description": entry["description"],
115-
"categories": [cat["name"]],
116-
"groups": [group_name],
117-
"subcategories": [{"name": entry["subcategory"], "value": f"{cat['name']} > {entry['subcategory']}"}] if entry["subcategory"] else [],
93+
"categories": [],
94+
"groups": [],
95+
"subcategories": [],
11896
"stars": None,
11997
"owner": None,
12098
"last_commit_at": None,
121-
"source_type": detect_source_type(url),
99+
"source_type": detect_source_type(entry["url"]),
122100
"also_see": entry["also_see"],
123101
}
124-
seen[key] = merged
125-
entries.append(merged)
102+
seen[key] = existing
103+
entries.append(existing)
104+
if cat["name"] not in existing["categories"]:
105+
existing["categories"].append(cat["name"])
106+
if group_name not in existing["groups"]:
107+
existing["groups"].append(group_name)
108+
subcat = entry["subcategory"]
109+
if subcat:
110+
scoped = f"{cat['name']} > {subcat}"
111+
if not any(s["value"] == scoped for s in existing["subcategories"]):
112+
existing["subcategories"].append({"name": subcat, "value": scoped})
126113
return entries
127114

128115

129-
def format_stars_short(stars: int) -> str:
130-
"""Format star count as compact string like '230k'."""
131-
if stars >= 1000:
132-
return f"{stars // 1000}k"
133-
return str(stars)
134-
135-
136-
def build(repo_root: str) -> None:
116+
def build(repo_root: Path) -> None:
137117
"""Main build: parse README, render single-page HTML via Jinja2 templates."""
138-
repo = Path(repo_root)
139-
website = repo / "website"
140-
readme_text = (repo / "README.md").read_text(encoding="utf-8")
118+
website = repo_root / "website"
119+
readme_text = (repo_root / "README.md").read_text(encoding="utf-8")
141120

142121
subtitle = ""
143122
for line in readme_text.split("\n"):
@@ -156,7 +135,10 @@ def build(repo_root: str) -> None:
156135
stars_data = load_stars(website / "data" / "github_stars.json")
157136

158137
repo_self = stars_data.get("vinta/awesome-python", {})
159-
repo_stars = format_stars_short(repo_self["stars"]) if "stars" in repo_self else None
138+
repo_stars = None
139+
if "stars" in repo_self:
140+
stars_val = repo_self["stars"]
141+
repo_stars = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val)
160142

161143
for entry in entries:
162144
repo_key = extract_github_repo(entry["url"])
@@ -189,7 +171,7 @@ def build(repo_root: str) -> None:
189171
total_entries=total_entries,
190172
total_categories=len(categories),
191173
repo_stars=repo_stars,
192-
build_date=datetime.now(timezone.utc).strftime("%B %d, %Y"),
174+
build_date=datetime.now(UTC).strftime("%B %d, %Y"),
193175
sponsors=sponsors,
194176
),
195177
encoding="utf-8",
@@ -208,4 +190,4 @@ def build(repo_root: str) -> None:
208190

209191

210192
if __name__ == "__main__":
211-
build(str(Path(__file__).parent.parent))
193+
build(Path(__file__).parent.parent)

website/fetch_github_stars.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import os
66
import re
77
import sys
8-
from datetime import datetime, timezone
8+
from collections.abc import Sequence
9+
from datetime import UTC, datetime, timedelta
10+
from itertools import batched
911
from pathlib import Path
1012

1113
import httpx
@@ -44,10 +46,8 @@ def save_cache(cache: dict) -> None:
4446
)
4547

4648

47-
def build_graphql_query(repos: list[str]) -> str:
49+
def build_graphql_query(repos: Sequence[str]) -> str:
4850
"""Build a GraphQL query with aliases for up to 100 repos."""
49-
if not repos:
50-
return ""
5151
parts = []
5252
for i, repo in enumerate(repos):
5353
owner, name = repo.split("/", 1)
@@ -64,7 +64,7 @@ def build_graphql_query(repos: list[str]) -> str:
6464

6565
def parse_graphql_response(
6666
data: dict,
67-
repos: list[str],
67+
repos: Sequence[str],
6868
) -> dict[str, dict]:
6969
"""Parse GraphQL response into {owner/repo: {stars, owner}} dict."""
7070
result = {}
@@ -82,9 +82,7 @@ def parse_graphql_response(
8282
return result
8383

8484

85-
def fetch_batch(
86-
repos: list[str], *, client: httpx.Client,
87-
) -> dict[str, dict]:
85+
def fetch_batch(repos: Sequence[str], client: httpx.Client) -> dict[str, dict]:
8886
"""Fetch star data for a batch of repos via GitHub GraphQL API."""
8987
query = build_graphql_query(repos)
9088
if not query:
@@ -112,7 +110,7 @@ def main() -> None:
112110
print(f"Found {len(current_repos)} GitHub repos in README.md")
113111

114112
cache = load_stars(CACHE_FILE)
115-
now = datetime.now(timezone.utc)
113+
now = datetime.now(UTC)
116114

117115
# Prune entries not in current README
118116
pruned = {k: v for k, v in cache.items() if k in current_repos}
@@ -121,13 +119,13 @@ def main() -> None:
121119
cache = pruned
122120

123121
# Determine which repos need fetching (missing or stale)
122+
max_age = timedelta(hours=CACHE_MAX_AGE_HOURS)
124123
to_fetch = []
125124
for repo in sorted(current_repos):
126125
entry = cache.get(repo)
127126
if entry and "fetched_at" in entry:
128127
fetched = datetime.fromisoformat(entry["fetched_at"])
129-
age_hours = (now - fetched).total_seconds() / 3600
130-
if age_hours < CACHE_MAX_AGE_HOURS:
128+
if now - fetched < max_age:
131129
continue
132130
to_fetch.append(repo)
133131

@@ -150,13 +148,11 @@ def main() -> None:
150148
transport=httpx.HTTPTransport(retries=2),
151149
timeout=30,
152150
) as client:
153-
for i in range(0, len(to_fetch), BATCH_SIZE):
154-
batch = to_fetch[i : i + BATCH_SIZE]
155-
batch_num = i // BATCH_SIZE + 1
151+
for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
156152
print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...")
157153

158154
try:
159-
results = fetch_batch(batch, client=client)
155+
results = fetch_batch(batch, client)
160156
except httpx.HTTPStatusError as e:
161157
print(f"HTTP error {e.response.status_code}", file=sys.stderr)
162158
if e.response.status_code == 401:

0 commit comments

Comments
 (0)