-
Notifications
You must be signed in to change notification settings - Fork 7.5k
feat(extensions): authenticate GitHub-hosted catalog and download requests with GITHUB_TOKEN/GH_TOKEN #2087
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
147a0af
c4ef1d1
eb4f894
f32d059
2ccd213
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1411,6 +1411,57 @@ def _validate_catalog_url(self, url: str) -> None: | |
| if not parsed.netloc: | ||
| raise ValidationError("Catalog URL must be a valid URL with a host.") | ||
|
|
||
| def _make_request(self, url: str) -> "urllib.request.Request": | ||
| """Build a urllib Request, adding a GitHub auth header when available. | ||
|
|
||
| Reads GITHUB_TOKEN or GH_TOKEN from the environment and attaches an | ||
| ``Authorization: token <value>`` header for requests to GitHub-hosted | ||
| domains (``raw.githubusercontent.com``, ``github.com``, | ||
| ``api.github.com``). Non-GitHub URLs are returned as plain requests | ||
| so credentials are never leaked to third-party hosts. | ||
| """ | ||
| import os | ||
| import urllib.request | ||
| from urllib.parse import urlparse | ||
|
|
||
| headers: Dict[str, str] = {} | ||
| token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") | ||
| hostname = (urlparse(url).hostname or "").lower() | ||
| github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} | ||
| if token and hostname in github_hosts: | ||
| headers["Authorization"] = f"token {token}" | ||
| return urllib.request.Request(url, headers=headers) | ||
|
|
||
| def _open_url(self, url: str, timeout: int = 10): | ||
| """Open a URL using _make_request, stripping auth on cross-host redirects. | ||
|
|
||
| When the request carries an Authorization header, a custom redirect | ||
| handler is used to drop that header if the redirect target is not a | ||
| GitHub-hosted domain, preventing token leakage to CDNs or other | ||
| third-party hosts that GitHub may redirect to. | ||
| """ | ||
| import urllib.request | ||
| from urllib.parse import urlparse | ||
|
|
||
| req = self._make_request(url) | ||
|
|
||
| if not req.get_header("Authorization"): | ||
| return urllib.request.urlopen(req, timeout=timeout) | ||
|
|
||
| _github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} | ||
|
|
||
| class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): | ||
| def redirect_request(_self, req, fp, code, msg, headers, newurl): | ||
| new_req = super().redirect_request(req, fp, code, msg, headers, newurl) | ||
| if new_req is not None: | ||
| hostname = (urlparse(newurl).hostname or "").lower() | ||
| if hostname not in _github_hosts: | ||
| new_req.headers.pop("Authorization", None) | ||
| return new_req | ||
|
|
||
| opener = urllib.request.build_opener(_StripAuthOnRedirect) | ||
| return opener.open(req, timeout=timeout) | ||
|
|
||
|
Comment on lines
+1414
to
+1464
|
||
| def _load_catalog_config(self, config_path: Path) -> Optional[List[CatalogEntry]]: | ||
| """Load catalog stack configuration from a YAML file. | ||
|
|
||
|
|
@@ -1601,7 +1652,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False | |
|
|
||
| # Fetch from network | ||
| try: | ||
| with urllib.request.urlopen(entry.url, timeout=10) as response: | ||
| with self._open_url(entry.url, timeout=10) as response: | ||
| catalog_data = json.loads(response.read()) | ||
|
|
||
| if "schema_version" not in catalog_data or "extensions" not in catalog_data: | ||
|
|
@@ -1718,7 +1769,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: | |
| import urllib.request | ||
| import urllib.error | ||
|
|
||
| with urllib.request.urlopen(catalog_url, timeout=10) as response: | ||
| with self._open_url(catalog_url, timeout=10) as response: | ||
| catalog_data = json.loads(response.read()) | ||
|
|
||
| # Validate catalog structure | ||
|
|
@@ -1861,7 +1912,7 @@ def download_extension(self, extension_id: str, target_dir: Optional[Path] = Non | |
|
|
||
| # Download the ZIP file | ||
| try: | ||
| with urllib.request.urlopen(download_url, timeout=60) as response: | ||
| with self._open_url(download_url, timeout=60) as response: | ||
| zip_data = response.read() | ||
|
|
||
| zip_path.write_bytes(zip_data) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2142,6 +2142,158 @@ def test_clear_cache(self, temp_dir): | |||||||||||||||||||||||||||||||||||||||||||||
| assert not catalog.cache_file.exists() | ||||||||||||||||||||||||||||||||||||||||||||||
| assert not catalog.cache_metadata_file.exists() | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| # --- _make_request / GitHub auth --- | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| def _make_catalog(self, temp_dir): | ||||||||||||||||||||||||||||||||||||||||||||||
| project_dir = temp_dir / "project" | ||||||||||||||||||||||||||||||||||||||||||||||
| project_dir.mkdir() | ||||||||||||||||||||||||||||||||||||||||||||||
| (project_dir / ".specify").mkdir() | ||||||||||||||||||||||||||||||||||||||||||||||
| return ExtensionCatalog(project_dir) | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| def test_make_request_no_token_no_auth_header(self, temp_dir, monkeypatch): | ||||||||||||||||||||||||||||||||||||||||||||||
| """Without a token, requests carry no Authorization header.""" | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.delenv("GITHUB_TOKEN", raising=False) | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.delenv("GH_TOKEN", raising=False) | ||||||||||||||||||||||||||||||||||||||||||||||
| catalog = self._make_catalog(temp_dir) | ||||||||||||||||||||||||||||||||||||||||||||||
| req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") | ||||||||||||||||||||||||||||||||||||||||||||||
| assert "Authorization" not in req.headers | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| def test_make_request_github_token_added_for_raw_githubusercontent(self, temp_dir, monkeypatch): | ||||||||||||||||||||||||||||||||||||||||||||||
| """GITHUB_TOKEN is attached for raw.githubusercontent.com URLs.""" | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.delenv("GH_TOKEN", raising=False) | ||||||||||||||||||||||||||||||||||||||||||||||
| catalog = self._make_catalog(temp_dir) | ||||||||||||||||||||||||||||||||||||||||||||||
| req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") | ||||||||||||||||||||||||||||||||||||||||||||||
| assert req.get_header("Authorization") == "token ghp_testtoken" | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| def test_make_request_gh_token_fallback(self, temp_dir, monkeypatch): | ||||||||||||||||||||||||||||||||||||||||||||||
| """GH_TOKEN is used when GITHUB_TOKEN is absent.""" | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.delenv("GITHUB_TOKEN", raising=False) | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.setenv("GH_TOKEN", "ghp_ghtoken") | ||||||||||||||||||||||||||||||||||||||||||||||
| catalog = self._make_catalog(temp_dir) | ||||||||||||||||||||||||||||||||||||||||||||||
| req = catalog._make_request("https://github.com/org/repo/releases/download/v1/ext.zip") | ||||||||||||||||||||||||||||||||||||||||||||||
| assert req.get_header("Authorization") == "token ghp_ghtoken" | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| def test_make_request_github_token_takes_precedence_over_gh_token(self, temp_dir, monkeypatch): | ||||||||||||||||||||||||||||||||||||||||||||||
| """GITHUB_TOKEN takes precedence over GH_TOKEN when both are set.""" | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.setenv("GITHUB_TOKEN", "ghp_primary") | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.setenv("GH_TOKEN", "ghp_secondary") | ||||||||||||||||||||||||||||||||||||||||||||||
| catalog = self._make_catalog(temp_dir) | ||||||||||||||||||||||||||||||||||||||||||||||
| req = catalog._make_request("https://api.github.com/repos/org/repo") | ||||||||||||||||||||||||||||||||||||||||||||||
| assert req.get_header("Authorization") == "token ghp_primary" | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| def test_make_request_token_not_added_for_non_github_url(self, temp_dir, monkeypatch): | ||||||||||||||||||||||||||||||||||||||||||||||
| """Auth header is never attached to non-GitHub URLs to prevent credential leakage.""" | ||||||||||||||||||||||||||||||||||||||||||||||
| monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") | ||||||||||||||||||||||||||||||||||||||||||||||
| catalog = self._make_catalog(temp_dir) | ||||||||||||||||||||||||||||||||||||||||||||||
| req = catalog._make_request("https://internal.example.com/catalog.json") | ||||||||||||||||||||||||||||||||||||||||||||||
| assert "Authorization" not in req.headers | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||
| def test_make_request_token_not_added_for_github_lookalike_host(self, temp_dir, monkeypatch): | |
| """Auth header is not attached to non-GitHub hosts that only contain github.com in the hostname.""" | |
| monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") | |
| catalog = self._make_catalog(temp_dir) | |
| req = catalog._make_request("https://github.com.evil.com/org/repo/releases/download/v1/ext.zip") | |
| assert "Authorization" not in req.headers | |
| def test_make_request_token_not_added_for_non_github_host_with_github_in_path(self, temp_dir, monkeypatch): | |
| """Auth header is not attached when a non-GitHub host includes github.com only in the URL path.""" | |
| monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") | |
| catalog = self._make_catalog(temp_dir) | |
| req = catalog._make_request("https://evil.example.com/github.com/org/repo/releases/download/v1/ext.zip") | |
| assert "Authorization" not in req.headers | |
| def test_make_request_token_not_added_for_non_github_host_with_github_in_query(self, temp_dir, monkeypatch): | |
| """Auth header is not attached when a non-GitHub host includes github.com only in the query string.""" | |
| monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") | |
| catalog = self._make_catalog(temp_dir) | |
| req = catalog._make_request("https://evil.example.com/download?source=https://github.com/org/repo/releases/download/v1/ext.zip") | |
| assert "Authorization" not in req.headers |
Copilot
AI
Apr 9, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test docstring says the auth header is passed to urlopen, but the implementation path with Authorization uses urllib.request.build_opener(...).open(). Adjust the docstring to match the behavior being asserted.
| """download_extension passes Authorization header to urlopen for GitHub URLs.""" | |
| """download_extension passes Authorization header via opener for GitHub URLs.""" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_open_url() strips the Authorization header on redirects to any host outside {raw.githubusercontent.com, github.com, api.github.com}. GitHub archive URLs (e.g. https://github.com///archive/refs/tags/.zip) redirect to codeload.github.com, so with a token set this logic will drop the header and private-repo archive downloads will still 404. Consider including codeload.github.com (and any other GitHub-owned redirect targets you need to support) in the allowlist used for redirect decisions, or otherwise preserving Authorization for redirects that remain within trusted GitHub domains.