Skip to content

Commit a91a4d5

Browse files
authored
fix(pypi): handle unnormalized package names when extracting sdist version (#3635)
With this change we handle more of the edge cases where the filenames are more complex. The initial code had bugs when the sdist filename had `-` in the name part. The new code is easier to read and a little more explicit about how it handles things. We will use it later to return only the `whl` and `sdist` entries for the versions requested through the requirements lock file, which makes it possible to write facts only for the versions that we use. Work towards #2731
1 parent 06aa36d commit a91a4d5

5 files changed

Lines changed: 112 additions & 23 deletions

File tree

python/private/pypi/BUILD.bazel

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ bzl_library(
241241
bzl_library(
242242
name = "parse_simpleapi_html_bzl",
243243
srcs = ["parse_simpleapi_html.bzl"],
244+
deps = [
245+
":version_from_filename_bzl",
246+
],
244247
)
245248

246249
bzl_library(
@@ -416,6 +419,11 @@ bzl_library(
416419
],
417420
)
418421

422+
bzl_library(
423+
name = "version_from_filename_bzl",
424+
srcs = ["version_from_filename.bzl"],
425+
)
426+
419427
bzl_library(
420428
name = "whl_config_repo_bzl",
421429
srcs = ["whl_config_repo.bzl"],

python/private/pypi/parse_simpleapi_html.bzl

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
Parse SimpleAPI HTML in Starlark.
1717
"""
1818

19+
load(":version_from_filename.bzl", "version_from_filename")
20+
1921
def parse_simpleapi_html(*, url, content):
2022
"""Get the package URLs for given shas by parsing the Simple API HTML.
2123
@@ -64,7 +66,7 @@ def parse_simpleapi_html(*, url, content):
6466

6567
head, _, _ = tail.rpartition("</a>")
6668
maybe_metadata, _, filename = head.rpartition(">")
67-
version = _version(filename)
69+
version = version_from_filename(filename)
6870
sha256s_by_version.setdefault(version, []).append(sha256)
6971

7072
metadata_sha256 = ""
@@ -105,28 +107,6 @@ def parse_simpleapi_html(*, url, content):
105107
sha256s_by_version = sha256s_by_version,
106108
)
107109

108-
_SDIST_EXTS = [
109-
".tar", # handles any compression
110-
".zip",
111-
]
112-
113-
def _version(filename):
114-
# See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
115-
116-
_, _, tail = filename.partition("-")
117-
version, _, _ = tail.partition("-")
118-
if version != tail:
119-
# The format is {name}-{version}-{whl_specifiers}.whl
120-
return version
121-
122-
# NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path
123-
124-
# {name}-{version}.{ext}
125-
for ext in _SDIST_EXTS:
126-
version, _, _ = version.partition(ext) # build or name
127-
128-
return version
129-
130110
def _get_root_directory(url):
131111
scheme_end = url.find("://")
132112
if scheme_end == -1:
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""Parse the version of the thing just from the filename. This is useful for selecting files based on the requested version."""
2+
3+
_SDIST_EXTS = [
    ".tar",  # handles any compression
    ".zip",
]

def _sdist_stem(filename):
    """Strip a supported sdist extension from the filename.

    Args:
        filename: {type}`str` the filename.

    Returns:
        The `{name}-{version}` part of the filename, or an empty string if the
        filename does not end with a supported sdist extension.
    """
    for ext in _SDIST_EXTS:
        stem, sep, suffix = filename.rpartition(ext)
        if not sep or not stem:
            # Extension not present at all, or nothing in front of it.
            continue

        if not suffix:
            # The filename ends exactly with the extension.
            return stem

        # Only tarballs may carry a single trailing compression suffix (e.g.
        # `.gz`, `.xz`). Anything else means that `ext` was matched inside the
        # project name or version (e.g. `my.tarfile-1.0.zip`) and is not the
        # real extension.
        is_compression_suffix = (
            suffix.startswith(".") and
            len(suffix) > 1 and
            "." not in suffix[1:] and
            "-" not in suffix
        )
        if ext == ".tar" and is_compression_suffix:
            return stem

    return ""

def version_from_filename(filename, _fail = None):
    """Parse the version of the filename.

    Wheels are expected to be named `{name}-{version}-{whl_specifiers}.whl` and
    sdists `{name}-{version}.{ext}`, where `{name}` of an sdist may itself
    contain dashes if it is not normalized.

    Args:
        filename: {type}`str` the filename.
        _fail: The fail function. If set, it is called with a message when the
            version cannot be parsed.

    Returns:
        A string version or None if we could not parse the version.
    """
    # See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format

    if filename.endswith(".whl"):
        # The format is {name}-{version}-{whl_specifiers}.whl
        _, _, tail = filename.partition("-")
        version, _, _ = tail.partition("-")
        return version

    # NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path

    # {name}-{version}.{ext}
    stem = _sdist_stem(filename)
    if not stem:
        if _fail:
            _fail("Unsupported sdist extension: {filename}".format(filename = filename))
        return None

    # Based on PEP440 the version number cannot include dashes, so the version
    # is everything after the last dash.
    _, sep, version = stem.rpartition("-")
    if not sep or not version:
        # Without a dash the whole stem is the project name; do not report it
        # as the version.
        if _fail:
            _fail("Could not find the version in sdist filename: {filename}".format(filename = filename))
        return None

    return version
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
load(":version_from_filename_tests.bzl", "version_from_filename_test_suite")
2+
3+
version_from_filename_test_suite(name = "version_from_filename_tests")
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
""
2+
3+
load("@rules_testing//lib:test_suite.bzl", "test_suite")
4+
load("//python/private/pypi:version_from_filename.bzl", "version_from_filename") # buildifier: disable=bzl-visibility
5+
6+
_tests = []
7+
8+
def _test_wheel_version_extraction(env):
9+
# Case 1: wheel
10+
env.expect.that_str(version_from_filename("foo-1.2.3-py3-none-any.whl")).equals("1.2.3")
11+
12+
_tests.append(_test_wheel_version_extraction)
13+
14+
def _test_sdist_version_extraction(env):
15+
# Case 1: Standard sdist
16+
env.expect.that_str(version_from_filename("foo-1.2.3.tar.gz")).equals("1.2.3")
17+
18+
# Case 2: PEP 625 - Project name has underscores (normalized from dashes)
19+
# If the package is 'my-pkg', the sdist might be 'my_pkg-1.0.0.tar.gz'
20+
env.expect.that_str(version_from_filename("my_pkg-1.0.0.tar.gz")).equals("1.0.0")
21+
22+
# Case 3: Project name has multiple underscores
23+
env.expect.that_str(version_from_filename("very_long_project_name-0.5.0.zip")).equals("0.5.0")
24+
25+
# Case 4: Legacy sdist with hyphens in name
26+
# Note: Modern tools normalize this, but we should support the hyphen split
27+
env.expect.that_str(version_from_filename("complex-name-1.2.3.tar.gz")).equals("1.2.3")
28+
29+
# Case 5: Version contains an underscore (e.g. a non-normalized post-release separator)
30+
env.expect.that_str(version_from_filename("pkg-1.2.3_post1.tar.gz")).equals("1.2.3_post1")
31+
32+
# Case 6: custom compression
33+
env.expect.that_str(version_from_filename("pkg-1.2.3_post1.tar.xz")).equals("1.2.3_post1")
34+
35+
_tests.append(_test_sdist_version_extraction)
36+
37+
def _test_sdist_version_extraction_fail(env):
38+
failures = []
39+
40+
# Case 1: 7z
41+
env.expect.that_str(version_from_filename("foo-1.2.3.7z")).equals(None)
42+
env.expect.that_str(version_from_filename("foo-1.2.3.7z", _fail = failures.append)).equals(None)
43+
env.expect.that_collection(failures).contains_exactly(["Unsupported sdist extension: foo-1.2.3.7z"])
44+
45+
# Case 2: egg
46+
failures.clear()
47+
env.expect.that_str(version_from_filename("foo-1.2.3-py3.egg", _fail = failures.append)).equals(None)
48+
env.expect.that_collection(failures).contains_exactly(["Unsupported sdist extension: foo-1.2.3-py3.egg"])
49+
50+
_tests.append(_test_sdist_version_extraction_fail)
51+
52+
def version_from_filename_test_suite(name):
53+
test_suite(
54+
name = name,
55+
basic_tests = _tests,
56+
)

0 commit comments

Comments
 (0)