Skip to content

Commit bc64f06

Browse files
authored
refactor(pypi): factor out a simple implementation of the PyPI cache (#3639)
We want to keep a `dict`-like interface, and later we would like to use the same interface to do more things. I expect the cache key to change in the future (i.e. to include the requested versions) so that we can check whether the right versions are already in the MODULE.bazel.lock file or whether we actually need to call PyPI. Work towards #2731
1 parent 82f78b3 commit bc64f06

7 files changed

Lines changed: 73 additions & 17 deletions

File tree

python/private/pypi/BUILD.bazel

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ bzl_library(
123123
":pep508_env_bzl",
124124
":pip_repository_attrs_bzl",
125125
":platform_bzl",
126+
":pypi_cache_bzl",
126127
":simpleapi_download_bzl",
127128
":whl_library_bzl",
128129
"//python/private:auth_bzl",
@@ -355,6 +356,11 @@ bzl_library(
355356
srcs = ["platform.bzl"],
356357
)
357358

359+
bzl_library(
360+
name = "pypi_cache_bzl",
361+
srcs = ["pypi_cache.bzl"],
362+
)
363+
358364
bzl_library(
359365
name = "pypi_repo_utils_bzl",
360366
srcs = ["pypi_repo_utils.bzl"],

python/private/pypi/extension.bzl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ load(":parse_whl_name.bzl", "parse_whl_name")
2727
load(":pep508_env.bzl", "env")
2828
load(":pip_repository_attrs.bzl", "ATTRS")
2929
load(":platform.bzl", _plat = "platform")
30+
load(":pypi_cache.bzl", "pypi_cache")
3031
load(":simpleapi_download.bzl", "simpleapi_download")
3132
load(":whl_library.bzl", "whl_library")
3233

@@ -224,7 +225,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
224225
# dict[str repo, HubBuilder]
225226
# See `hub_builder.bzl%hub_builder()` for `HubBuilder`
226227
pip_hub_map = {}
227-
simpleapi_cache = {}
228+
simpleapi_cache = pypi_cache()
228229

229230
for mod in module_ctx.modules:
230231
for pip_attr in mod.tags.parse:

python/private/pypi/hub_builder.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def hub_builder(
3131
simpleapi_download_fn,
3232
evaluate_markers_fn,
3333
logger,
34-
simpleapi_cache = {}):
34+
simpleapi_cache):
3535
"""Return a hub builder instance
3636
3737
Args:

python/private/pypi/pypi_cache.bzl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""A cache for the PyPI index contents evaluation.
2+
3+
This is designed to work as follows:
4+
- in-memory cache for results of PyPI index queries, so that we are not calling PyPI multiple times
5+
for the same package for different hub repos.
6+
7+
In the future the same will be used to:
8+
- Store PyPI index query results as facts in the MODULE.bazel.lock file
9+
"""
10+
11+
def pypi_cache(store = None):
    """Create the cache object used for PyPI index queries.

    Args:
        store: {type}`dict | None` Optional backing storage. When a dict is
            given (even an empty one) that very same dict is used, so the
            caller observes every entry that gets cached. When `None`, a
            fresh private dict is created.

    Returns:
        A {type}`struct` exposing a `dict`-like subset of methods:
        * `setdefault(key, parsed_result)` - cache the value unless the key
          is already present and return whatever is cached for the key.
        * `get(key)` - return the cached value or `None`.
    """

    # NOTE: `store or {}` would replace a caller-provided *empty* dict with a
    # new one (an empty dict is falsy), detaching the cache from the storage
    # the caller handed in. Only fall back to a new dict for `None`.
    backing_store = store if store != None else {}

    # The lambdas below refer to `self` before it is assigned. They are only
    # invoked through the returned struct, i.e. after `self` exists.
    # buildifier: disable=uninitialized
    self = struct(
        _store = backing_store,
        setdefault = lambda key, parsed_result: _pypi_cache_setdefault(self, key, parsed_result),
        get = lambda key: _pypi_cache_get(self, key),
    )

    # buildifier: enable=uninitialized
    return self

def _pypi_cache_setdefault(self, key, parsed_result):
    """Store the value if not yet cached.

    Args:
        self: {type}`struct` The self of this implementation.
        key: {type}`str` The cache key, can be any string.
        parsed_result: {type}`struct` The result of `parse_simpleapi_html` function.

    Returns:
        The value cached under `key`: `parsed_result` if the key was not
        cached before this call, otherwise the previously cached value.
    """
    return self._store.setdefault(key, parsed_result)

def _pypi_cache_get(self, key):
    """Return the parsed result from the cache.

    Args:
        self: {type}`struct` The self of this implementation.
        key: {type}`str` The cache key, can be any string.

    Returns:
        The {type}`struct` or `None` based on if the result is in the cache or not.
    """
    return self._store.get(key)

python/private/pypi/simpleapi_download.bzl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,13 @@ def simpleapi_download(
4949
* netrc: The netrc parameter for ctx.download, see http_file for docs.
5050
* auth_patterns: The auth_patterns parameter for ctx.download, see
5151
http_file for docs.
52-
cache: A dictionary that can be used as a cache between calls during a
53-
single evaluation of the extension. We use a dictionary as a cache
54-
so that we can reuse calls to the simple API when evaluating the
55-
extension. Using the canonical_id parameter of the module_ctx would
56-
deposit the simple API responses to the bazel cache and that is
57-
undesirable because additions to the PyPI index would not be
58-
reflected when re-evaluating the extension unless we do
59-
`bazel clean --expunge`.
52+
cache: An opaque object used to cache call results. For implementation
53+
see ./pypi_cache.bzl file. We use the canonical_id parameter for the key
54+
value to ensure that distribution fetches from different indexes do not cause
55+
cache collisions, because the index may return different locations from where
56+
the files should be downloaded. We are not using the built-in cache in the
57+
`download` function because the index may get updated at any time and we need
58+
to be able to refresh the data.
6059
parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
6160
read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
6261
Used in tests.
@@ -197,8 +196,9 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
197196
))
198197

199198
cache_key = real_url
200-
if cache_key in cache:
201-
return struct(success = True, output = cache[cache_key])
199+
cached_result = cache.get(cache_key)
200+
if cached_result:
201+
return struct(success = True, output = cached_result)
202202

203203
output_str = envsubst(
204204
url,

tests/pypi/hub_builder/hub_builder_tests.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def hub_builder(
9999
"unit-test",
100100
printer = log_printer,
101101
),
102+
simpleapi_cache = {},
102103
)
103104
self = struct(
104105
build = lambda: env.expect.that_struct(

tests/pypi/simpleapi_download/simpleapi_download_tests.bzl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
""
1616

1717
load("@rules_testing//lib:test_suite.bzl", "test_suite")
18+
load("//python/private/pypi:pypi_cache.bzl", "pypi_cache") # buildifier: disable=bzl-visibility
1819
load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility
1920

2021
_tests = []
@@ -52,7 +53,7 @@ def _test_simple(env):
5253
sources = ["foo", "bar", "baz"],
5354
envsubst = [],
5455
),
55-
cache = {},
56+
cache = pypi_cache(),
5657
parallel_download = True,
5758
read_simpleapi = read_simpleapi,
5859
)
@@ -112,7 +113,7 @@ def _test_fail(env):
112113
sources = ["foo", "bar", "baz"],
113114
envsubst = [],
114115
),
115-
cache = {},
116+
cache = pypi_cache(),
116117
parallel_download = True,
117118
read_simpleapi = read_simpleapi,
118119
_fail = fails.append,
@@ -165,7 +166,7 @@ def _test_download_url(env):
165166
sources = ["foo", "bar", "baz"],
166167
envsubst = [],
167168
),
168-
cache = {},
169+
cache = pypi_cache(),
169170
parallel_download = False,
170171
get_auth = lambda ctx, urls, ctx_attr: struct(),
171172
)
@@ -201,7 +202,7 @@ def _test_download_url_parallel(env):
201202
sources = ["foo", "bar", "baz"],
202203
envsubst = [],
203204
),
204-
cache = {},
205+
cache = pypi_cache(),
205206
parallel_download = True,
206207
get_auth = lambda ctx, urls, ctx_attr: struct(),
207208
)
@@ -237,7 +238,7 @@ def _test_download_envsubst_url(env):
237238
sources = ["foo", "bar", "baz"],
238239
envsubst = ["INDEX_URL"],
239240
),
240-
cache = {},
241+
cache = pypi_cache(),
241242
parallel_download = False,
242243
get_auth = lambda ctx, urls, ctx_attr: struct(),
243244
)

0 commit comments

Comments
 (0)