Skip to content

Commit eeb8650

Browse files
committed
Allow reopened proxies to fill caches after read-only open
1 parent dcf76f6 commit eeb8650

3 files changed

Lines changed: 44 additions & 0 deletions

File tree

src/blosc2/proxy.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,11 @@ class Proxy(blosc2.Operand):
192192
193193
This can be used to cache chunks of a regular data container which follows the
194194
:ref:`ProxySource` or :ref:`ProxyNDSource` interfaces.
195+
196+
If a persisted proxy is reopened in read mode, lazy cache fills still need to
197+
update the local cache. In that case the cache file is reopened internally
198+
in append mode on demand when :meth:`fetch`, :meth:`afetch`, or
199+
:meth:`__getitem__` needs to populate missing chunks.
195200
"""
196201

197202
def __init__(
@@ -209,6 +214,9 @@ def __init__(
209214
mode: str, optional
210215
"a" means read/write (create if it doesn't exist); "w" means create
211216
(overwrite if it exists). Default is "a".
217+
When a persisted proxy is reopened through :func:`blosc2.open` with
218+
``mode="r"``, later lazy cache fills may reopen the cache file
219+
internally in append mode so missing chunks can be written locally.
212220
kwargs: dict, optional
213221
Keyword arguments supported:
214222
@@ -259,11 +267,21 @@ def __init__(
259267
)
260268
self._cache.fill_special(self.src.nbytes // self.src.typesize, blosc2.SpecialValue.UNINIT)
261269
self._schunk_cache = getattr(self._cache, "schunk", self._cache)
270+
if self.urlpath is None:
271+
self.urlpath = getattr(self._schunk_cache, "urlpath", None)
262272
vlmeta = kwargs.get("vlmeta")
263273
if vlmeta:
264274
for key in vlmeta:
265275
self._schunk_cache.vlmeta[key] = vlmeta[key]
266276

277+
def _ensure_writable_cache(self) -> None:
278+
"""Reopen a persisted cache writable when lazy fetch needs to fill chunks."""
279+
cache_urlpath = getattr(self._schunk_cache, "urlpath", None)
280+
if cache_urlpath is None or getattr(self._schunk_cache, "mode", None) != "r":
281+
return
282+
self._cache = blosc2.blosc2_ext.open(cache_urlpath, "a", 0)
283+
self._schunk_cache = getattr(self._cache, "schunk", self._cache)
284+
267285
def fetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk:
268286
"""
269287
Get the container used as cache with the requested data updated.
@@ -279,6 +297,13 @@ def fetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc
279297
out: :ref:`NDArray` or :ref:`SChunk`
280298
The local container used to cache the already requested data.
281299
300+
Notes
301+
-----
302+
If the proxy cache was reopened read-only from disk, this method may
303+
reopen that cache internally in append mode before filling missing
304+
chunks. This preserves the logical read-only open of the proxy source
305+
while still allowing the local cache to be populated lazily.
306+
282307
Examples
283308
--------
284309
>>> import numpy as np
@@ -292,6 +317,7 @@ def fetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc
292317
[2 3]
293318
[4 5]]
294319
"""
320+
self._ensure_writable_cache()
295321
if item == ():
296322
# Full realization
297323
for info in self._schunk_cache.iterchunks_info():
@@ -327,6 +353,9 @@ async def afetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray
327353
-----
328354
This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource`
329355
has an async `aget_chunk` method.
356+
If the proxy cache was reopened read-only from disk, this method may
357+
reopen that cache internally in append mode before filling missing
358+
chunks.
330359
331360
Examples
332361
--------
@@ -384,6 +413,7 @@ async def afetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray
384413
"""
385414
if not callable(getattr(self.src, "aget_chunk", None)):
386415
raise NotImplementedError("afetch is only available if the source has an aget_chunk method")
416+
self._ensure_writable_cache()
387417
if item == ():
388418
# Full realization
389419
for info in self._schunk_cache.iterchunks_info():

src/blosc2/schunk.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1721,6 +1721,9 @@ def open(
17211721
it will return the Python-Blosc2 container used to cache the data which
17221722
can be a :ref:`SChunk` or a :ref:`NDArray` and may not have all the data
17231723
initialized (e.g. if the user has not accessed it yet).
1724+
When such a persisted proxy is opened with ``mode="r"``, later lazy cache
1725+
fills may reopen the local cache internally in append mode so missing
1726+
chunks can still be written.
17241727
17251728
* When opening a :ref:`LazyExpr` keep in mind the note above regarding operands.
17261729

tests/ndarray/test_proxy.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,17 @@ def test_open(urlpath, shape, chunks, blocks, slices, dtype):
105105
blosc2.remove_urlpath(proxy_urlpath)
106106

107107

108+
def test_open_read_mode_allows_proxy_cache_fill(tmp_path):
109+
src_urlpath = str(tmp_path / "src.b2nd")
110+
proxy_urlpath = str(tmp_path / "proxy.b2nd")
111+
112+
a = blosc2.asarray(np.arange(25, dtype=np.int64).reshape(5, 5), urlpath=src_urlpath, mode="w")
113+
_ = blosc2.Proxy(a, urlpath=proxy_urlpath, mode="w")
114+
115+
proxy = blosc2.open(proxy_urlpath, mode="r")
116+
np.testing.assert_array_equal(proxy[:], a[:])
117+
118+
108119
# Test the ProxyNDSources interface
109120
@pytest.mark.parametrize(
110121
("shape", "chunks", "blocks"),

0 commit comments

Comments
 (0)