Skip to content

Commit da5308a

Browse files
committed
Support for deserializing objects in .b2z stores
1 parent 2ccef90 commit da5308a

3 files changed

Lines changed: 99 additions & 37 deletions

File tree

src/blosc2/_msgpack_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,15 @@ def _encode_operand_reference(obj):
3535
}
3636
if isinstance(obj, blosc2.Proxy):
3737
obj = obj._cache
38+
dictstore_urlpath = getattr(obj, "_msgpack_dictstore_urlpath", None)
39+
dictstore_key = getattr(obj, "_msgpack_dictstore_key", None)
40+
if isinstance(dictstore_urlpath, str) and isinstance(dictstore_key, str):
41+
return {
42+
"kind": "dictstore_key",
43+
"version": _BLOSC2_STRUCTURED_VERSION,
44+
"urlpath": dictstore_urlpath,
45+
"key": dictstore_key,
46+
}
3847
if hasattr(obj, "schunk"):
3948
urlpath = obj.schunk.urlpath
4049
if urlpath is None:
@@ -87,6 +96,14 @@ def _decode_operand_reference(payload):
8796
if urlbase is not None and not isinstance(urlbase, str):
8897
raise TypeError("Structured C2Array msgpack payload requires 'urlbase' to be a string or None")
8998
return blosc2.C2Array(path, urlbase=urlbase)
99+
if kind == "dictstore_key":
100+
urlpath = payload.get("urlpath")
101+
if not isinstance(urlpath, str):
102+
raise TypeError("Structured DictStore-key msgpack payload requires a string 'urlpath'")
103+
key = payload.get("key")
104+
if not isinstance(key, str):
105+
raise TypeError("Structured DictStore-key msgpack payload requires a string 'key'")
106+
return blosc2.DictStore(urlpath, mode="r")[key]
90107
if kind == "urlpath":
91108
urlpath = payload.get("urlpath")
92109
if not isinstance(urlpath, str):

src/blosc2/dict_store.py

Lines changed: 57 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,16 @@ def _update_map_tree_from_offsets(self):
329329
if self._probe_external_leaf_offset(filepath):
330330
self.map_tree[self._logical_key_from_relpath(filepath)] = filepath
331331

332+
def _annotate_external_value(
333+
self,
334+
key: str,
335+
value: blosc2.NDArray | SChunk | blosc2.VLArray | blosc2.BatchStore | C2Array,
336+
):
337+
"""Attach DictStore origin metadata so structured msgpack can preserve member identity."""
338+
value._msgpack_dictstore_urlpath = self.localpath
339+
value._msgpack_dictstore_key = key
340+
return value
341+
332342
@property
333343
def estore(self) -> EmbedStore:
334344
"""Access the underlying EmbedStore."""
@@ -416,15 +426,18 @@ def __getitem__(
416426
mmap_mode=self.mmap_mode,
417427
dparams=self.dparams,
418428
)
419-
return _process_opened_object(opened)
429+
return self._annotate_external_value(key, _process_opened_object(opened))
420430
else:
421431
urlpath = os.path.join(self.working_dir, filepath)
422432
if os.path.exists(urlpath):
423-
return blosc2.open(
424-
urlpath,
425-
mode="r" if self.mode == "r" else "a",
426-
mmap_mode=self.mmap_mode if self.mode == "r" else None,
427-
dparams=self.dparams,
433+
return self._annotate_external_value(
434+
key,
435+
blosc2.open(
436+
urlpath,
437+
mode="r" if self.mode == "r" else "a",
438+
mmap_mode=self.mmap_mode if self.mode == "r" else None,
439+
dparams=self.dparams,
440+
),
428441
)
429442
else:
430443
raise KeyError(f"File for key '{key}' not found in offsets or temporary directory.")
@@ -487,22 +500,28 @@ def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]:
487500
if self.is_zip_store:
488501
if filepath in self.offsets:
489502
offset = self.offsets[filepath]["offset"]
490-
yield _process_opened_object(
491-
blosc2.blosc2_ext.open(
492-
self.b2z_path,
493-
mode="r",
494-
offset=offset,
495-
mmap_mode=self.mmap_mode,
496-
dparams=self.dparams,
497-
)
503+
yield self._annotate_external_value(
504+
key,
505+
_process_opened_object(
506+
blosc2.blosc2_ext.open(
507+
self.b2z_path,
508+
mode="r",
509+
offset=offset,
510+
mmap_mode=self.mmap_mode,
511+
dparams=self.dparams,
512+
)
513+
),
498514
)
499515
else:
500516
urlpath = os.path.join(self.working_dir, filepath)
501-
yield blosc2.open(
502-
urlpath,
503-
mode="r" if self.mode == "r" else "a",
504-
mmap_mode=self.mmap_mode if self.mode == "r" else None,
505-
dparams=self.dparams,
517+
yield self._annotate_external_value(
518+
key,
519+
blosc2.open(
520+
urlpath,
521+
mode="r" if self.mode == "r" else "a",
522+
mmap_mode=self.mmap_mode if self.mode == "r" else None,
523+
dparams=self.dparams,
524+
),
506525
)
507526
elif key in self._estore:
508527
yield self._estore[key]
@@ -521,25 +540,31 @@ def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]:
521540
offset = self.offsets[filepath]["offset"]
522541
yield (
523542
key,
524-
_process_opened_object(
525-
blosc2.blosc2_ext.open(
526-
self.b2z_path,
527-
mode="r",
528-
offset=offset,
529-
mmap_mode=self.mmap_mode,
530-
dparams=self.dparams,
531-
)
543+
self._annotate_external_value(
544+
key,
545+
_process_opened_object(
546+
blosc2.blosc2_ext.open(
547+
self.b2z_path,
548+
mode="r",
549+
offset=offset,
550+
mmap_mode=self.mmap_mode,
551+
dparams=self.dparams,
552+
)
553+
),
532554
),
533555
)
534556
else:
535557
urlpath = os.path.join(self.working_dir, filepath)
536558
yield (
537559
key,
538-
blosc2.open(
539-
urlpath,
540-
mode="r" if self.mode == "r" else "a",
541-
mmap_mode=self.mmap_mode if self.mode == "r" else None,
542-
dparams=self.dparams,
560+
self._annotate_external_value(
561+
key,
562+
blosc2.open(
563+
urlpath,
564+
mode="r" if self.mode == "r" else "a",
565+
mmap_mode=self.mmap_mode if self.mode == "r" else None,
566+
dparams=self.dparams,
567+
),
543568
),
544569
)
545570
elif key in self._estore:

tests/test_batch_store.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -341,11 +341,9 @@ def test_batchstore_msgpack_supports_lazyexpr_with_c2array_operand(cat2_context,
341341
np.testing.assert_allclose(restored[:], a_values + b[:])
342342

343343

344-
@pytest.mark.xfail(
345-
strict=True, reason="Structured LazyExpr urlpath operands do not preserve .b2z member offsets"
346-
)
347-
def test_msgpack_lazyexpr_with_b2z_operands(tmp_path):
348-
store_path = tmp_path / "operands.b2z"
344+
@pytest.mark.parametrize("suffix", [".b2d", ".b2z"])
345+
def test_msgpack_lazyexpr_with_dictstore_operands(tmp_path, suffix):
346+
store_path = tmp_path / f"operands{suffix}"
349347
ext_a = tmp_path / "a.b2nd"
350348
ext_b = tmp_path / "b.b2nd"
351349
expected = np.arange(5, dtype=np.int64) * 3
@@ -364,6 +362,28 @@ def test_msgpack_lazyexpr_with_b2z_operands(tmp_path):
364362
np.testing.assert_array_equal(restored[:], expected)
365363

366364

365+
def test_batchstore_msgpack_lazyexpr_with_dictstore_operands(tmp_path):
366+
store_path = tmp_path / "operands.b2z"
367+
ext_a = tmp_path / "a.b2nd"
368+
ext_b = tmp_path / "b.b2nd"
369+
expected = np.arange(5, dtype=np.int64) * 3
370+
371+
a = blosc2.asarray(np.arange(5, dtype=np.int64), urlpath=str(ext_a), mode="w")
372+
b = blosc2.asarray(np.arange(5, dtype=np.int64) * 2, urlpath=str(ext_b), mode="w")
373+
with blosc2.DictStore(str(store_path), mode="w", threshold=None) as dstore:
374+
dstore["/a"] = a
375+
dstore["/b"] = b
376+
377+
with blosc2.DictStore(str(store_path), mode="r") as dstore:
378+
expr = blosc2.lazyexpr("a + b", operands={"a": dstore["/a"], "b": dstore["/b"]})
379+
barray = blosc2.BatchStore(items_per_block=2)
380+
barray.append([expr])
381+
restored = barray[0][0]
382+
383+
assert isinstance(restored, blosc2.LazyExpr)
384+
np.testing.assert_array_equal(restored[:], expected)
385+
386+
367387
@pytest.mark.network
368388
def test_msgpack_roundtrip_c2array_network(cat2_context):
369389
path = "@public/expr/ds-1-2-linspace-float64-b2-(5,)d.b2nd"

0 commit comments

Comments
 (0)