Skip to content

Commit 62dc717

Browse files
committed
Fix BatchStore metadata preservation paths
Preserve user vlmeta when BatchStore recreates its empty backing SChunk during initial layout inference, avoid persisting empty batch_lengths metadata that breaks vlmeta.getall() on empty stores, and keep user meta/vlmeta when copy(storage=...) is used.

Add BatchStore regression tests covering:
- vlmeta preservation during inferred layout initialization
- clear()/delete-last on empty stores
- metadata preservation on copy(storage=...)
1 parent 5a1cd0f commit 62dc717

2 files changed

Lines changed: 91 additions & 5 deletions

File tree

src/blosc2/batch_store.py

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -370,6 +370,10 @@ def _load_batch_lengths(self) -> list[int] | None:
370370
def _persist_batch_lengths(self) -> None:
371371
if self._batch_lengths is None:
372372
return
373+
if len(self._batch_lengths) == 0:
374+
if _BATCHSTORE_VLMETA_KEY in self.vlmeta:
375+
del self.vlmeta[_BATCHSTORE_VLMETA_KEY]
376+
return
373377
self.schunk.vlmeta[_BATCHSTORE_VLMETA_KEY] = {"batch_lengths": list(self._batch_lengths)}
374378

375379
def _get_batch_lengths(self) -> list[int] | None:
@@ -542,6 +546,7 @@ def _persist_layout_metadata(self) -> None:
542546
if len(self) > 0:
543547
return
544548
batch_lengths = None if self._batch_lengths is None else list(self._batch_lengths)
549+
user_vlmeta = self._user_vlmeta_items() if len(self.vlmeta) > 0 else {}
545550
storage = self._make_storage()
546551
fixed_meta = dict(storage.meta or {})
547552
fixed_meta["batchstore"] = {
@@ -559,6 +564,8 @@ def _persist_layout_metadata(self) -> None:
559564
storage=storage,
560565
)
561566
self._attach_schunk(schunk)
567+
for key, value in user_vlmeta.items():
568+
self.vlmeta[key] = value
562569
if batch_lengths is not None and self._batch_lengths is None:
563570
self._batch_lengths = batch_lengths
564571

@@ -894,17 +901,24 @@ def copy(self, **kwargs: Any) -> BatchStore:
894901
kwargs["dparams"] = kwargs.get("dparams", copy.deepcopy(self.dparams))
895902
kwargs["max_blocksize"] = kwargs.get("max_blocksize", self.max_blocksize)
896903
kwargs["serializer"] = kwargs.get("serializer", self.serializer)
897-
898-
if "storage" not in kwargs:
904+
user_vlmeta = self._user_vlmeta_items() if len(self.vlmeta) > 0 else {}
905+
906+
if "storage" in kwargs:
907+
storage = self._coerce_storage(kwargs["storage"], {})
908+
fixed_meta = self._copy_meta()
909+
if storage.meta is not None:
910+
fixed_meta.update(storage.meta)
911+
storage.meta = fixed_meta
912+
kwargs["storage"] = storage
913+
else:
899914
kwargs["meta"] = self._copy_meta()
900915
kwargs["contiguous"] = kwargs.get("contiguous", self.schunk.contiguous)
901916
if "urlpath" in kwargs and "mode" not in kwargs:
902917
kwargs["mode"] = "w"
903918

904919
out = BatchStore(**kwargs)
905-
if "storage" not in kwargs and len(self.vlmeta) > 0:
906-
for key, value in self._user_vlmeta_items().items():
907-
out.vlmeta[key] = value
920+
for key, value in user_vlmeta.items():
921+
out.vlmeta[key] = value
908922
out.extend(self)
909923
return out
910924

tests/test_batch_store.py

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,26 @@ def test_batchstore_arrow_ipc_roundtrip():
143143
blosc2.remove_urlpath(urlpath)
144144

145145

146+
def test_batchstore_inferred_layout_preserves_user_vlmeta():
147+
barray = blosc2.BatchStore()
148+
barray.vlmeta["user"] = {"x": 1}
149+
150+
barray.append([1, 2, 3])
151+
152+
assert barray.vlmeta["user"] == {"x": 1}
153+
154+
155+
def test_batchstore_arrow_layout_persistence_preserves_user_vlmeta():
156+
pa = pytest.importorskip("pyarrow")
157+
158+
barray = blosc2.BatchStore(serializer="arrow")
159+
barray.vlmeta["user"] = {"x": 1}
160+
161+
barray.append(pa.array([[1], [2, 3]]))
162+
163+
assert barray.vlmeta["user"] == {"x": 1}
164+
165+
146166
def test_batchstore_from_cframe():
147167
barray = blosc2.BatchStore()
148168
barray.extend(BATCHES)
@@ -233,6 +253,38 @@ def test_batchstore_pop_keeps_batch_lengths_metadata_in_sync():
233253
assert items["nbatches"].startswith("2 (items per batch: mean=2.00")
234254

235255

256+
def test_batchstore_clear_keeps_empty_store_vlmeta_readable():
257+
urlpath = "test_batchstore_clear_empty_vlmeta.b2b"
258+
blosc2.remove_urlpath(urlpath)
259+
260+
barray = blosc2.BatchStore(urlpath=urlpath, mode="w", contiguous=True)
261+
barray.append([1, 2, 3])
262+
barray.clear()
263+
264+
assert barray.vlmeta.getall() == {}
265+
266+
reopened = blosc2.open(urlpath, mode="r")
267+
assert reopened.vlmeta.getall() == {}
268+
269+
blosc2.remove_urlpath(urlpath)
270+
271+
272+
def test_batchstore_delete_last_keeps_empty_store_vlmeta_readable():
273+
urlpath = "test_batchstore_delete_last_empty_vlmeta.b2b"
274+
blosc2.remove_urlpath(urlpath)
275+
276+
barray = blosc2.BatchStore(urlpath=urlpath, mode="w", contiguous=True)
277+
barray.append([1, 2, 3])
278+
barray.delete(0)
279+
280+
assert barray.vlmeta.getall() == {}
281+
282+
reopened = blosc2.open(urlpath, mode="r")
283+
assert reopened.vlmeta.getall() == {}
284+
285+
blosc2.remove_urlpath(urlpath)
286+
287+
236288
def test_batchstore_zstd_does_not_use_dict_by_default():
237289
barray = blosc2.BatchStore()
238290
assert barray.cparams.codec == blosc2.Codec.ZSTD
@@ -573,6 +625,26 @@ def test_batchstore_copy():
573625
blosc2.remove_urlpath(copy_path)
574626

575627

628+
def test_batchstore_copy_with_storage_preserves_user_metadata():
629+
urlpath = "test_batchstore_copy_storage.b2b"
630+
copy_path = "test_batchstore_copy_storage_out.b2b"
631+
blosc2.remove_urlpath(urlpath)
632+
blosc2.remove_urlpath(copy_path)
633+
634+
original = blosc2.BatchStore(urlpath=urlpath, mode="w", contiguous=True, meta={"user_meta": {"a": 1}})
635+
original.vlmeta["user_vlmeta"] = {"b": 2}
636+
original.extend(BATCHES)
637+
638+
copied = original.copy(storage=blosc2.Storage(contiguous=False, urlpath=copy_path, mode="w"))
639+
640+
assert [batch[:] for batch in copied] == [batch[:] for batch in original]
641+
assert copied.meta["user_meta"] == {"a": 1}
642+
assert copied.vlmeta["user_vlmeta"] == {"b": 2}
643+
644+
blosc2.remove_urlpath(urlpath)
645+
blosc2.remove_urlpath(copy_path)
646+
647+
576648
@pytest.mark.parametrize(("contiguous", "nthreads"), [(False, 2), (True, 4)])
577649
def test_batchstore_multithreaded_inner_vl(contiguous, nthreads):
578650
batches = []

0 commit comments

Comments
 (0)