Skip to content

Commit 6535606

Browse files
committed
Initial support for Blosc2 cframes serialized with msgpack
1 parent 185c67e commit 6535606

5 files changed

Lines changed: 145 additions & 7 deletions

File tree

src/blosc2/_msgpack_utils.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,51 @@
77

88
from __future__ import annotations
99

10-
from msgpack import packb, unpackb
10+
from msgpack import ExtType, packb, unpackb
1111

1212
from blosc2 import blosc2_ext
1313

14+
# Msgpack extension type codes are application-defined. Reserve code 42 in
15+
# python-blosc2 for values serialized as Blosc2 CFrames via ``to_cframe()`` and
16+
# reconstructed with ``blosc2.from_cframe()``. Keep this stable for backward
17+
# compatibility with persisted msgpack payloads produced by this package.
18+
_BLOSC2_EXT_CODE = 42
19+
20+
21+
def _encode_msgpack_ext(obj):
22+
import blosc2
23+
24+
if isinstance(
25+
obj,
26+
(
27+
blosc2.NDArray,
28+
blosc2.SChunk,
29+
blosc2.VLArray,
30+
blosc2.BatchStore,
31+
blosc2.EmbedStore,
32+
),
33+
):
34+
return ExtType(_BLOSC2_EXT_CODE, obj.to_cframe())
35+
return blosc2_ext.encode_tuple(obj)
36+
1437

1538
def msgpack_packb(value):
16-
return packb(value, default=blosc2_ext.encode_tuple, strict_types=True, use_bin_type=True)
39+
return packb(value, default=_encode_msgpack_ext, strict_types=True, use_bin_type=True)
1740

1841

1942
def decode_tuple_list_hook(obj):
20-
if obj and obj[0] == "__tuple__":
43+
if obj and isinstance(obj[0], str) and obj[0] == "__tuple__":
2144
return tuple(obj[1:])
2245
return obj
2346

2447

48+
def _decode_msgpack_ext(code, data):
49+
import blosc2
50+
51+
if code == _BLOSC2_EXT_CODE:
52+
return blosc2.from_cframe(data, copy=True)
53+
return ExtType(code, data)
54+
55+
2556
def msgpack_unpackb(payload):
26-
return unpackb(payload, list_hook=decode_tuple_list_hook)
57+
return unpackb(payload, list_hook=decode_tuple_list_hook, ext_hook=_decode_msgpack_ext)

src/blosc2/batch_store.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,12 @@ class BatchStore:
166166
If not provided, a value is inferred from the first batch.
167167
serializer : {"msgpack", "arrow"}, optional
168168
Serializer used for batch payloads. ``"msgpack"`` is the default and is
169-
the general-purpose choice for Python items. ``"arrow"`` is optional and
170-
requires ``pyarrow``.
169+
the general-purpose choice for Python items, including nested Blosc2
170+
containers such as :class:`blosc2.NDArray`, :class:`blosc2.SChunk`,
171+
:class:`blosc2.VLArray`, :class:`blosc2.BatchStore`, and
172+
:class:`blosc2.EmbedStore`, which are serialized transparently via
173+
:meth:`to_cframe` / :func:`blosc2.from_cframe`. ``"arrow"`` is optional
174+
and requires ``pyarrow``.
171175
_from_schunk : blosc2.SChunk, optional
172176
Internal hook used when reopening an already-tagged BatchStore.
173177
**kwargs

src/blosc2/vlarray.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,14 @@ def _check_serialized_size(buffer: bytes) -> None:
2929

3030

3131
class VLArray:
32-
"""A variable-length array backed by an :class:`blosc2.SChunk`."""
32+
"""A variable-length array backed by an :class:`blosc2.SChunk`.
33+
34+
Entries are serialized with msgpack before compression. Standard Python
35+
objects are supported, and Blosc2 containers such as
36+
:class:`blosc2.NDArray`, :class:`blosc2.SChunk`, :class:`blosc2.VLArray`,
37+
:class:`blosc2.BatchStore`, and :class:`blosc2.EmbedStore` are serialized
38+
transparently via :meth:`to_cframe` / :func:`blosc2.from_cframe`.
39+
"""
3340

3441
@staticmethod
3542
def _set_typesize_one(cparams: blosc2.CParams | dict | None) -> blosc2.CParams | dict:

tests/test_batch_store.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# SPDX-License-Identifier: BSD-3-Clause
66
#######################################################################
77

8+
import numpy as np
89
import pytest
910

1011
import blosc2
@@ -27,6 +28,24 @@ def _storage(contiguous, urlpath, mode="w"):
2728
return blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode)
2829

2930

31+
def _make_nested_blosc2_objects():
32+
ndarray = blosc2.arange(6, dtype=np.int32)
33+
34+
schunk = blosc2.SChunk(chunksize=16)
35+
schunk.append_data(np.arange(4, dtype=np.int32))
36+
37+
nested_vlarray = blosc2.VLArray()
38+
nested_vlarray.extend(["alpha", {"beta": 2}])
39+
40+
nested_batchstore = blosc2.BatchStore(max_blocksize=2)
41+
nested_batchstore.extend([[1, 2], ["x", {"y": 3}]])
42+
43+
estore = blosc2.EmbedStore()
44+
estore["/node"] = blosc2.arange(3, dtype=np.int32)
45+
46+
return ndarray, schunk, nested_vlarray, nested_batchstore, estore
47+
48+
3049
@pytest.mark.parametrize(
3150
("contiguous", "urlpath"),
3251
[
@@ -181,6 +200,31 @@ def test_batchstore_from_cframe():
181200
assert [batch[:] for batch in restored2] == expected
182201

183202

203+
def test_batchstore_msgpack_supports_blosc2_objects():
204+
ndarray, schunk, nested_vlarray, nested_batchstore, estore = _make_nested_blosc2_objects()
205+
206+
barray = blosc2.BatchStore(max_blocksize=2)
207+
barray.append([ndarray, schunk, nested_vlarray, nested_batchstore, estore])
208+
209+
restored = barray[0][:]
210+
211+
assert isinstance(restored[0], blosc2.NDArray)
212+
assert np.array_equal(restored[0][:], ndarray[:])
213+
214+
assert isinstance(restored[1], blosc2.SChunk)
215+
assert restored[1].decompress_chunk(0) == schunk.decompress_chunk(0)
216+
217+
assert isinstance(restored[2], blosc2.VLArray)
218+
assert list(restored[2]) == list(nested_vlarray)
219+
220+
assert isinstance(restored[3], blosc2.BatchStore)
221+
assert [batch[:] for batch in restored[3]] == [batch[:] for batch in nested_batchstore]
222+
223+
assert isinstance(restored[4], blosc2.EmbedStore)
224+
assert list(restored[4].keys()) == ["/node"]
225+
assert np.array_equal(restored[4]["/node"][:], estore["/node"][:])
226+
227+
184228
def test_batchstore_info():
185229
barray = blosc2.BatchStore()
186230
barray.extend(BATCHES)

tests/test_vlarray.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# SPDX-License-Identifier: BSD-3-Clause
66
#######################################################################
77

8+
import numpy as np
89
import pytest
910

1011
import blosc2
@@ -26,6 +27,24 @@ def _storage(contiguous, urlpath, mode="w"):
2627
return blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode)
2728

2829

30+
def _make_nested_blosc2_objects():
31+
ndarray = blosc2.arange(6, dtype=np.int32)
32+
33+
schunk = blosc2.SChunk(chunksize=16)
34+
schunk.append_data(np.arange(4, dtype=np.int32))
35+
36+
nested_vlarray = blosc2.VLArray()
37+
nested_vlarray.extend(["alpha", {"beta": 2}])
38+
39+
nested_batchstore = blosc2.BatchStore(max_blocksize=2)
40+
nested_batchstore.extend([[1, 2], ["x", {"y": 3}]])
41+
42+
estore = blosc2.EmbedStore()
43+
estore["/node"] = blosc2.arange(3, dtype=np.int32)
44+
45+
return ndarray, schunk, nested_vlarray, nested_batchstore, estore
46+
47+
2948
@pytest.mark.parametrize(
3049
("contiguous", "urlpath"),
3150
[
@@ -117,6 +136,39 @@ def test_vlarray_from_cframe():
117136
assert list(restored2) == expected
118137

119138

139+
def test_vlarray_msgpack_supports_blosc2_objects():
140+
ndarray, schunk, nested_vlarray, nested_batchstore, estore = _make_nested_blosc2_objects()
141+
142+
vlarray = blosc2.VLArray()
143+
vlarray.append(
144+
{
145+
"ndarray": ndarray,
146+
"schunk": schunk,
147+
"vlarray": nested_vlarray,
148+
"batchstore": nested_batchstore,
149+
"estore": estore,
150+
}
151+
)
152+
153+
restored = vlarray[0]
154+
155+
assert isinstance(restored["ndarray"], blosc2.NDArray)
156+
assert np.array_equal(restored["ndarray"][:], ndarray[:])
157+
158+
assert isinstance(restored["schunk"], blosc2.SChunk)
159+
assert restored["schunk"].decompress_chunk(0) == schunk.decompress_chunk(0)
160+
161+
assert isinstance(restored["vlarray"], blosc2.VLArray)
162+
assert list(restored["vlarray"]) == list(nested_vlarray)
163+
164+
assert isinstance(restored["batchstore"], blosc2.BatchStore)
165+
assert [batch[:] for batch in restored["batchstore"]] == [batch[:] for batch in nested_batchstore]
166+
167+
assert isinstance(restored["estore"], blosc2.EmbedStore)
168+
assert list(restored["estore"].keys()) == ["/node"]
169+
assert np.array_equal(restored["estore"]["/node"][:], estore["/node"][:])
170+
171+
120172
def test_vlarray_info():
121173
vlarray = blosc2.VLArray()
122174
vlarray.extend(VALUES)

0 commit comments

Comments
 (0)