Skip to content

Commit dcf76f6

Browse files
committed
Avoid eager DictStore rehydration and new b2o bundle demo
1 parent c6fa4e4 commit dcf76f6

3 files changed

Lines changed: 126 additions & 16 deletions

File tree

examples/embeded-expr-udf-b2z.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
3+
# All rights reserved.
4+
#
5+
# SPDX-License-Identifier: BSD-3-Clause
6+
#######################################################################
7+
8+
import numpy as np
9+
10+
import blosc2
11+
from blosc2 import where
12+
13+
14+
def show(label, value):
15+
print(f"{label}: {value}")
16+
17+
18+
@blosc2.dsl_kernel
19+
def masked_energy(a, b, mask):
20+
return where(mask > 0, a * a + 2 * b, 0.0)
21+
22+
23+
bundle_path = "example_b2o_bundle.b2z"
24+
blosc2.remove_urlpath(bundle_path)
25+
26+
# Build a portable bundle with ordinary arrays plus persisted lazy recipes.
27+
with blosc2.DictStore(bundle_path, mode="w", threshold=1) as store:
28+
store["/data/a"] = np.linspace(0.0, 1.0, 10, dtype=np.float32)
29+
store["/data/b"] = np.linspace(1.0, 2.0, 10, dtype=np.float32)
30+
store["/data/mask"] = np.asarray([0, 1, 1, 0, 1, 0, 1, 1, 0, 1], dtype=np.int8)
31+
32+
# Reopen the array members through the store so operand refs point back to
33+
# the .b2z container via dictstore_key payloads.
34+
a = store["/data/a"]
35+
b = store["/data/b"]
36+
mask = store["/data/mask"]
37+
38+
expr = blosc2.lazyexpr("(a - b) / (a + b + 1e-6)", operands={"a": a, "b": b})
39+
udf = blosc2.lazyudf(masked_energy, (a, b, mask), dtype=np.float32, shape=a.shape)
40+
41+
# DictStore currently stores array-like external leaves, so persist the
42+
# logical lazy objects through their b2o NDArray carriers.
43+
store["/recipes/expr"] = blosc2.ndarray_from_cframe(expr.to_cframe())
44+
store["/recipes/udf"] = blosc2.ndarray_from_cframe(udf.to_cframe())
45+
46+
show("Bundle created", bundle_path)
47+
48+
# Reopen the bundle read-only. The persisted LazyExpr and LazyUDF can be
49+
# evaluated directly without re-saving, rebuilding, or re-deploying the .b2z.
50+
with blosc2.open(bundle_path, mode="r") as store:
51+
show("Read-only keys", sorted(store.keys()))
52+
53+
expr = store["/recipes/expr"]
54+
udf = store["/recipes/udf"]
55+
56+
expr_result = expr.compute()
57+
udf_result = udf.compute()
58+
59+
show("Reopened expr type", type(expr).__name__)
60+
show("Reopened udf type", type(udf).__name__)
61+
show("Expr operand refs", expr.array.schunk.vlmeta["b2o"]["operands"])
62+
show("UDF operand refs", udf.array.schunk.vlmeta["b2o"]["operands"])
63+
show("Expr values", np.round(expr_result[:], 4))
64+
show("UDF values", udf_result[:])
65+
66+
expected_expr = (store["/data/a"][:] - store["/data/b"][:]) / (
67+
store["/data/a"][:] + store["/data/b"][:] + 1e-6
68+
)
69+
expected_udf = np.where(
70+
store["/data/mask"][:] > 0,
71+
store["/data/a"][:] ** 2 + 2 * store["/data/b"][:],
72+
0.0,
73+
).astype(np.float32)
74+
np.testing.assert_allclose(expr_result[:], expected_expr, rtol=1e-6, atol=1e-6)
75+
np.testing.assert_allclose(udf_result[:], expected_udf, rtol=1e-6, atol=1e-6)
76+
show("Read-only evaluation", "ok")

src/blosc2/dict_store.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -227,30 +227,39 @@ def _opened_external_kind(
227227
rel_path: str,
228228
) -> str | None:
229229
"""Return the supported external leaf kind for an already opened object."""
230-
processed = process_opened_object(opened)
231-
if isinstance(processed, blosc2.BatchStore):
232-
kind = "batchstore"
233-
elif isinstance(processed, blosc2.VLArray):
234-
kind = "vlarray"
235-
elif isinstance(processed, blosc2.NDArray):
230+
meta = getattr(opened, "schunk", opened).meta
231+
if "b2o" in meta and isinstance(opened, blosc2.NDArray):
232+
# Keep b2o carriers as NDArray external leaves during discovery.
233+
# Rehydrating them here can recurse when a lazy recipe points back
234+
# into the same DictStore via dictstore_key refs.
236235
kind = "ndarray"
237-
elif isinstance(processed, SChunk):
238-
kind = "schunk"
236+
processed_name = type(opened).__name__
239237
else:
240-
warnings.warn(
241-
f"Ignoring unsupported Blosc2 object at '{rel_path}' during DictStore discovery: "
242-
f"{type(processed).__name__}",
243-
UserWarning,
244-
stacklevel=2,
245-
)
246-
return None
238+
processed = process_opened_object(opened)
239+
processed_name = type(processed).__name__
240+
if isinstance(processed, blosc2.BatchStore):
241+
kind = "batchstore"
242+
elif isinstance(processed, blosc2.VLArray):
243+
kind = "vlarray"
244+
elif isinstance(processed, blosc2.NDArray):
245+
kind = "ndarray"
246+
elif isinstance(processed, SChunk):
247+
kind = "schunk"
248+
else:
249+
warnings.warn(
250+
f"Ignoring unsupported Blosc2 object at '{rel_path}' during DictStore discovery: "
251+
f"{processed_name}",
252+
UserWarning,
253+
stacklevel=2,
254+
)
255+
return None
247256

248257
expected_ext = cls._expected_ext_from_kind(kind)
249258
found_ext = os.path.splitext(rel_path)[1]
250259
if found_ext != expected_ext:
251260
warnings.warn(
252261
f"External leaf '{rel_path}' uses extension '{found_ext}' but metadata resolves to "
253-
f"{type(processed).__name__}; expected '{expected_ext}'.",
262+
f"{processed_name}; expected '{expected_ext}'.",
254263
UserWarning,
255264
stacklevel=2,
256265
)

tests/test_b2objects.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,28 @@ def test_lazyudf_open_roundtrip(tmp_path):
182182

183183
assert isinstance(restored, blosc2.LazyUDF)
184184
np.testing.assert_allclose(restored[:], (np.arange(5, dtype=np.float64) * 3) ** 2)
185+
186+
187+
def test_b2z_bundle_with_lazy_recipes_opens_read_only(tmp_path):
188+
bundle_path = tmp_path / "bundle.b2z"
189+
190+
with blosc2.DictStore(str(bundle_path), mode="w", threshold=1) as store:
191+
store["/data/a"] = np.arange(5, dtype=np.float64)
192+
store["/data/b"] = np.arange(5, dtype=np.float64) * 2
193+
194+
a = store["/data/a"]
195+
b = store["/data/b"]
196+
expr = blosc2.lazyexpr("a + b", operands={"a": a, "b": b})
197+
udf = blosc2.lazyudf(kernel_add_square, (a, b), dtype=np.float64, shape=a.shape)
198+
199+
store["/recipes/expr"] = blosc2.ndarray_from_cframe(expr.to_cframe())
200+
store["/recipes/udf"] = blosc2.ndarray_from_cframe(udf.to_cframe())
201+
202+
with blosc2.open(str(bundle_path), mode="r") as store:
203+
restored_expr = store["/recipes/expr"]
204+
restored_udf = store["/recipes/udf"]
205+
206+
assert isinstance(restored_expr, blosc2.LazyExpr)
207+
assert isinstance(restored_udf, blosc2.LazyUDF)
208+
np.testing.assert_allclose(restored_expr.compute()[:], np.arange(5, dtype=np.float64) * 3)
209+
np.testing.assert_allclose(restored_udf.compute()[:], (np.arange(5, dtype=np.float64) * 3) ** 2)

0 commit comments

Comments
 (0)