Skip to content

Commit 663cfe1

Browse files
committed
Add vlmeta support to LazyArray objects
1 parent 660a165 commit 663cfe1

5 files changed

Lines changed: 135 additions & 2 deletions

File tree

doc/reference/lazyarray.rst

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ You can get an object following the LazyArray API with any of the following ways
1414

1515
The LazyArray object is a thin wrapper around the expression or user-defined function that allows for lazy computation. This means that the expression is not computed until the ``compute`` or ``__getitem__`` methods are called. The ``compute`` method returns a new NDArray object with the result of the expression evaluation, whereas the ``__getitem__`` method returns a NumPy array instead.
1616

17+
LazyArray objects also support user metadata via :attr:`LazyArray.vlmeta`. For
18+
in-memory objects, this metadata lives on the Python object itself. For
19+
persisted LazyArrays reopened from disk, metadata is synchronized with the
20+
underlying carrier and survives reopening.
21+
1722
See the `LazyExpr`_ and `LazyUDF`_ sections for more information.
1823

1924
.. currentmodule:: blosc2
@@ -33,6 +38,10 @@ See the `LazyExpr`_ and `LazyUDF`_ sections for more information.
3338
---------------
3439
.. automethod:: __getitem__
3540

41+
Attributes
42+
----------
43+
.. autoattribute:: vlmeta
44+
3645
.. _LazyExpr:
3746

3847
LazyExpr
@@ -51,7 +60,7 @@ LazyUDF
5160

5261
To get a LazyUDF object (which is LazyArray-compliant) from a user-defined Python function, use the ``lazyudf`` constructor below. See `a tutorial on how this works <../getting_started/tutorials/03.lazyarray-udf.html>`_.
5362

54-
This object follows the `LazyArray`_ API for computation, although storage is not supported yet.
63+
This object follows the `LazyArray`_ API for computation and storage.
5564

5665
.. autofunction:: lazyudf
5766

src/blosc2/b2objects.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
_B2OBJECT_META_KEY = "b2o"
2424
_B2OBJECT_VERSION = 1
2525
_B2OBJECT_DSL_VERSION = 1
26+
_B2OBJECT_USER_VLMETA_KEY = "_b2o_user_vlmeta"
2627

2728

2829
def make_b2object_carrier(
@@ -44,6 +45,17 @@ def write_b2object_payload(array, payload: dict[str, Any]) -> None:
4445
array.schunk.vlmeta[_B2OBJECT_META_KEY] = payload
4546

4647

48+
def write_b2object_user_vlmeta(array, user_vlmeta: dict[str, Any]) -> None:
    """Store the user metadata mapping on a b2object carrier.

    Parameters
    ----------
    array
        Carrier object exposing ``schunk.vlmeta``.
    user_vlmeta
        Mapping assigned to the ``_B2OBJECT_USER_VLMETA_KEY`` entry of the
        carrier's variable-length metadata.
    """
    carrier_vlmeta = array.schunk.vlmeta
    carrier_vlmeta[_B2OBJECT_USER_VLMETA_KEY] = user_vlmeta
50+
51+
52+
def read_b2object_user_vlmeta(obj) -> dict[str, Any]:
    """Fetch the user metadata stored on a b2object carrier.

    Parameters
    ----------
    obj
        Either an object exposing a ``schunk`` attribute, or an object that
        itself exposes ``vlmeta`` (it is then used directly).

    Returns
    -------
    dict
        The value stored under ``_B2OBJECT_USER_VLMETA_KEY``, or an empty
        dict when that entry is absent.
    """
    # Fall back to ``obj`` itself when it has no ``schunk`` attribute.
    container = getattr(obj, "schunk", obj)
    if _B2OBJECT_USER_VLMETA_KEY in container.vlmeta:
        return container.vlmeta[_B2OBJECT_USER_VLMETA_KEY]
    return {}
57+
58+
4759
def encode_operand_reference(obj):
4860
return blosc2.Ref.from_object(obj).to_dict()
4961

@@ -232,4 +244,5 @@ def open_b2object(obj):
232244
if isinstance(opened, blosc2.LazyExpr | blosc2.LazyUDF):
233245
opened.array = obj
234246
opened.schunk = schunk
247+
opened._set_user_vlmeta(read_b2object_user_vlmeta(obj), sync=False)
235248
return opened

src/blosc2/lazyexpr.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import textwrap
2525
import threading
2626
from abc import ABC, abstractmethod, abstractproperty
27+
from collections.abc import MutableMapping
2728
from dataclasses import asdict
2829
from enum import Enum
2930
from pathlib import Path
@@ -42,7 +43,13 @@
4243

4344
import blosc2
4445

45-
from .b2objects import encode_b2object_payload, make_b2object_carrier, write_b2object_payload
46+
from .b2objects import (
47+
encode_b2object_payload,
48+
make_b2object_carrier,
49+
read_b2object_user_vlmeta,
50+
write_b2object_payload,
51+
write_b2object_user_vlmeta,
52+
)
4653
from .dsl_kernel import DSLKernel, DSLSyntaxError, DSLValidator, specialize_miniexpr_inputs
4754

4855
if blosc2._HAS_NUMBA:
@@ -325,7 +332,64 @@ class LazyArrayEnum(Enum):
325332
UDF = 1
326333

327334

335+
class LazyArrayVLMeta(MutableMapping):
    """User metadata attached to a LazyArray.

    Dict-like proxy over the mapping returned by the owner's
    ``_get_user_vlmeta()``.  Every item assignment or deletion is followed by
    a call to the owner's ``_sync_user_vlmeta()``.

    If that sync step raises, the in-memory mapping is restored to the state
    it had before the failed operation and the exception is re-raised, so the
    proxy never reports a change that the sync step rejected.

    Notes
    -----
    Only operations going through ``__setitem__`` / ``__delitem__`` trigger a
    sync.  Mutating a stored value in place (``vlmeta["cfg"]["x"] = 1``) does
    not; reassign the top-level key instead.
    """

    def __init__(self, lazyarr: "LazyArray"):
        self.lazyarr = lazyarr

    def _sync_or_rollback(self, data, snapshot) -> None:
        """Sync the owner; on failure restore *data* from *snapshot* and re-raise."""
        try:
            self.lazyarr._sync_user_vlmeta()
        except Exception:
            # Restore in place so that any other holder of the same dict object
            # sees the rollback too (key order included).
            data.clear()
            data.update(snapshot)
            raise

    def __getitem__(self, key):
        return self.lazyarr._get_user_vlmeta()[key]

    def __setitem__(self, key, value):
        data = self.lazyarr._get_user_vlmeta()
        snapshot = dict(data)
        data[key] = value
        self._sync_or_rollback(data, snapshot)

    def __delitem__(self, key):
        data = self.lazyarr._get_user_vlmeta()
        snapshot = dict(data)
        # A missing key raises KeyError here, before any sync is attempted.
        del data[key]
        self._sync_or_rollback(data, snapshot)

    def __iter__(self):
        return iter(self.lazyarr._get_user_vlmeta())

    def __len__(self):
        return len(self.lazyarr._get_user_vlmeta())

    def getall(self):
        """Return a shallow copy of all the user metadata as a plain dict."""
        return self.lazyarr._get_user_vlmeta().copy()

    def __repr__(self):
        return repr(self.getall())

    def __str__(self):
        return str(self.getall())
368+
369+
328370
class LazyArray(ABC, blosc2.Operand):
371+
def _get_user_vlmeta(self) -> dict[str, Any]:
372+
if not hasattr(self, "_vlmeta_user"):
373+
self._vlmeta_user = {}
374+
return self._vlmeta_user
375+
376+
def _set_user_vlmeta(self, metadata: dict[str, Any], *, sync: bool = True) -> None:
377+
self._vlmeta_user = dict(metadata)
378+
if sync:
379+
self._sync_user_vlmeta()
380+
381+
def _sync_user_vlmeta(self) -> None:
382+
array = getattr(self, "array", None)
383+
if array is not None:
384+
write_b2object_user_vlmeta(array, self._get_user_vlmeta())
385+
386+
@property
def vlmeta(self) -> LazyArrayVLMeta:
    """User variable-length metadata for this LazyArray."""
    try:
        return self._vlmeta_proxy
    except AttributeError:
        # Build the proxy once and reuse it on later accesses.
        self._vlmeta_proxy = LazyArrayVLMeta(self)
        return self._vlmeta_proxy
392+
329393
@abstractmethod
330394
def indices(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
331395
"""
@@ -494,6 +558,9 @@ def save(self, **kwargs: Any) -> None:
494558
section for more info).
495559
* This is currently only supported for :ref:`LazyExpr` and :ref:`LazyUDF`
496560
(including kernels decorated with :func:`blosc2.dsl_kernel`).
561+
* User metadata can be attached via :attr:`vlmeta`. For in-memory LazyArrays
562+
this stays in memory; for persisted LazyArrays it is serialized and restored
563+
on reopen.
497564
498565
Examples
499566
--------
@@ -3764,6 +3831,7 @@ def _to_b2object_carrier(self, **kwargs):
37643831
**kwargs,
37653832
)
37663833
write_b2object_payload(array, payload)
3834+
write_b2object_user_vlmeta(array, self._get_user_vlmeta())
37673835
return array
37683836

37693837
@classmethod
@@ -4125,6 +4193,7 @@ def save(self, urlpath=None, **kwargs):
41254193
if isinstance(self.func, DSLKernel) and self.func.dsl_source is not None:
41264194
meta["dsl_source"] = self.func.dsl_source
41274195
array.schunk.vlmeta["_LazyArray"] = meta
4196+
write_b2object_user_vlmeta(array, self._get_user_vlmeta())
41284197

41294198
def to_cframe(self) -> bytes:
41304199
return self._to_b2object_carrier().to_cframe()
@@ -4160,6 +4229,7 @@ def _to_b2object_carrier(self, **kwargs):
41604229
**kwargs,
41614230
)
41624231
write_b2object_payload(array, payload)
4232+
write_b2object_user_vlmeta(array, self._get_user_vlmeta())
41634233
return array
41644234

41654235

@@ -4518,6 +4588,7 @@ def open_lazyarray(array):
45184588
new_expr.array = array
45194589
# We want to expose schunk too, so that .info() can be used on the LazyArray
45204590
new_expr.schunk = array.schunk
4591+
new_expr._set_user_vlmeta(read_b2object_user_vlmeta(array), sync=False)
45214592
return new_expr
45224593

45234594

tests/ndarray/test_lazyexpr.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1768,6 +1768,29 @@ def test_save_proxy_operands_reopen_default_mode(tmp_path):
17681768
np.testing.assert_array_equal(restored[:], np.arange(10, dtype=np.int64) * 2)
17691769

17701770

1771+
def test_lazyexpr_vlmeta_in_memory_and_persisted(tmp_path):
    """vlmeta is usable on an in-memory LazyExpr and round-trips through save/open."""
    left = blosc2.asarray(np.arange(5, dtype=np.int64), urlpath=str(tmp_path / "a.b2nd"), mode="w")
    right = blosc2.asarray(np.arange(5, dtype=np.int64), urlpath=str(tmp_path / "b.b2nd"), mode="w")
    lazy_sum = left + right

    # In-memory: entries are readable right after being assigned.
    expected = {"name": "sum", "config": {"scale": 1}}
    for key, value in expected.items():
        lazy_sum.vlmeta[key] = value
    for key, value in expected.items():
        assert lazy_sum.vlmeta[key] == value

    # Persisted: entries set before save() come back after reopening.
    target = str(tmp_path / "expr_vlmeta.b2nd")
    lazy_sum.save(target)
    restored = blosc2.open(target)
    for key, value in expected.items():
        assert restored.vlmeta[key] == value

    # An entry added on the reopened object is visible on a second reopen.
    restored.vlmeta["note"] = "persisted"
    reopened = blosc2.open(target)
    assert reopened.vlmeta["note"] == "persisted"
    np.testing.assert_array_equal(reopened[:], np.arange(5, dtype=np.int64) * 2)
1792+
1793+
17711794
# Test the chaining of multiple lazy expressions
17721795
def test_chain_expressions():
17731796
N = 1_000

tests/ndarray/test_lazyudf.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,23 @@ def test_save_ludf():
501501
blosc2.remove_urlpath(urlpath)
502502

503503

504+
def test_lazyudf_vlmeta_roundtrip(tmp_path):
    """vlmeta entries set on a LazyUDF are restored after save() and reopen."""
    operand_path = tmp_path / "a.b2nd"
    udf_path = tmp_path / "lazyudf_vlmeta.b2nd"
    operand = blosc2.asarray(np.arange(5, dtype=np.int64), urlpath=str(operand_path), mode="w")
    lazy_udf = blosc2.lazyudf(udf1p, (operand,), np.float64)

    expected = {"name": "increment", "attrs": {"version": 1}}
    for key, value in expected.items():
        lazy_udf.vlmeta[key] = value
    lazy_udf.save(urlpath=str(udf_path))

    restored = blosc2.open(str(udf_path))

    assert isinstance(restored, blosc2.LazyUDF)
    for key, value in expected.items():
        assert restored.vlmeta[key] == value
519+
520+
504521
# Test get_chunk method
505522
def test_get_chunk():
506523
a = blosc2.linspace(0, 100, 100, shape=(10, 10), chunks=(3, 4), blocks=(2, 3))

0 commit comments

Comments
 (0)