55# SPDX-License-Identifier: BSD-3-Clause
66#######################################################################
77
8+ from __future__ import annotations
9+
810import os
911import shutil
1012import tempfile
1113import zipfile
12- from collections .abc import Iterator , Set
13- from typing import Any
14+ from typing import TYPE_CHECKING , Any
1415
1516import numpy as np
1617
1718import blosc2
1819from blosc2 .c2array import C2Array
1920from blosc2 .embed_store import EmbedStore
20- from blosc2 .schunk import SChunk
21+ from blosc2 .schunk import SChunk , _process_opened_object
22+
23+ if TYPE_CHECKING :
24+ from collections .abc import Iterator , Set
2125
2226
2327class DictStore :
@@ -244,20 +248,36 @@ def estore(self) -> EmbedStore:
244248 """Access the underlying EmbedStore."""
245249 return self ._estore
246250
247- def __setitem__ (self , key : str , value : blosc2 .Array | SChunk ) -> None :
@staticmethod
def _value_nbytes(value: blosc2.Array | SChunk | blosc2.VLArray) -> int:
    """Return the ``nbytes`` figure used to size ``value``.

    A ``blosc2.VLArray`` is measured through its ``schunk`` attribute;
    any other value is measured through its own ``nbytes`` attribute.
    """
    # Pick the object that carries the size, then read it once.
    sized = value.schunk if isinstance(value, blosc2.VLArray) else value
    return sized.nbytes
256+
@staticmethod
def _is_external_value(value: blosc2.Array | SChunk | blosc2.VLArray) -> bool:
    """Tell whether ``value`` is a supported container with a truthy ``urlpath``.

    Only ``blosc2.NDArray``, ``SChunk`` and ``blosc2.VLArray`` instances
    qualify; a missing, ``None`` or empty ``urlpath`` yields ``False``.
    """
    if not isinstance(value, (blosc2.NDArray, SChunk, blosc2.VLArray)):
        return False
    # A missing attribute is treated the same as an unset one.
    return bool(getattr(value, "urlpath", None))
262+
@staticmethod
def _external_ext(value: blosc2.Array | SChunk | blosc2.VLArray) -> str:
    """Return the file extension used when ``value`` is stored externally.

    ``blosc2.NDArray`` instances map to ``".b2nd"``; every other value
    maps to ``".b2f"``.
    """
    return ".b2nd" if isinstance(value, blosc2.NDArray) else ".b2f"
268+
269+ def __setitem__ (self , key : str , value : blosc2 .Array | SChunk | blosc2 .VLArray ) -> None :
248270 """Add a node to the DictStore."""
249271 if isinstance (value , np .ndarray ):
250272 value = blosc2 .asarray (value , cparams = self .cparams , dparams = self .dparams )
251273 # C2Array should always go to embed store; let estore handle it directly
252274 if isinstance (value , C2Array ):
253275 self ._estore [key ] = value
254276 return
255- exceeds_threshold = self .threshold is not None and value .nbytes >= self .threshold
256- # Consider both NDArray and SChunk external files (have urlpath)
257- external_file = isinstance (value , (blosc2 .NDArray , SChunk )) and getattr (value , "urlpath" , None )
277+ exceeds_threshold = self .threshold is not None and self ._value_nbytes (value ) >= self .threshold
278+ external_file = self ._is_external_value (value )
258279 if exceeds_threshold or (external_file and self .threshold is None ):
259- # Choose extension based on type
260- ext = ".b2f" if isinstance (value , SChunk ) else ".b2nd"
280+ ext = self ._external_ext (value )
261281 # Convert key to a proper file path within the tree directory
262282 rel_key = key .lstrip ("/" )
263283 dest_path = os .path .join (self .working_dir , rel_key + ext )
@@ -272,7 +292,7 @@ def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
272292 if hasattr (value , "save" ):
273293 value .save (urlpath = dest_path )
274294 else :
275- # An SChunk does not have a save() method
295+ # SChunk and VLArray can both be persisted via their cframe.
276296 with open (dest_path , "wb" ) as f :
277297 f .write (value .to_cframe ())
278298 else :
@@ -290,20 +310,21 @@ def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
290310 value = blosc2 .from_cframe (value .to_cframe ())
291311 self ._estore [key ] = value
292312
293- def __getitem__ (self , key : str ) -> blosc2 .NDArray | SChunk | C2Array :
313+ def __getitem__ (self , key : str ) -> blosc2 .NDArray | SChunk | blosc2 . VLArray | C2Array :
294314 """Retrieve a node from the DictStore."""
295315 # Check map_tree first
296316 if key in self .map_tree :
297317 filepath = self .map_tree [key ]
298318 if filepath in self .offsets :
299319 offset = self .offsets [filepath ]["offset" ]
300- return blosc2 .blosc2_ext .open (
320+ opened = blosc2 .blosc2_ext .open (
301321 self .b2z_path ,
302322 mode = "r" ,
303323 offset = offset ,
304324 mmap_mode = self .mmap_mode ,
305325 dparams = self .dparams ,
306326 )
327+ return _process_opened_object (opened )
307328 else :
308329 urlpath = os .path .join (self .working_dir , filepath )
309330 if os .path .exists (urlpath ):
@@ -319,7 +340,7 @@ def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array:
319340 # Fall back to EmbedStore
320341 return self ._estore [key ]
321342
322- def get (self , key : str , default : Any = None ) -> blosc2 .NDArray | SChunk | C2Array | Any :
343+ def get (self , key : str , default : Any = None ) -> blosc2 .NDArray | SChunk | blosc2 . VLArray | C2Array | Any :
323344 """Retrieve a node, or default if not found."""
324345 try :
325346 return self [key ]
0 commit comments