Skip to content

Commit 243c9cb

Browse files
committed
Fix mmap flag propagation and avoid full sidecar materialization
- All blosc2.open() calls now pass mmap_mode=_INDEX_MMAP_MODE, so --no-mmap is honoured consistently across creation and query paths
- _replace_levels_descriptor_tail now slices the summary handle directly instead of decompressing the full sidecar into a NumPy array
- Remove dead helpers _load_level_summaries and _sidecar_block_len
- Merge buffer in full OOC index build now uses max(chunks[0], FULL_OOC_MERGE_BUFFER_ITEMS) to match source geometry
1 parent 4fd86fc commit 243c9cb

1 file changed

Lines changed: 9 additions & 26 deletions

File tree

src/blosc2/indexing.py

Lines changed: 9 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -845,7 +845,7 @@ def _open_sidecar_handle(array: blosc2.NDArray, token: str, category: str, name:
845845
raise RuntimeError("sidecar handle path is not available")
846846
handle = legacy if isinstance(legacy, blosc2.NDArray) else blosc2.asarray(np.asarray(legacy))
847847
else:
848-
handle = blosc2.open(path)
848+
handle = blosc2.open(path, mmap_mode=_INDEX_MMAP_MODE)
849849
_SIDECAR_HANDLE_CACHE[cache_key] = handle
850850
return handle
851851

@@ -1095,20 +1095,15 @@ def _plain_value(value):
10951095
return {key: _plain_value(value) for key, value in cparams.items()}
10961096

10971097

1098-
def _load_array_sidecar(
1099-
array: blosc2.NDArray, token: str, category: str, name: str, path: str | None
1100-
) -> np.ndarray:
1098+
def _load_array_sidecar(array: blosc2.NDArray, token: str, category: str, name: str) -> np.ndarray:
11011099
cache_key = _data_cache_key(array, token, category, name)
11021100
cached = _DATA_CACHE.get(cache_key)
11031101
if cached is not None:
11041102
return cached
1105-
if path is None:
1106-
handle = _SIDECAR_HANDLE_CACHE.get(_sidecar_handle_cache_key(array, token, category, name))
1107-
if handle is None:
1108-
raise RuntimeError("in-memory index metadata is missing from the current process")
1109-
data = _read_sidecar_span(handle, 0, int(handle.shape[0]))
1110-
else:
1111-
data = blosc2.open(path)[:]
1103+
handle = _SIDECAR_HANDLE_CACHE.get(_sidecar_handle_cache_key(array, token, category, name))
1104+
if handle is None:
1105+
raise RuntimeError("in-memory index metadata is missing from the current process")
1106+
data = _read_sidecar_span(handle, 0, int(handle.shape[0]))
11121107
_DATA_CACHE[cache_key] = data
11131108
return data
11141109

@@ -1504,13 +1499,6 @@ def _index_build_threads(cparams: dict | blosc2.CParams | None = None) -> int:
15041499
return _python_executor_threads(int(getattr(blosc2, "nthreads", 1) or 1))
15051500

15061501

1507-
def _sidecar_block_len(sidecar: dict, fallback_block_len: int) -> int:
1508-
path = sidecar.get("path")
1509-
if path is None:
1510-
return fallback_block_len
1511-
return int(blosc2.open(path).blocks[0])
1512-
1513-
15141502
def _medium_nav_segment_divisor(optlevel: int) -> int:
15151503
if optlevel <= 1:
15161504
return 1
@@ -3199,14 +3187,14 @@ def _component_nbytes(array: blosc2.NDArray, descriptor: dict, component: IndexC
31993187
if component.path is not None:
32003188
return int(blosc2.open(component.path, mmap_mode=_INDEX_MMAP_MODE).nbytes)
32013189
token = descriptor["token"]
3202-
return int(_load_array_sidecar(array, token, component.category, component.name, component.path).nbytes)
3190+
return int(_load_array_sidecar(array, token, component.category, component.name).nbytes)
32033191

32043192

32053193
def _component_cbytes(array: blosc2.NDArray, descriptor: dict, component: IndexComponent) -> int:
32063194
if component.path is not None:
32073195
return int(blosc2.open(component.path, mmap_mode=_INDEX_MMAP_MODE).cbytes)
32083196
token = descriptor["token"]
3209-
sidecar = _load_array_sidecar(array, token, component.category, component.name, component.path)
3197+
sidecar = _load_array_sidecar(array, token, component.category, component.name)
32103198
kwargs = {}
32113199
cparams = descriptor.get("cparams")
32123200
if cparams is not None:
@@ -3345,7 +3333,7 @@ def _replace_levels_descriptor_tail(
33453333
for level, level_info in descriptor["levels"].items():
33463334
segment_len = int(level_info["segment_len"])
33473335
start_segment = old_size // segment_len
3348-
prefix = _load_level_summaries(array, descriptor, level)[:start_segment]
3336+
prefix = _open_level_summary_handle(array, descriptor, level)[:start_segment]
33493337
tail_start = start_segment * segment_len
33503338
tail_values = _slice_values_for_target(array, target, tail_start, new_size)
33513339
tail_summaries = _compute_segment_summaries(tail_values, dtype, segment_len)
@@ -3807,11 +3795,6 @@ def _descriptor_for_target(array: blosc2.NDArray, target: dict) -> dict | None:
38073795
return descriptor
38083796

38093797

3810-
def _load_level_summaries(array: blosc2.NDArray, descriptor: dict, level: str) -> np.ndarray:
3811-
level_info = descriptor["levels"][level]
3812-
return _load_array_sidecar(array, descriptor["token"], "summary", level, level_info["path"])
3813-
3814-
38153798
def _open_level_summary_handle(array: blosc2.NDArray, descriptor: dict, level: str):
38163799
level_info = descriptor["levels"][level]
38173800
return _open_sidecar_handle(array, descriptor["token"], "summary_handle", level, level_info["path"])

0 commit comments

Comments
 (0)