|
70 | 70 | _HOT_CACHE_BYTES: int = 0 |
71 | 71 | # Persistent VLArray handles: resolved urlpath -> open VLArray object. |
72 | 72 | _QUERY_CACHE_STORE_HANDLES: dict[str, object] = {} |
| 73 | +# Cached mmap handles for data arrays used in full-query gather: urlpath -> NDArray. |
| 74 | +_GATHER_MMAP_HANDLES: dict[str, object] = {} |
73 | 75 | _HOT_CACHE_GLOBAL_SCOPE = ("global", 0) |
74 | 76 |
|
75 | 77 | FULL_OOC_RUN_ITEMS = 2_000_000 |
@@ -668,6 +670,11 @@ def _invalidate_query_cache(array: blosc2.NDArray) -> None: |
668 | 670 | with contextlib.suppress(KeyError, Exception): |
669 | 671 | del array.schunk.vlmeta[QUERY_CACHE_VLMETA_KEY] |
670 | 672 | _hot_cache_clear(scope=scope) |
| 673 | + # Drop any cached mmap handle for this array's data file so a re-opened or |
| 674 | + # extended array is not served from a stale mapping. |
| 675 | + urlpath = getattr(array, "urlpath", None) |
| 676 | + if urlpath is not None: |
| 677 | + _GATHER_MMAP_HANDLES.pop(str(urlpath), None) |
671 | 678 |
|
672 | 679 |
|
673 | 680 | # --------------------------------------------------------------------------- |
@@ -4628,6 +4635,25 @@ def _light_worker_source(where_x): |
4628 | 4635 | return where_x |
4629 | 4636 |
|
4630 | 4637 |
|
| 4638 | +def _gather_mmap_source(where_x): |
| 4639 | + """Return a cached mmap handle for *where_x* for use in repeated gather operations. |
| 4640 | +
|
| 4641 | + On Windows mmap is disabled (see ``_INDEX_MMAP_MODE``), so the original handle |
| 4642 | + is returned unchanged. |
| 4643 | + """ |
| 4644 | + if _INDEX_MMAP_MODE is None: |
| 4645 | + return where_x |
| 4646 | + urlpath = getattr(where_x, "urlpath", None) |
| 4647 | + if not _supports_block_reads(where_x) or urlpath is None: |
| 4648 | + return where_x |
| 4649 | + urlpath = str(urlpath) |
| 4650 | + handle = _GATHER_MMAP_HANDLES.get(urlpath) |
| 4651 | + if handle is None: |
| 4652 | + handle = blosc2.open(urlpath, mmap_mode=_INDEX_MMAP_MODE) |
| 4653 | + _GATHER_MMAP_HANDLES[urlpath] = handle |
| 4654 | + return handle |
| 4655 | + |
| 4656 | + |
4631 | 4657 | def _light_match_from_span(span: np.ndarray, plan: IndexPlan) -> np.ndarray: |
4632 | 4658 | if plan.target is not None and plan.target.get("source") == "expression": |
4633 | 4659 | field_values = _values_from_numpy_target(span, plan.target) |
@@ -5531,16 +5557,19 @@ def evaluate_full_query(where: dict, plan: IndexPlan) -> np.ndarray: |
5531 | 5557 | if plan.exact_positions is None: |
5532 | 5558 | raise ValueError("full evaluation requires exact positions") |
5533 | 5559 | if plan.base is not None: |
| 5560 | + # Use a cached mmap handle when available so blosc2_schunk_get_lazychunk can return |
| 5561 | + # a zero-copy pointer into the mapped region instead of malloc+pread per block. |
| 5562 | + gather_source = _gather_mmap_source(where["_where_x"]) |
5534 | 5563 | block_gather_threshold = int(plan.base.blocks[0]) |
5535 | 5564 | if len(plan.exact_positions) <= block_gather_threshold: |
5536 | 5565 | return _gather_positions_by_block( |
5537 | | - where["_where_x"], |
| 5566 | + gather_source, |
5538 | 5567 | plan.exact_positions, |
5539 | 5568 | int(plan.base.chunks[0]), |
5540 | 5569 | int(plan.base.blocks[0]), |
5541 | 5570 | int(plan.base.shape[0]), |
5542 | 5571 | ) |
5543 | | - return _gather_positions_by_chunk(where["_where_x"], plan.exact_positions, int(plan.base.chunks[0])) |
| 5572 | + return _gather_positions_by_chunk(gather_source, plan.exact_positions, int(plan.base.chunks[0])) |
5544 | 5573 | return _gather_positions(where["_where_x"], plan.exact_positions) |
5545 | 5574 |
|
5546 | 5575 |
|
|
0 commit comments