Skip to content

Commit 4fd86fc

Browse files
committed
Prefer to use chunk size during merge sorts in full indexing (better performance)
1 parent 918deb8 commit 4fd86fc

1 file changed

Lines changed: 4 additions & 4 deletions

File tree

src/blosc2/indexing.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2835,6 +2835,7 @@ def _build_full_descriptor_ooc(
28352835
)
28362836
)
28372837

2838+
merge_buffer_items = max(int(array.chunks[0]), FULL_OOC_MERGE_BUFFER_ITEMS)
28382839
merge_id = 0
28392840
while len(runs) > 1:
28402841
next_runs = []
@@ -2849,7 +2850,7 @@ def _build_full_descriptor_ooc(
28492850
workdir,
28502851
dtype,
28512852
merge_id,
2852-
FULL_OOC_MERGE_BUFFER_ITEMS,
2853+
merge_buffer_items,
28532854
tracker,
28542855
cparams,
28552856
)
@@ -3724,6 +3725,7 @@ def compact_index(array: blosc2.NDArray, field: str | None = None, name: str | N
37243725
with tempfile.TemporaryDirectory(prefix="blosc2-index-compact-") as tmpdir:
37253726
workdir = Path(tmpdir)
37263727
runs = _full_compaction_runs(array, descriptor, workdir)
3728+
merge_buffer_items = max(int(array.chunks[0]), FULL_OOC_MERGE_BUFFER_ITEMS)
37273729
merge_id = 0
37283730
while len(runs) > 1:
37293731
next_runs = []
@@ -3732,9 +3734,7 @@ def compact_index(array: blosc2.NDArray, field: str | None = None, name: str | N
37323734
next_runs.append(runs[idx])
37333735
continue
37343736
next_runs.append(
3735-
_merge_run_pair(
3736-
runs[idx], runs[idx + 1], workdir, dtype, merge_id, FULL_OOC_MERGE_BUFFER_ITEMS
3737-
)
3737+
_merge_run_pair(runs[idx], runs[idx + 1], workdir, dtype, merge_id, merge_buffer_items)
37383738
)
37393739
merge_id += 1
37403740
runs = next_runs

0 commit comments

Comments (0)