Skip to content

Commit fbb0d55

Browse files
committed
New version of indexes tutorial
1 parent 0c5711c commit fbb0d55

1 file changed

Lines changed: 84 additions & 51 deletions

File tree

doc/getting_started/tutorials/14.indexing-arrays.ipynb

Lines changed: 84 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@
3232
"id": "8c510216bc394cf9",
3333
"metadata": {
3434
"ExecuteTime": {
35-
"end_time": "2026-04-09T04:14:37.432863Z",
36-
"start_time": "2026-04-09T04:14:37.110770Z"
35+
"end_time": "2026-04-09T06:27:37.470903Z",
36+
"start_time": "2026-04-09T06:27:37.098590Z"
3737
}
3838
},
3939
"source": [
@@ -119,13 +119,13 @@
119119
"id": "d1a5a37585a045ca",
120120
"metadata": {
121121
"ExecuteTime": {
122-
"end_time": "2026-04-09T04:14:51.428855Z",
123-
"start_time": "2026-04-09T04:14:37.433671Z"
122+
"end_time": "2026-04-09T06:27:52.097578Z",
123+
"start_time": "2026-04-09T06:27:37.471828Z"
124124
}
125125
},
126126
"source": [
127127
"N_ROWS = 10_000_000\n",
128-
"MASK_TEXT = \"(id >= -25.0) & (id < 25.0)\"\n",
128+
"MASK_TEXT = \"(id >= -5.0) & (id < 5.0)\"\n",
129129
"\n",
130130
"rng = np.random.default_rng(0)\n",
131131
"dtype = np.dtype([(\"id\", np.float64), (\"payload\", np.int32)])\n",
@@ -157,10 +157,10 @@
157157
"text": [
158158
"Compressed base array size: 30.74 MiB\n",
159159
"kind build_ms index_size overhead\n",
160-
"ultralight 45.783 142 B 0.00x\n",
161-
"light 674.304 26.04 MiB 0.85x\n",
162-
"medium 2195.323 34.99 MiB 1.14x\n",
163-
"full 8483.835 28.44 MiB 0.93x\n"
160+
"ultralight 45.528 142 B 0.00x\n",
161+
"light 679.027 26.04 MiB 0.85x\n",
162+
"medium 2342.959 34.99 MiB 1.14x\n",
163+
"full 8925.948 28.44 MiB 0.93x\n"
164164
]
165165
}
166166
],
@@ -181,8 +181,8 @@
181181
"id": "f1b3aaec965b42d6",
182182
"metadata": {
183183
"ExecuteTime": {
184-
"end_time": "2026-04-09T04:14:51.546053Z",
185-
"start_time": "2026-04-09T04:14:51.449229Z"
184+
"end_time": "2026-04-09T06:27:52.220533Z",
185+
"start_time": "2026-04-09T06:27:52.120176Z"
186186
}
187187
},
188188
"source": [
@@ -202,7 +202,7 @@
202202
"output_type": "stream",
203203
"text": [
204204
"{'will_use_index': True, 'reason': 'multi-field exact indexes selected', 'kind': 'medium', 'level': 'exact', 'lookup_path': 'chunk-nav', 'full_runs': 0}\n",
205-
"Matched rows: 50\n"
205+
"Matched rows: 10\n"
206206
]
207207
}
208208
],
@@ -223,8 +223,8 @@
223223
"id": "c9e932b7561b4ff4",
224224
"metadata": {
225225
"ExecuteTime": {
226-
"end_time": "2026-04-09T04:14:53.105689Z",
227-
"start_time": "2026-04-09T04:14:51.548648Z"
226+
"end_time": "2026-04-09T06:27:53.696948Z",
227+
"start_time": "2026-04-09T06:27:52.222040Z"
228228
}
229229
},
230230
"source": [
@@ -243,21 +243,21 @@
243243
" timing_rows.append((kind, scan_ms, index_ms, scan_ms / index_ms))\n",
244244
"\n",
245245
"print(f\"Selective mask over {N_ROWS:,} rows\")\n",
246-
"print(f\"{'kind':<12} {'scan_ms':>10} {'index_ms':>10} {'speedup':>10}\")\n",
246+
"print(f\"{'kind':<12} {'scan_ms':>11} {'index_ms':>10} {'speedup':>10}\")\n",
247247
"for kind, scan_ms, index_ms, speedup in timing_rows:\n",
248-
" print(f\"{kind:<12} {scan_ms:10.3f} {index_ms:10.3f} {speedup:10.2f}x\")"
248+
" print(f\"{kind:<12} {scan_ms:11.3f} {index_ms:10.3f} {speedup:10.2f}x\")"
249249
],
250250
"outputs": [
251251
{
252252
"name": "stdout",
253253
"output_type": "stream",
254254
"text": [
255255
"Selective mask over 10,000,000 rows\n",
256-
"kind scan_ms index_ms speedup\n",
257-
"ultralight 70.429 67.914 1.04x\n",
258-
"light 68.560 5.011 13.68x\n",
259-
"medium 68.481 4.430 15.46x\n",
260-
"full 68.408 4.263 16.05x\n"
256+
"kind scan_ms index_ms speedup\n",
257+
"ultralight 73.371 70.249 1.04x\n",
258+
"light 65.966 1.478 44.63x\n",
259+
"medium 65.349 1.253 52.16x\n",
260+
"full 65.108 1.221 53.31x\n"
261261
]
262262
}
263263
],
@@ -278,8 +278,8 @@
278278
"id": "9ffcb0d8d06a4daa",
279279
"metadata": {
280280
"ExecuteTime": {
281-
"end_time": "2026-04-09T04:14:53.160261Z",
282-
"start_time": "2026-04-09T04:14:53.118529Z"
281+
"end_time": "2026-04-09T06:27:53.735085Z",
282+
"start_time": "2026-04-09T06:27:53.707924Z"
283283
}
284284
},
285285
"source": [
@@ -323,8 +323,8 @@
323323
"id": "7d337ce2f9fb4f32",
324324
"metadata": {
325325
"ExecuteTime": {
326-
"end_time": "2026-04-09T04:14:53.206514Z",
327-
"start_time": "2026-04-09T04:14:53.171092Z"
326+
"end_time": "2026-04-09T06:27:53.759337Z",
327+
"start_time": "2026-04-09T06:27:53.736407Z"
328328
}
329329
},
330330
"source": [
@@ -370,12 +370,12 @@
370370
"id": "0be5f512928f48db",
371371
"metadata": {
372372
"ExecuteTime": {
373-
"end_time": "2026-04-09T04:14:55.722443Z",
374-
"start_time": "2026-04-09T04:14:53.207978Z"
373+
"end_time": "2026-04-09T06:27:58.801567Z",
374+
"start_time": "2026-04-09T06:27:53.761336Z"
375375
}
376376
},
377377
"source": [
378-
"persistent_arr = blosc2.asarray(data, urlpath=paths[0], mode=\"w\")\n",
378+
"persistent_arr = data.copy(urlpath=paths[0], mode=\"w\")\n",
379379
"persistent_descriptor = persistent_arr.create_index(field=\"id\", kind=\"medium\")\n",
380380
"show_index_summary(\"persistent medium\", persistent_descriptor)\n",
381381
"\n",
@@ -388,19 +388,9 @@
388388
"name": "stdout",
389389
"output_type": "stream",
390390
"text": [
391-
"persistent medium: kind=medium, persistent=False, ooc=True, stale=False\n"
392-
]
393-
},
394-
{
395-
"ename": "FileNotFoundError",
396-
"evalue": "No such file or directory: indexing_tutorial_medium.b2nd",
397-
"output_type": "error",
398-
"traceback": [
399-
"\u001B[31m---------------------------------------------------------------------------\u001B[39m",
400-
"\u001B[31mFileNotFoundError\u001B[39m Traceback (most recent call last)",
401-
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[7]\u001B[39m\u001B[32m, line 5\u001B[39m\n\u001B[32m 2\u001B[39m persistent_descriptor = persistent_arr.create_index(field=\u001B[33m\"\u001B[39m\u001B[33mid\u001B[39m\u001B[33m\"\u001B[39m, kind=\u001B[33m\"\u001B[39m\u001B[33mmedium\u001B[39m\u001B[33m\"\u001B[39m)\n\u001B[32m 3\u001B[39m show_index_summary(\u001B[33m\"\u001B[39m\u001B[33mpersistent medium\u001B[39m\u001B[33m\"\u001B[39m, persistent_descriptor)\n\u001B[32m----> \u001B[39m\u001B[32m5\u001B[39m reopened = \u001B[43mblosc2\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mpaths\u001B[49m\u001B[43m[\u001B[49m\u001B[32;43m0\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m=\u001B[49m\u001B[33;43m\"\u001B[39;49m\u001B[33;43ma\u001B[39;49m\u001B[33;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[32m 6\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mReopened index count: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mlen\u001B[39m(reopened.indexes)\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m 7\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mPersisted sidecar path: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mreopened.indexes[\u001B[32m0\u001B[39m][\u001B[33m'\u001B[39m\u001B[33mreduced\u001B[39m\u001B[33m'\u001B[39m][\u001B[33m'\u001B[39m\u001B[33mvalues_path\u001B[39m\u001B[33m'\u001B[39m]\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n",
402-
"\u001B[36mFile \u001B[39m\u001B[32m~/blosc/python-blosc2/src/blosc2/schunk.py:1779\u001B[39m, in \u001B[36mopen\u001B[39m\u001B[34m(urlpath, mode, offset, **kwargs)\u001B[39m\n\u001B[32m 1776\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m special\n\u001B[32m 1778\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m os.path.exists(urlpath):\n\u001B[32m-> \u001B[39m\u001B[32m1779\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mFileNotFoundError\u001B[39;00m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo such file or directory: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00murlpath\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m 1781\u001B[39m _set_default_dparams(kwargs)\n\u001B[32m 1782\u001B[39m res = blosc2_ext.open(urlpath, mode, offset, **kwargs)\n",
403-
"\u001B[31mFileNotFoundError\u001B[39m: No such file or directory: indexing_tutorial_medium.b2nd"
391+
"persistent medium: kind=medium, persistent=True, ooc=True, stale=False\n",
392+
"Reopened index count: 1\n",
393+
"Persisted sidecar path: indexing_tutorial_medium.__index__.id.medium.reduced.values.b2nd\n"
404394
]
405395
}
406396
],
@@ -419,7 +409,12 @@
419409
{
420410
"cell_type": "code",
421411
"id": "11f0cd1b910b409a",
422-
"metadata": {},
412+
"metadata": {
413+
"ExecuteTime": {
414+
"end_time": "2026-04-09T06:27:58.852040Z",
415+
"start_time": "2026-04-09T06:27:58.814043Z"
416+
}
417+
},
423418
"source": [
424419
"mutable_arr = blosc2.arange(20, dtype=np.int64)\n",
425420
"mutable_arr.create_index(kind=\"full\")\n",
@@ -429,8 +424,17 @@
429424
"mutable_arr.rebuild_index()\n",
430425
"print(\"Stale after rebuild:\", mutable_arr.indexes[0][\"stale\"])"
431426
],
432-
"outputs": [],
433-
"execution_count": null
427+
"outputs": [
428+
{
429+
"name": "stdout",
430+
"output_type": "stream",
431+
"text": [
432+
"Stale after direct mutation: True\n",
433+
"Stale after rebuild: False\n"
434+
]
435+
}
436+
],
437+
"execution_count": 8
434438
},
435439
{
436440
"cell_type": "markdown",
@@ -454,7 +458,12 @@
454458
{
455459
"cell_type": "code",
456460
"id": "2e1a47a9cf7246e6",
457-
"metadata": {},
461+
"metadata": {
462+
"ExecuteTime": {
463+
"end_time": "2026-04-09T06:27:59.968401Z",
464+
"start_time": "2026-04-09T06:27:58.852830Z"
465+
}
466+
},
458467
"source": [
459468
"append_dtype = np.dtype([(\"id\", np.int64), (\"payload\", np.int32)])\n",
460469
"base_rows = 200_000\n",
@@ -492,8 +501,22 @@
492501
"print(f\"Median mask time after compaction: {after_ms:.3f} ms\")\n",
493502
"print(f\"Speedup after compaction: {before_ms / after_ms:.2f}x\")"
494503
],
495-
"outputs": [],
496-
"execution_count": null
504+
"outputs": [
505+
{
506+
"name": "stdout",
507+
"output_type": "stream",
508+
"text": [
509+
"Before compaction: {'will_use_index': True, 'reason': 'multi-field exact indexes selected', 'kind': 'full', 'level': 'exact', 'lookup_path': 'run-bounded-ooc', 'full_runs': 40}\n",
510+
"Pending runs: 40\n",
511+
"Median mask time before compaction: 3.293 ms\n",
512+
"After compaction: {'will_use_index': True, 'reason': 'multi-field exact indexes selected', 'kind': 'full', 'level': 'exact', 'lookup_path': 'compact-selective-ooc', 'full_runs': 0}\n",
513+
"Pending runs: 0\n",
514+
"Median mask time after compaction: 0.689 ms\n",
515+
"Speedup after compaction: 4.78x\n"
516+
]
517+
}
518+
],
519+
"execution_count": 9
497520
},
498521
{
499522
"cell_type": "markdown",
@@ -513,21 +536,31 @@
513536
{
514537
"cell_type": "code",
515538
"id": "9833102355db4ec0",
516-
"metadata": {},
539+
"metadata": {
540+
"ExecuteTime": {
541+
"end_time": "2026-04-09T06:27:59.991418Z",
542+
"start_time": "2026-04-09T06:27:59.978217Z"
543+
}
544+
},
517545
"source": [
518546
"for path in paths:\n",
519547
" blosc2.remove_urlpath(path)"
520548
],
521549
"outputs": [],
522-
"execution_count": null
550+
"execution_count": 10
523551
},
524552
{
525553
"cell_type": "code",
526554
"id": "17489b2c3d2ac57",
527-
"metadata": {},
555+
"metadata": {
556+
"ExecuteTime": {
557+
"end_time": "2026-04-09T06:28:00.015548Z",
558+
"start_time": "2026-04-09T06:27:59.998661Z"
559+
}
560+
},
528561
"source": [],
529562
"outputs": [],
530-
"execution_count": null
563+
"execution_count": 10
531564
}
532565
],
533566
"metadata": {

0 commit comments

Comments
 (0)