|
32 | 32 | "id": "8c510216bc394cf9", |
33 | 33 | "metadata": { |
34 | 34 | "ExecuteTime": { |
35 | | - "end_time": "2026-04-09T04:14:37.432863Z", |
36 | | - "start_time": "2026-04-09T04:14:37.110770Z" |
| 35 | + "end_time": "2026-04-09T06:27:37.470903Z", |
| 36 | + "start_time": "2026-04-09T06:27:37.098590Z" |
37 | 37 | } |
38 | 38 | }, |
39 | 39 | "source": [ |
|
119 | 119 | "id": "d1a5a37585a045ca", |
120 | 120 | "metadata": { |
121 | 121 | "ExecuteTime": { |
122 | | - "end_time": "2026-04-09T04:14:51.428855Z", |
123 | | - "start_time": "2026-04-09T04:14:37.433671Z" |
| 122 | + "end_time": "2026-04-09T06:27:52.097578Z", |
| 123 | + "start_time": "2026-04-09T06:27:37.471828Z" |
124 | 124 | } |
125 | 125 | }, |
126 | 126 | "source": [ |
127 | 127 | "N_ROWS = 10_000_000\n", |
128 | | - "MASK_TEXT = \"(id >= -25.0) & (id < 25.0)\"\n", |
| 128 | + "MASK_TEXT = \"(id >= -5.0) & (id < 5.0)\"\n", |
129 | 129 | "\n", |
130 | 130 | "rng = np.random.default_rng(0)\n", |
131 | 131 | "dtype = np.dtype([(\"id\", np.float64), (\"payload\", np.int32)])\n", |
|
157 | 157 | "text": [ |
158 | 158 | "Compressed base array size: 30.74 MiB\n", |
159 | 159 | "kind build_ms index_size overhead\n", |
160 | | - "ultralight 45.783 142 B 0.00x\n", |
161 | | - "light 674.304 26.04 MiB 0.85x\n", |
162 | | - "medium 2195.323 34.99 MiB 1.14x\n", |
163 | | - "full 8483.835 28.44 MiB 0.93x\n" |
| 160 | + "ultralight 45.528 142 B 0.00x\n", |
| 161 | + "light 679.027 26.04 MiB 0.85x\n", |
| 162 | + "medium 2342.959 34.99 MiB 1.14x\n", |
| 163 | + "full 8925.948 28.44 MiB 0.93x\n" |
164 | 164 | ] |
165 | 165 | } |
166 | 166 | ], |
|
181 | 181 | "id": "f1b3aaec965b42d6", |
182 | 182 | "metadata": { |
183 | 183 | "ExecuteTime": { |
184 | | - "end_time": "2026-04-09T04:14:51.546053Z", |
185 | | - "start_time": "2026-04-09T04:14:51.449229Z" |
| 184 | + "end_time": "2026-04-09T06:27:52.220533Z", |
| 185 | + "start_time": "2026-04-09T06:27:52.120176Z" |
186 | 186 | } |
187 | 187 | }, |
188 | 188 | "source": [ |
|
202 | 202 | "output_type": "stream", |
203 | 203 | "text": [ |
204 | 204 | "{'will_use_index': True, 'reason': 'multi-field exact indexes selected', 'kind': 'medium', 'level': 'exact', 'lookup_path': 'chunk-nav', 'full_runs': 0}\n", |
205 | | - "Matched rows: 50\n" |
| 205 | + "Matched rows: 10\n" |
206 | 206 | ] |
207 | 207 | } |
208 | 208 | ], |
|
223 | 223 | "id": "c9e932b7561b4ff4", |
224 | 224 | "metadata": { |
225 | 225 | "ExecuteTime": { |
226 | | - "end_time": "2026-04-09T04:14:53.105689Z", |
227 | | - "start_time": "2026-04-09T04:14:51.548648Z" |
| 226 | + "end_time": "2026-04-09T06:27:53.696948Z", |
| 227 | + "start_time": "2026-04-09T06:27:52.222040Z" |
228 | 228 | } |
229 | 229 | }, |
230 | 230 | "source": [ |
|
243 | 243 | " timing_rows.append((kind, scan_ms, index_ms, scan_ms / index_ms))\n", |
244 | 244 | "\n", |
245 | 245 | "print(f\"Selective mask over {N_ROWS:,} rows\")\n", |
246 | | - "print(f\"{'kind':<12} {'scan_ms':>10} {'index_ms':>10} {'speedup':>10}\")\n", |
| 246 | + "print(f\"{'kind':<12} {'scan_ms':>11} {'index_ms':>10} {'speedup':>10}\")\n", |
247 | 247 | "for kind, scan_ms, index_ms, speedup in timing_rows:\n", |
248 | | - " print(f\"{kind:<12} {scan_ms:10.3f} {index_ms:10.3f} {speedup:10.2f}x\")" |
| 248 | + " print(f\"{kind:<12} {scan_ms:11.3f} {index_ms:10.3f} {speedup:10.2f}x\")" |
249 | 249 | ], |
250 | 250 | "outputs": [ |
251 | 251 | { |
252 | 252 | "name": "stdout", |
253 | 253 | "output_type": "stream", |
254 | 254 | "text": [ |
255 | 255 | "Selective mask over 10,000,000 rows\n", |
256 | | - "kind scan_ms index_ms speedup\n", |
257 | | - "ultralight 70.429 67.914 1.04x\n", |
258 | | - "light 68.560 5.011 13.68x\n", |
259 | | - "medium 68.481 4.430 15.46x\n", |
260 | | - "full 68.408 4.263 16.05x\n" |
| 256 | + "kind scan_ms index_ms speedup\n", |
| 257 | + "ultralight 73.371 70.249 1.04x\n", |
| 258 | + "light 65.966 1.478 44.63x\n", |
| 259 | + "medium 65.349 1.253 52.16x\n", |
| 260 | + "full 65.108 1.221 53.31x\n" |
261 | 261 | ] |
262 | 262 | } |
263 | 263 | ], |
|
278 | 278 | "id": "9ffcb0d8d06a4daa", |
279 | 279 | "metadata": { |
280 | 280 | "ExecuteTime": { |
281 | | - "end_time": "2026-04-09T04:14:53.160261Z", |
282 | | - "start_time": "2026-04-09T04:14:53.118529Z" |
| 281 | + "end_time": "2026-04-09T06:27:53.735085Z", |
| 282 | + "start_time": "2026-04-09T06:27:53.707924Z" |
283 | 283 | } |
284 | 284 | }, |
285 | 285 | "source": [ |
|
323 | 323 | "id": "7d337ce2f9fb4f32", |
324 | 324 | "metadata": { |
325 | 325 | "ExecuteTime": { |
326 | | - "end_time": "2026-04-09T04:14:53.206514Z", |
327 | | - "start_time": "2026-04-09T04:14:53.171092Z" |
| 326 | + "end_time": "2026-04-09T06:27:53.759337Z", |
| 327 | + "start_time": "2026-04-09T06:27:53.736407Z" |
328 | 328 | } |
329 | 329 | }, |
330 | 330 | "source": [ |
|
370 | 370 | "id": "0be5f512928f48db", |
371 | 371 | "metadata": { |
372 | 372 | "ExecuteTime": { |
373 | | - "end_time": "2026-04-09T04:14:55.722443Z", |
374 | | - "start_time": "2026-04-09T04:14:53.207978Z" |
| 373 | + "end_time": "2026-04-09T06:27:58.801567Z", |
| 374 | + "start_time": "2026-04-09T06:27:53.761336Z" |
375 | 375 | } |
376 | 376 | }, |
377 | 377 | "source": [ |
378 | | - "persistent_arr = blosc2.asarray(data, urlpath=paths[0], mode=\"w\")\n", |
| 378 | + "persistent_arr = data.copy(urlpath=paths[0], mode=\"w\")\n", |
379 | 379 | "persistent_descriptor = persistent_arr.create_index(field=\"id\", kind=\"medium\")\n", |
380 | 380 | "show_index_summary(\"persistent medium\", persistent_descriptor)\n", |
381 | 381 | "\n", |
|
388 | 388 | "name": "stdout", |
389 | 389 | "output_type": "stream", |
390 | 390 | "text": [ |
391 | | - "persistent medium: kind=medium, persistent=False, ooc=True, stale=False\n" |
392 | | - ] |
393 | | - }, |
394 | | - { |
395 | | - "ename": "FileNotFoundError", |
396 | | - "evalue": "No such file or directory: indexing_tutorial_medium.b2nd", |
397 | | - "output_type": "error", |
398 | | - "traceback": [ |
399 | | - "\u001B[31m---------------------------------------------------------------------------\u001B[39m", |
400 | | - "\u001B[31mFileNotFoundError\u001B[39m Traceback (most recent call last)", |
401 | | - "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[7]\u001B[39m\u001B[32m, line 5\u001B[39m\n\u001B[32m 2\u001B[39m persistent_descriptor = persistent_arr.create_index(field=\u001B[33m\"\u001B[39m\u001B[33mid\u001B[39m\u001B[33m\"\u001B[39m, kind=\u001B[33m\"\u001B[39m\u001B[33mmedium\u001B[39m\u001B[33m\"\u001B[39m)\n\u001B[32m 3\u001B[39m show_index_summary(\u001B[33m\"\u001B[39m\u001B[33mpersistent medium\u001B[39m\u001B[33m\"\u001B[39m, persistent_descriptor)\n\u001B[32m----> \u001B[39m\u001B[32m5\u001B[39m reopened = \u001B[43mblosc2\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mpaths\u001B[49m\u001B[43m[\u001B[49m\u001B[32;43m0\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m=\u001B[49m\u001B[33;43m\"\u001B[39;49m\u001B[33;43ma\u001B[39;49m\u001B[33;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[32m 6\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mReopened index count: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mlen\u001B[39m(reopened.indexes)\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m 7\u001B[39m \u001B[38;5;28mprint\u001B[39m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mPersisted sidecar path: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mreopened.indexes[\u001B[32m0\u001B[39m][\u001B[33m'\u001B[39m\u001B[33mreduced\u001B[39m\u001B[33m'\u001B[39m][\u001B[33m'\u001B[39m\u001B[33mvalues_path\u001B[39m\u001B[33m'\u001B[39m]\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n", |
402 | | - "\u001B[36mFile \u001B[39m\u001B[32m~/blosc/python-blosc2/src/blosc2/schunk.py:1779\u001B[39m, in \u001B[36mopen\u001B[39m\u001B[34m(urlpath, mode, offset, **kwargs)\u001B[39m\n\u001B[32m 1776\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m special\n\u001B[32m 1778\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m os.path.exists(urlpath):\n\u001B[32m-> \u001B[39m\u001B[32m1779\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mFileNotFoundError\u001B[39;00m(\u001B[33mf\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mNo such file or directory: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00murlpath\u001B[38;5;132;01m}\u001B[39;00m\u001B[33m\"\u001B[39m)\n\u001B[32m 1781\u001B[39m _set_default_dparams(kwargs)\n\u001B[32m 1782\u001B[39m res = blosc2_ext.open(urlpath, mode, offset, **kwargs)\n", |
403 | | - "\u001B[31mFileNotFoundError\u001B[39m: No such file or directory: indexing_tutorial_medium.b2nd" |
| 391 | + "persistent medium: kind=medium, persistent=True, ooc=True, stale=False\n", |
| 392 | + "Reopened index count: 1\n", |
| 393 | + "Persisted sidecar path: indexing_tutorial_medium.__index__.id.medium.reduced.values.b2nd\n" |
404 | 394 | ] |
405 | 395 | } |
406 | 396 | ], |
|
419 | 409 | { |
420 | 410 | "cell_type": "code", |
421 | 411 | "id": "11f0cd1b910b409a", |
422 | | - "metadata": {}, |
| 412 | + "metadata": { |
| 413 | + "ExecuteTime": { |
| 414 | + "end_time": "2026-04-09T06:27:58.852040Z", |
| 415 | + "start_time": "2026-04-09T06:27:58.814043Z" |
| 416 | + } |
| 417 | + }, |
423 | 418 | "source": [ |
424 | 419 | "mutable_arr = blosc2.arange(20, dtype=np.int64)\n", |
425 | 420 | "mutable_arr.create_index(kind=\"full\")\n", |
|
429 | 424 | "mutable_arr.rebuild_index()\n", |
430 | 425 | "print(\"Stale after rebuild:\", mutable_arr.indexes[0][\"stale\"])" |
431 | 426 | ], |
432 | | - "outputs": [], |
433 | | - "execution_count": null |
| 427 | + "outputs": [ |
| 428 | + { |
| 429 | + "name": "stdout", |
| 430 | + "output_type": "stream", |
| 431 | + "text": [ |
| 432 | + "Stale after direct mutation: True\n", |
| 433 | + "Stale after rebuild: False\n" |
| 434 | + ] |
| 435 | + } |
| 436 | + ], |
| 437 | + "execution_count": 8 |
434 | 438 | }, |
435 | 439 | { |
436 | 440 | "cell_type": "markdown", |
|
454 | 458 | { |
455 | 459 | "cell_type": "code", |
456 | 460 | "id": "2e1a47a9cf7246e6", |
457 | | - "metadata": {}, |
| 461 | + "metadata": { |
| 462 | + "ExecuteTime": { |
| 463 | + "end_time": "2026-04-09T06:27:59.968401Z", |
| 464 | + "start_time": "2026-04-09T06:27:58.852830Z" |
| 465 | + } |
| 466 | + }, |
458 | 467 | "source": [ |
459 | 468 | "append_dtype = np.dtype([(\"id\", np.int64), (\"payload\", np.int32)])\n", |
460 | 469 | "base_rows = 200_000\n", |
|
492 | 501 | "print(f\"Median mask time after compaction: {after_ms:.3f} ms\")\n", |
493 | 502 | "print(f\"Speedup after compaction: {before_ms / after_ms:.2f}x\")" |
494 | 503 | ], |
495 | | - "outputs": [], |
496 | | - "execution_count": null |
| 504 | + "outputs": [ |
| 505 | + { |
| 506 | + "name": "stdout", |
| 507 | + "output_type": "stream", |
| 508 | + "text": [ |
| 509 | + "Before compaction: {'will_use_index': True, 'reason': 'multi-field exact indexes selected', 'kind': 'full', 'level': 'exact', 'lookup_path': 'run-bounded-ooc', 'full_runs': 40}\n", |
| 510 | + "Pending runs: 40\n", |
| 511 | + "Median mask time before compaction: 3.293 ms\n", |
| 512 | + "After compaction: {'will_use_index': True, 'reason': 'multi-field exact indexes selected', 'kind': 'full', 'level': 'exact', 'lookup_path': 'compact-selective-ooc', 'full_runs': 0}\n", |
| 513 | + "Pending runs: 0\n", |
| 514 | + "Median mask time after compaction: 0.689 ms\n", |
| 515 | + "Speedup after compaction: 4.78x\n" |
| 516 | + ] |
| 517 | + } |
| 518 | + ], |
| 519 | + "execution_count": 9 |
497 | 520 | }, |
498 | 521 | { |
499 | 522 | "cell_type": "markdown", |
|
513 | 536 | { |
514 | 537 | "cell_type": "code", |
515 | 538 | "id": "9833102355db4ec0", |
516 | | - "metadata": {}, |
| 539 | + "metadata": { |
| 540 | + "ExecuteTime": { |
| 541 | + "end_time": "2026-04-09T06:27:59.991418Z", |
| 542 | + "start_time": "2026-04-09T06:27:59.978217Z" |
| 543 | + } |
| 544 | + }, |
517 | 545 | "source": [ |
518 | 546 | "for path in paths:\n", |
519 | 547 | " blosc2.remove_urlpath(path)" |
520 | 548 | ], |
521 | 549 | "outputs": [], |
522 | | - "execution_count": null |
| 550 | + "execution_count": 10 |
523 | 551 | }, |
524 | 552 | { |
525 | 553 | "cell_type": "code", |
526 | 554 | "id": "17489b2c3d2ac57", |
527 | | - "metadata": {}, |
| 555 | + "metadata": { |
| 556 | + "ExecuteTime": { |
| 557 | + "end_time": "2026-04-09T06:28:00.015548Z", |
| 558 | + "start_time": "2026-04-09T06:27:59.998661Z" |
| 559 | + } |
| 560 | + }, |
528 | 561 | "source": [], |
529 | 562 | "outputs": [], |
530 | | - "execution_count": null |
| 563 | + "execution_count": 10 |
531 | 564 | } |
532 | 565 | ], |
533 | 566 | "metadata": { |
|
0 commit comments