Skip to content

Commit 9856f2f

Browse files
committed
Refine DuckDB query benchmark scan and table reporting
1 parent 49bbc9a commit 9856f2f

1 file changed

Lines changed: 34 additions & 15 deletions

File tree

bench/indexing/duckdb_query_bench.py

Lines changed: 34 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,8 @@
3232
("dist", lambda result: result["dist"]),
3333
("layout", lambda result: result["layout"]),
3434
("create_ms", lambda result: f"{result['create_ms']:.3f}"),
35-
("scan_ms", lambda result: f"{result['scan_ms']:.3f}"),
36-
("cold_ms", lambda result: f"{result['cold_ms']:.3f}"),
35+
("scan_ms", lambda result: f"{result['cold_scan_ms']:.3f}"),
36+
("query_ms", lambda result: f"{result['cold_ms']:.3f}"),
3737
("speedup", lambda result: f"{result['cold_speedup']:.2f}x"),
3838
("db_bytes", lambda result: f"{result['db_bytes']:,}"),
3939
("query_rows", lambda result: f"{result['query_rows']:,}"),
@@ -44,8 +44,8 @@
4444
("dist", lambda result: result["dist"]),
4545
("layout", lambda result: result["layout"]),
4646
("create_ms", lambda result: f"{result['create_ms']:.3f}"),
47-
("scan_ms", lambda result: f"{result['scan_ms']:.3f}"),
48-
("warm_ms", lambda result: f"{result['warm_ms']:.3f}" if result["warm_ms"] is not None else "-"),
47+
("scan_ms", lambda result: f"{result['warm_scan_ms']:.3f}"),
48+
("query_ms", lambda result: f"{result['warm_ms']:.3f}" if result["warm_ms"] is not None else "-"),
4949
("speedup", lambda result: f"{result['warm_speedup']:.2f}x" if result["warm_speedup"] is not None else "-"),
5050
("db_bytes", lambda result: f"{result['db_bytes']:,}"),
5151
("query_rows", lambda result: f"{result['query_rows']:,}"),
@@ -355,15 +355,28 @@ def _condition_sql(lo: object, hi: object, dtype: np.dtype, *, exact_query: bool
355355
return f"id >= {_literal(lo, dtype)} AND id <= {_literal(hi, dtype)}"
356356

357357

358-
def benchmark_scan_once(path: Path, lo, hi) -> tuple[float, int]:
358+
def benchmark_scan_once(path: Path, lo, hi, dtype: np.dtype, *, exact_query: bool = False) -> tuple[float, float, float, int]:
359359
con = duckdb.connect(str(path), read_only=True)
360360
try:
361+
condition_sql = _condition_sql(lo, hi, dtype, exact_query=exact_query)
362+
# Force the filtered baseline down the table-scan path instead of the ART index path.
363+
con.execute("SET index_scan_max_count = 0")
364+
con.execute("SET index_scan_percentage = 0")
365+
query = f"SELECT * FROM data WHERE {condition_sql}"
366+
367+
cold_start = time.perf_counter()
368+
table = con.execute(query).arrow().read_all()
369+
cold_elapsed = time.perf_counter() - cold_start
370+
361371
start = time.perf_counter()
362-
table = con.execute("SELECT * FROM data").arrow().read_all()
363-
ids = table["id"].to_numpy()
364-
result_len = int(np.count_nonzero((ids >= lo) & (ids <= hi)))
365-
elapsed = time.perf_counter() - start
366-
return elapsed, result_len
372+
table = con.execute(query).arrow().read_all()
373+
result_len = len(table)
374+
warm_elapsed = time.perf_counter() - start
375+
376+
third_start = time.perf_counter()
377+
con.execute(query).arrow().read_all()
378+
third_elapsed = time.perf_counter() - third_start
379+
return cold_elapsed, warm_elapsed, third_elapsed, result_len
367380
finally:
368381
con.close()
369382

@@ -413,7 +426,9 @@ def benchmark_layout(
413426
create_s = _open_or_build_duckdb_file(size, dist, id_dtype, path, layout=layout, batch_size=batch_size)
414427
lo, hi = _query_bounds(size, query_width, id_dtype)
415428

416-
scan_elapsed, scan_rows = benchmark_scan_once(path, lo, hi)
429+
cold_scan_elapsed, warm_scan_elapsed, third_scan_elapsed, scan_rows = benchmark_scan_once(
430+
path, lo, hi, id_dtype, exact_query=exact_query
431+
)
417432

418433
con = duckdb.connect(str(path), read_only=True)
419434
try:
@@ -428,20 +443,24 @@ def benchmark_layout(
428443
if scan_rows != filtered_rows:
429444
raise AssertionError(f"filtered rows mismatch: scan={scan_rows}, filtered={filtered_rows}")
430445

431-
scan_ms = scan_elapsed * 1_000
446+
cold_scan_ms = cold_scan_elapsed * 1_000
447+
warm_scan_ms = warm_scan_elapsed * 1_000
432448
cold_ms = cold_elapsed * 1_000
433449
warm_ms = median(warm_times) if warm_times else None
450+
if layout == "zonemap":
451+
cold_ms = third_scan_elapsed * 1_000
434452

435453
return {
436454
"size": size,
437455
"dist": dist,
438456
"layout": layout,
439457
"create_ms": create_s * 1_000,
440-
"scan_ms": scan_ms,
458+
"cold_scan_ms": cold_scan_ms,
459+
"warm_scan_ms": warm_scan_ms,
441460
"cold_ms": cold_ms,
442-
"cold_speedup": scan_ms / cold_ms,
461+
"cold_speedup": cold_scan_ms / cold_ms,
443462
"warm_ms": warm_ms,
444-
"warm_speedup": None if warm_ms is None else scan_ms / warm_ms,
463+
"warm_speedup": None if warm_ms is None else warm_scan_ms / warm_ms,
445464
"db_bytes": os.path.getsize(path),
446465
"query_rows": int(filtered_rows),
447466
"path": path,

0 commit comments

Comments
 (0)