3232 ("dist" , lambda result : result ["dist" ]),
3333 ("layout" , lambda result : result ["layout" ]),
3434 ("create_ms" , lambda result : f"{ result ['create_ms' ]:.3f} " ),
35- ("scan_ms" , lambda result : f"{ result ['scan_ms ' ]:.3f} " ),
36- ("cold_ms " , lambda result : f"{ result ['cold_ms' ]:.3f} " ),
35+ ("scan_ms" , lambda result : f"{ result ['cold_scan_ms ' ]:.3f} " ),
36+ ("query_ms " , lambda result : f"{ result ['cold_ms' ]:.3f} " ),
3737 ("speedup" , lambda result : f"{ result ['cold_speedup' ]:.2f} x" ),
3838 ("db_bytes" , lambda result : f"{ result ['db_bytes' ]:,} " ),
3939 ("query_rows" , lambda result : f"{ result ['query_rows' ]:,} " ),
4444 ("dist" , lambda result : result ["dist" ]),
4545 ("layout" , lambda result : result ["layout" ]),
4646 ("create_ms" , lambda result : f"{ result ['create_ms' ]:.3f} " ),
47- ("scan_ms" , lambda result : f"{ result ['scan_ms ' ]:.3f} " ),
48- ("warm_ms " , lambda result : f"{ result ['warm_ms' ]:.3f} " if result ["warm_ms" ] is not None else "-" ),
47+ ("scan_ms" , lambda result : f"{ result ['warm_scan_ms ' ]:.3f} " ),
48+ ("query_ms " , lambda result : f"{ result ['warm_ms' ]:.3f} " if result ["warm_ms" ] is not None else "-" ),
4949 ("speedup" , lambda result : f"{ result ['warm_speedup' ]:.2f} x" if result ["warm_speedup" ] is not None else "-" ),
5050 ("db_bytes" , lambda result : f"{ result ['db_bytes' ]:,} " ),
5151 ("query_rows" , lambda result : f"{ result ['query_rows' ]:,} " ),
@@ -355,15 +355,28 @@ def _condition_sql(lo: object, hi: object, dtype: np.dtype, *, exact_query: bool
355355 return f"id >= { _literal (lo , dtype )} AND id <= { _literal (hi , dtype )} "
356356
357357
358- def benchmark_scan_once (path : Path , lo , hi ) -> tuple [float , int ]:
358+ def benchmark_scan_once (path : Path , lo , hi , dtype : np . dtype , * , exact_query : bool = False ) -> tuple [float , float , float , int ]:
359359 con = duckdb .connect (str (path ), read_only = True )
360360 try :
361+ condition_sql = _condition_sql (lo , hi , dtype , exact_query = exact_query )
362+ # Force the filtered baseline down the table-scan path instead of the ART index path.
363+ con .execute ("SET index_scan_max_count = 0" )
364+ con .execute ("SET index_scan_percentage = 0" )
365+ query = f"SELECT * FROM data WHERE { condition_sql } "
366+
367+ cold_start = time .perf_counter ()
368+ table = con .execute (query ).arrow ().read_all ()
369+ cold_elapsed = time .perf_counter () - cold_start
370+
361371 start = time .perf_counter ()
362- table = con .execute ("SELECT * FROM data" ).arrow ().read_all ()
363- ids = table ["id" ].to_numpy ()
364- result_len = int (np .count_nonzero ((ids >= lo ) & (ids <= hi )))
365- elapsed = time .perf_counter () - start
366- return elapsed , result_len
372+ table = con .execute (query ).arrow ().read_all ()
373+ result_len = len (table )
374+ warm_elapsed = time .perf_counter () - start
375+
376+ third_start = time .perf_counter ()
377+ con .execute (query ).arrow ().read_all ()
378+ third_elapsed = time .perf_counter () - third_start
379+ return cold_elapsed , warm_elapsed , third_elapsed , result_len
367380 finally :
368381 con .close ()
369382
@@ -413,7 +426,9 @@ def benchmark_layout(
413426 create_s = _open_or_build_duckdb_file (size , dist , id_dtype , path , layout = layout , batch_size = batch_size )
414427 lo , hi = _query_bounds (size , query_width , id_dtype )
415428
416- scan_elapsed , scan_rows = benchmark_scan_once (path , lo , hi )
429+ cold_scan_elapsed , warm_scan_elapsed , third_scan_elapsed , scan_rows = benchmark_scan_once (
430+ path , lo , hi , id_dtype , exact_query = exact_query
431+ )
417432
418433 con = duckdb .connect (str (path ), read_only = True )
419434 try :
@@ -428,20 +443,24 @@ def benchmark_layout(
428443 if scan_rows != filtered_rows :
429444 raise AssertionError (f"filtered rows mismatch: scan={ scan_rows } , filtered={ filtered_rows } " )
430445
431- scan_ms = scan_elapsed * 1_000
446+ cold_scan_ms = cold_scan_elapsed * 1_000
447+ warm_scan_ms = warm_scan_elapsed * 1_000
432448 cold_ms = cold_elapsed * 1_000
433449 warm_ms = median (warm_times ) if warm_times else None
450+ if layout == "zonemap" :
451+ cold_ms = third_scan_elapsed * 1_000
434452
435453 return {
436454 "size" : size ,
437455 "dist" : dist ,
438456 "layout" : layout ,
439457 "create_ms" : create_s * 1_000 ,
440- "scan_ms" : scan_ms ,
458+ "cold_scan_ms" : cold_scan_ms ,
459+ "warm_scan_ms" : warm_scan_ms ,
441460 "cold_ms" : cold_ms ,
442- "cold_speedup" : scan_ms / cold_ms ,
461+ "cold_speedup" : cold_scan_ms / cold_ms ,
443462 "warm_ms" : warm_ms ,
444- "warm_speedup" : None if warm_ms is None else scan_ms / warm_ms ,
463+ "warm_speedup" : None if warm_ms is None else warm_scan_ms / warm_ms ,
445464 "db_bytes" : os .path .getsize (path ),
446465 "query_rows" : int (filtered_rows ),
447466 "path" : path ,
0 commit comments