5959QUERY_CACHE_VLMETA_KEY = "_blosc2_query_cache"
6060QUERY_CACHE_FORMAT_VERSION = 1
6161QUERY_CACHE_MAX_ENTRY_NBYTES = 65_536 # 64 KB of logical int64 positions per persistent entry
62- QUERY_CACHE_MAX_MEM_CBYTES = 131_072 # 128 KB for the in-process hot cache
63- QUERY_CACHE_MAX_PERSISTENT_NBYTES = 2_147_483_648 # 2 GB of logical int64 positions in the payload store
62+ QUERY_CACHE_MAX_MEM_NBYTES = 131_072 # 128 KB for the in-process hot cache
63+ QUERY_CACHE_MAX_PERSISTENT_NBYTES = 4 * 1024 * 1024 # 4 MB of logical int64 positions in the payload store
6464
6565# In-process hot cache: (array-scope, digest) -> decoded np.ndarray of coordinates.
6666_HOT_CACHE : dict [tuple [tuple [str , str | int ], str ], np .ndarray ] = {}
@@ -325,7 +325,7 @@ def _default_query_cache_catalog(payload_path: str) -> dict:
325325 "version" : QUERY_CACHE_FORMAT_VERSION ,
326326 "payload_ref" : {"kind" : "urlpath" , "version" : 1 , "urlpath" : payload_path },
327327 "max_entry_nbytes" : QUERY_CACHE_MAX_ENTRY_NBYTES ,
328-    "max_mem_cbytes" : QUERY_CACHE_MAX_MEM_CBYTES ,
328+    "max_mem_nbytes" : QUERY_CACHE_MAX_MEM_NBYTES ,
329329 "max_persistent_nbytes" : QUERY_CACHE_MAX_PERSISTENT_NBYTES ,
330330 "persistent_nbytes" : 0 ,
331331 "next_slot" : 0 ,
@@ -334,41 +334,17 @@ def _default_query_cache_catalog(payload_path: str) -> dict:
334334
335335
336336def _normalize_query_cache_catalog (catalog : dict ) -> dict :
337-    """Normalize legacy compressed-byte cache catalogs to logical-byte accounting."""
337+    """Ensure the prototype query-cache catalog has the current nbytes schema."""
338338 if not isinstance (catalog , dict ):
339339 return _default_query_cache_catalog ("" )
340340 catalog .setdefault ("version" , QUERY_CACHE_FORMAT_VERSION )
341+ catalog .setdefault ("payload_ref" , {"kind" : "urlpath" , "version" : 1 , "urlpath" : "" })
342+ catalog .setdefault ("max_entry_nbytes" , QUERY_CACHE_MAX_ENTRY_NBYTES )
343+ catalog .setdefault ("max_mem_nbytes" , QUERY_CACHE_MAX_MEM_NBYTES )
344+ catalog .setdefault ("max_persistent_nbytes" , QUERY_CACHE_MAX_PERSISTENT_NBYTES )
345+ catalog .setdefault ("persistent_nbytes" , 0 )
346+ catalog .setdefault ("next_slot" , 0 )
341347 catalog .setdefault ("entries" , {})
342-
343- if "max_entry_nbytes" not in catalog :
344- catalog ["max_entry_nbytes" ] = int (catalog .pop ("max_entry_cbytes" , QUERY_CACHE_MAX_ENTRY_NBYTES ))
345- else :
346- catalog .pop ("max_entry_cbytes" , None )
347-
348- catalog .setdefault ("max_mem_cbytes" , QUERY_CACHE_MAX_MEM_CBYTES )
349-
350- if "max_persistent_nbytes" not in catalog :
351- catalog ["max_persistent_nbytes" ] = int (
352- catalog .pop ("max_persistent_cbytes" , QUERY_CACHE_MAX_PERSISTENT_NBYTES )
353- )
354- else :
355- catalog .pop ("max_persistent_cbytes" , None )
356-
357- if "persistent_nbytes" not in catalog :
358- catalog ["persistent_nbytes" ] = int (catalog .pop ("persistent_cbytes" , 0 ))
359- else :
360- catalog .pop ("persistent_cbytes" , None )
361-
362- total_nbytes = 0
363- for entry in catalog ["entries" ].values ():
364- if "nbytes" not in entry :
365- entry ["nbytes" ] = int (entry .pop ("cbytes" , 0 ))
366- else :
367- entry .pop ("cbytes" , None )
368- total_nbytes += int (entry .get ("nbytes" , 0 ))
369-
370- if catalog ["entries" ]:
371- catalog ["persistent_nbytes" ] = total_nbytes
372348 return catalog
373349
374350
@@ -498,7 +474,7 @@ def _hot_cache_put(digest: str, coords: np.ndarray, scope: tuple[str, str | int]
498474 global _HOT_CACHE_BYTES
499475 key = _hot_cache_key (digest , scope )
500476 entry_bytes = coords .nbytes
501- if entry_bytes > QUERY_CACHE_MAX_MEM_CBYTES :
477+ if entry_bytes > QUERY_CACHE_MAX_MEM_NBYTES :
502478 # Single entry too large; skip.
503479 return
504480 # If already present, remove old accounting first.
@@ -507,7 +483,7 @@ def _hot_cache_put(digest: str, coords: np.ndarray, scope: tuple[str, str | int]
507483 with contextlib .suppress (ValueError ):
508484 _HOT_CACHE_ORDER .remove (key )
509485 # Evict LRU entries until there is room.
510- while _HOT_CACHE_ORDER and _HOT_CACHE_BYTES + entry_bytes > QUERY_CACHE_MAX_MEM_CBYTES :
486+ while _HOT_CACHE_ORDER and _HOT_CACHE_BYTES + entry_bytes > QUERY_CACHE_MAX_MEM_NBYTES :
511487 oldest = _HOT_CACHE_ORDER .pop (0 )
512488 evicted = _HOT_CACHE .pop (oldest , None )
513489 if evicted is not None :
@@ -558,71 +534,21 @@ def _query_cache_entry_nbytes(coords: np.ndarray) -> int:
558534 return int (np .asarray (coords ).size ) * np .dtype (np .int64 ).itemsize
559535
560536
561- def _query_cache_entries_fifo (catalog : dict ) -> list [tuple [str , dict ]]:
562- """Return catalog entries ordered from oldest to newest insertion."""
563- entries = catalog .get ("entries" , {})
564- return sorted (entries .items (), key = lambda item : int (item [1 ]["slot" ]))
565-
566-
567- def _query_cache_rebuild_store (
568- array : blosc2 .NDArray ,
569- catalog : dict ,
570- retained_entries : list [tuple [str , dict ]],
571- appended : tuple [str , dict , dict , int ] | None = None ,
572- ) -> bool :
573- """Rewrite the persistent store with retained FIFO entries and an optional appended entry."""
537+ def _reset_persistent_query_cache_catalog (array : blosc2 .NDArray , catalog : dict | None = None ) -> dict :
538+ """Drop persistent cache storage and return a fresh empty catalog preserving limits."""
574539 payload_path = _query_cache_payload_path (array )
575- temp_path = f"{ payload_path } .tmp"
576540 _close_query_cache_store (payload_path )
577- _close_query_cache_store (temp_path )
578- blosc2 .remove_urlpath (temp_path )
579-
580- old_store = _open_query_cache_store (array )
581- temp_store = blosc2 .VLArray (storage = blosc2 .Storage (urlpath = temp_path , mode = "w" ))
582- new_entries = {}
583- persistent_nbytes = 0
584- slot = 0
585-
586- try :
587- for digest , entry in retained_entries :
588- if old_store is None or int (entry ["slot" ]) >= len (old_store ):
589- continue
590- payload = old_store [int (entry ["slot" ])]
591- if not isinstance (payload , dict ) or payload .get ("version" ) != QUERY_CACHE_FORMAT_VERSION :
592- continue
593- temp_store .append (payload )
594- updated = entry .copy ()
595- updated ["slot" ] = slot
596- new_entries [digest ] = updated
597- persistent_nbytes += int (updated ["nbytes" ])
598- slot += 1
599-
600- if appended is not None :
601- digest , payload_mapping , query_descriptor , nbytes = appended
602- temp_store .append (payload_mapping )
603- new_entries [digest ] = {
604- "slot" : slot ,
605- "nbytes" : nbytes ,
606- "nrows" : payload_mapping ["nrows" ],
607- "dtype" : payload_mapping ["dtype" ],
608- "query" : query_descriptor ,
609- }
610- persistent_nbytes += nbytes
611- slot += 1
612- finally :
613- del temp_store
614- del old_store
615- _close_query_cache_store (payload_path )
616- _close_query_cache_store (temp_path )
617-
618541 blosc2 .remove_urlpath (payload_path )
619- os .replace (temp_path , payload_path )
620542
621- catalog ["entries" ] = new_entries
622- catalog ["persistent_nbytes" ] = persistent_nbytes
623- catalog ["next_slot" ] = slot
624- _save_query_cache_catalog (array , catalog )
625- return True
543+ fresh = _default_query_cache_catalog (payload_path )
544+ if catalog is not None :
545+ fresh ["max_entry_nbytes" ] = int (catalog .get ("max_entry_nbytes" , QUERY_CACHE_MAX_ENTRY_NBYTES ))
546+ fresh ["max_mem_nbytes" ] = int (catalog .get ("max_mem_nbytes" , QUERY_CACHE_MAX_MEM_NBYTES ))
547+ fresh ["max_persistent_nbytes" ] = int (
548+ catalog .get ("max_persistent_nbytes" , QUERY_CACHE_MAX_PERSISTENT_NBYTES )
549+ )
550+ _save_query_cache_catalog (array , fresh )
551+ return fresh
626552
627553
628554def _persistent_cache_insert (
@@ -653,19 +579,10 @@ def _persistent_cache_insert(
653579 max_persistent = catalog .get ("max_persistent_nbytes" , QUERY_CACHE_MAX_PERSISTENT_NBYTES )
654580 current_persistent = int (catalog .get ("persistent_nbytes" , 0 ))
655581 if current_persistent + nbytes > max_persistent :
656- retained_entries = _query_cache_entries_fifo (catalog )
657- retained_nbytes = current_persistent
658- while retained_entries and retained_nbytes + nbytes > max_persistent :
659- _ , oldest = retained_entries .pop (0 )
660- retained_nbytes -= int (oldest ["nbytes" ])
661- if retained_nbytes + nbytes > max_persistent :
582+ if nbytes > max_persistent :
662583 return False
663- return _query_cache_rebuild_store (
664- array ,
665- catalog ,
666- retained_entries ,
667- appended = (digest , payload_mapping , query_descriptor , nbytes ),
668- )
584+ catalog = _reset_persistent_query_cache_catalog (array , catalog )
585+ current_persistent = 0
669586
670587 store = _open_query_cache_store (array , create = True )
671588 if store is None :
@@ -701,7 +618,6 @@ def _invalidate_query_cache(array: blosc2.NDArray) -> None:
701618 payload_path = _query_cache_payload_path (array )
702619 _close_query_cache_store (payload_path )
703620 blosc2 .remove_urlpath (payload_path )
704- # Clear the catalog in vlmeta.
705621 with contextlib .suppress (KeyError , Exception ):
706622 del array .schunk .vlmeta [QUERY_CACHE_VLMETA_KEY ]
707623 _hot_cache_clear (scope = scope )
0 commit comments