@@ -512,6 +512,82 @@ def _persistent_cache_lookup(array: blosc2.NDArray, digest: str) -> np.ndarray |
512512 return coords
513513
514514
def _query_cache_entry_cbytes(payload_mapping: dict) -> int:
    """Measure how many compressed bytes a coordinate payload occupies.

    The raw coordinate bytes are compressed directly (typesize taken from
    the payload's dtype), so the budget accounting is independent of any
    container/encoding overhead.
    """
    itemsize = np.dtype(payload_mapping["dtype"]).itemsize
    params = blosc2.CParams(typesize=itemsize)
    return len(blosc2.compress2(payload_mapping["data"], cparams=params))
522+
523+
524+ def _query_cache_entries_fifo (catalog : dict ) -> list [tuple [str , dict ]]:
525+ """Return catalog entries ordered from oldest to newest insertion."""
526+ entries = catalog .get ("entries" , {})
527+ return sorted (entries .items (), key = lambda item : int (item [1 ]["slot" ]))
528+
529+
def _query_cache_rebuild_store(
    array: blosc2.NDArray,
    catalog: dict,
    retained_entries: list[tuple[str, dict]],
    appended: tuple[str, dict, dict, int] | None = None,
) -> bool:
    """Rewrite the persistent store with retained FIFO entries and an optional appended entry.

    Copies the retained payloads from the old store into a fresh temp store,
    renumbering slots densely from 0, optionally appends one new entry, then
    swaps the temp file into place and saves the updated catalog.

    Parameters
    ----------
    array : blosc2.NDArray
        Array whose on-disk query-cache payload store is being rewritten.
    catalog : dict
        Catalog mapping; its "entries", "persistent_cbytes" and "next_slot"
        keys are overwritten in place before saving.
    retained_entries : list[tuple[str, dict]]
        (digest, entry) pairs, in FIFO order, to carry over from the old store.
    appended : tuple[str, dict, dict, int] | None
        Optional (digest, payload_mapping, query_descriptor, cbytes) entry
        to append after the retained ones.

    Returns
    -------
    bool
        True once the store has been rewritten and the catalog saved.
    """
    payload_path = _query_cache_payload_path(array)
    temp_path = f"{payload_path}.tmp"
    # Close any cached handles so the temp file can be (re)created cleanly.
    _close_query_cache_store(payload_path)
    _close_query_cache_store(temp_path)
    blosc2.remove_urlpath(temp_path)

    old_store = _open_query_cache_store(array)
    temp_store = blosc2.VLArray(storage=blosc2.Storage(urlpath=temp_path, mode="w"))
    new_entries = {}
    persistent_cbytes = 0
    slot = 0

    try:
        for digest, entry in retained_entries:
            # Skip entries whose recorded slot no longer exists in the old store.
            if old_store is None or int(entry["slot"]) >= len(old_store):
                continue
            payload = old_store[int(entry["slot"])]
            # Drop payloads that are not dicts or belong to another format version.
            if not isinstance(payload, dict) or payload.get("version") != QUERY_CACHE_FORMAT_VERSION:
                continue
            temp_store.append(payload)
            updated = entry.copy()
            # Slots are renumbered densely in the rebuilt store.
            updated["slot"] = slot
            new_entries[digest] = updated
            persistent_cbytes += int(updated["cbytes"])
            slot += 1

        if appended is not None:
            digest, payload_mapping, query_descriptor, cbytes = appended
            temp_store.append(payload_mapping)
            new_entries[digest] = {
                "slot": slot,
                "cbytes": cbytes,
                "nrows": payload_mapping["nrows"],
                "dtype": payload_mapping["dtype"],
                "query": query_descriptor,
            }
            persistent_cbytes += cbytes
            slot += 1
    finally:
        # Drop store references and close cached handles before swapping the
        # files on disk (required on platforms that lock open files).
        del temp_store
        del old_store
        _close_query_cache_store(payload_path)
        _close_query_cache_store(temp_path)

    # Swap the rebuilt file into place. NOTE(review): remove + replace is not
    # a single atomic step — a crash between the two calls could lose the old
    # store; presumably acceptable for a cache. Confirm with callers.
    blosc2.remove_urlpath(payload_path)
    os.replace(temp_path, payload_path)

    catalog["entries"] = new_entries
    catalog["persistent_cbytes"] = persistent_cbytes
    catalog["next_slot"] = slot
    _save_query_cache_catalog(array, catalog)
    return True
589+
590+
515591def _persistent_cache_insert (
516592 array : blosc2 .NDArray ,
517593 digest : str ,
@@ -527,15 +603,11 @@ def _persistent_cache_insert(
527603 payload_path = _query_cache_payload_path (array )
528604 if catalog is None :
529605 catalog = _default_query_cache_catalog (payload_path )
606+ elif digest in catalog .get ("entries" , {}):
607+ return True
530608
531609 payload_mapping = _encode_coords_payload (coords )
532- raw_data = payload_mapping ["data" ]
533-
534- # Measure the compressed size of the coordinate bytes directly so the
535- # per-entry limit is independent of VLArray/msgpack encoding overhead.
536- coord_dtype = np .dtype (payload_mapping ["dtype" ])
537- compressed_coords = blosc2 .compress2 (raw_data , cparams = blosc2 .CParams (typesize = coord_dtype .itemsize ))
538- cbytes = len (compressed_coords )
610+ cbytes = _query_cache_entry_cbytes (payload_mapping )
539611
540612 max_entry = catalog .get ("max_entry_cbytes" , QUERY_CACHE_MAX_ENTRY_CBYTES )
541613 if cbytes > max_entry :
@@ -544,7 +616,19 @@ def _persistent_cache_insert(
544616 max_persistent = catalog .get ("max_persistent_cbytes" , QUERY_CACHE_MAX_PERSISTENT_CBYTES )
545617 current_persistent = int (catalog .get ("persistent_cbytes" , 0 ))
546618 if current_persistent + cbytes > max_persistent :
547- return False
619+ retained_entries = _query_cache_entries_fifo (catalog )
620+ retained_cbytes = current_persistent
621+ while retained_entries and retained_cbytes + cbytes > max_persistent :
622+ _ , oldest = retained_entries .pop (0 )
623+ retained_cbytes -= int (oldest ["cbytes" ])
624+ if retained_cbytes + cbytes > max_persistent :
625+ return False
626+ return _query_cache_rebuild_store (
627+ array ,
628+ catalog ,
629+ retained_entries ,
630+ appended = (digest , payload_mapping , query_descriptor , cbytes ),
631+ )
548632
549633 store = _open_query_cache_store (array , create = True )
550634 if store is None :
0 commit comments