Skip to content

Commit e2348a6

Browse files
jrgemignani and claude committed
VLE cache: replace snapshot invalidation with per-graph version counters
Replace AGE's snapshot-based VLE cache invalidation with per-graph monotonic version counters in shared memory. The old code compared PostgreSQL's global xmin/xmax/curcid, causing false cache invalidation whenever ANY transaction ran on the server — even unrelated ones. This forced a full hash table rebuild (~138s at SF3) on every VLE query in any multi-connection environment.

The fix uses three invalidation paths with automatic detection:
- DSM (PG 17+): GetNamedDSMSegment — works without shared_preload_libraries
- SHMEM (PG <17): shmem_request/startup hooks — needs shared_preload_libraries; functions conditionally compiled via #if PG_VERSION_NUM < 170000
- SNAPSHOT: fallback to original behavior when shared memory unavailable

Version counter increment points:
- Cypher CREATE/DELETE/SET/MERGE via executor hooks
- SQL INSERT/UPDATE/DELETE via auto-installed per-table triggers
- TRUNCATE via ProcessUtility hook interception

New slot allocation in the version counter array uses pg_write_barrier() before incrementing num_entries to ensure entry visibility on weak memory-ordering architectures (e.g., ARM).

Additional optimizations:
- Thin entries: vertex/edge hash table entries store 6-byte TID instead of copied property Datum; properties fetched on demand via heap_fetch only during result construction. Reduces hash table memory by ~77%.
- Fast path in is_an_edge_match: skip property access for label-only VLE patterns (e.g., [:KNOWS*1..2]). When property constraints are present, edge properties are fetched once and cached locally to avoid duplicate heap access.
- Defensive elog(ERROR) on stale TID in lazy property fetch to catch invalidation logic bugs.
- Trigger install is conditional — checks if the trigger function exists in the catalog before attempting installation, ensuring backward compatibility with older extension SQL versions.
Test results (LDBC SNB benchmark, SF3 — 52.7M edges, 9.3M vertices):

Production simulation (VLE with concurrent background transactions):
  Before: 177,188 ms avg per query (full rebuild every time)
  After: 15.7 ms avg per query (cache hit)
  Speedup: 11,299x

Cold build time:
  Before: 186,275 ms
  After: 108,955 ms (41% faster — no datumCopy)

LDBC IC1 warm (3-hop VLE, single session):
  Before: 219,385 ms
  After: 175,249 ms (20% faster — better cache utilization)

Hash table memory (SF3):
  Before: ~9 GB
  After: ~2.1 GB (77% reduction)

New regression tests in age_global_graph.sql verify:
- VLE cache invalidation after CREATE (path extends)
- VLE cache invalidation after DELETE (path shrinks)
- VLE cache invalidation after SET (property updated via lazy fetch)
- VLE edge property fetch via full path return (weight values in path)
- VLE edge property fetch via UNWIND + relationships() (individual weights)

Regression tests: 32/32 pass

Files changed (14):
  src/backend/age.c — shmem hook registration (PG <17)
  src/backend/catalog/ag_catalog.c — TRUNCATE interception
  src/backend/commands/label_commands.c — conditional trigger auto-install on label creation
  src/backend/executor/cypher_create.c — increment_graph_version after CREATE
  src/backend/executor/cypher_delete.c — increment_graph_version after DELETE
  src/backend/executor/cypher_merge.c — increment_graph_version after MERGE
  src/backend/executor/cypher_set.c — increment_graph_version after SET
  src/backend/utils/adt/age_global_graph.c — version counter, thin entries, trigger fn, lazy fetch
  src/backend/utils/adt/age_vle.c — is_an_edge_match fast path, cached edge property fetch
  src/include/utils/age_global_graph.h — conditional declarations
  sql/age_main.sql — trigger function registration for next-version SQL
  regress/sql/age_global_graph.sql — VLE cache regression tests
  regress/expected/age_global_graph.out — expected output for new tests
  age--1.7.0--y.y.y.sql — upgrade template: trigger function for existing installs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1847644 commit e2348a6

14 files changed

Lines changed: 1003 additions & 103 deletions

File tree

age--1.7.0--y.y.y.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,3 +408,13 @@ $function$;
408408

409409
COMMENT ON FUNCTION ag_catalog.age_pg_upgrade_status() IS
410410
'Returns the current pg_upgrade readiness status of the AGE installation.';
411+
412+
--
413+
-- VLE cache invalidation trigger function
414+
-- Installed on graph label tables to catch SQL-level mutations
415+
-- and increment the per-graph version counter for VLE cache invalidation.
416+
--
417+
CREATE FUNCTION ag_catalog.age_invalidate_graph_cache()
418+
RETURNS trigger
419+
LANGUAGE c
420+
AS 'MODULE_PATHNAME';

regress/expected/age_global_graph.out

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,180 @@ NOTICE: graph "ag_graph_3" has been dropped
413413

414414
(1 row)
415415

416+
-----------------------------------------------------------------------------------------------------------------------------
417+
--
418+
-- VLE cache invalidation tests
419+
--
420+
-- These tests verify that the graph version counter properly invalidates
421+
-- the VLE hash table cache when the graph is mutated, and that thin
422+
-- entry lazy property fetch returns correct data.
423+
--
424+
-- Setup: create a graph with a chain a->b->c->d
425+
SELECT * FROM create_graph('vle_cache_test');
426+
NOTICE: graph "vle_cache_test" has been created
427+
create_graph
428+
--------------
429+
430+
(1 row)
431+
432+
SELECT * FROM cypher('vle_cache_test', $$
433+
CREATE (a:Node {name: 'a'})-[:Edge]->(b:Node {name: 'b'})-[:Edge]->(c:Node {name: 'c'})-[:Edge]->(d:Node {name: 'd'})
434+
$$) AS (v agtype);
435+
v
436+
---
437+
(0 rows)
438+
439+
-- VLE query: find all paths from a's neighbors (should find b, b->c, b->c->d)
440+
SELECT * FROM cypher('vle_cache_test', $$
441+
MATCH (a:Node {name: 'a'})-[:Edge*1..3]->(n:Node)
442+
RETURN n.name
443+
ORDER BY n.name
444+
$$) AS (name agtype);
445+
name
446+
------
447+
"b"
448+
"c"
449+
"d"
450+
(3 rows)
451+
452+
-- Now add a new node e connected to d. This should invalidate the cache.
453+
SELECT * FROM cypher('vle_cache_test', $$
454+
MATCH (d:Node {name: 'd'})
455+
CREATE (d)-[:Edge]->(:Node {name: 'e'})
456+
$$) AS (v agtype);
457+
v
458+
---
459+
(0 rows)
460+
461+
-- VLE query again: should now also find e via a->b->c->d->e (4 hops won't reach,
462+
-- but d->e is 1 hop from d, and a->b->c->d->e would be 4 hops from a).
463+
-- Increase range to *1..4 to include e
464+
SELECT * FROM cypher('vle_cache_test', $$
465+
MATCH (a:Node {name: 'a'})-[:Edge*1..4]->(n:Node)
466+
RETURN n.name
467+
ORDER BY n.name
468+
$$) AS (name agtype);
469+
name
470+
------
471+
"b"
472+
"c"
473+
"d"
474+
"e"
475+
(4 rows)
476+
477+
-- Test cache invalidation on DELETE: remove node c and its edges
478+
SELECT * FROM cypher('vle_cache_test', $$
479+
MATCH (c:Node {name: 'c'})
480+
DETACH DELETE c
481+
$$) AS (v agtype);
482+
v
483+
---
484+
(0 rows)
485+
486+
-- VLE query: should only find b now (c is gone, so b->c path is broken)
487+
SELECT * FROM cypher('vle_cache_test', $$
488+
MATCH (a:Node {name: 'a'})-[:Edge*1..4]->(n:Node)
489+
RETURN n.name
490+
ORDER BY n.name
491+
$$) AS (name agtype);
492+
name
493+
------
494+
"b"
495+
(1 row)
496+
497+
-- Test cache invalidation on SET: change b's name property
498+
SELECT * FROM cypher('vle_cache_test', $$
499+
MATCH (b:Node {name: 'b'})
500+
SET b.name = 'b_modified'
501+
RETURN b.name
502+
$$) AS (name agtype);
503+
name
504+
--------------
505+
"b_modified"
506+
(1 row)
507+
508+
-- VLE query: verify the updated property is returned via lazy fetch
509+
SELECT * FROM cypher('vle_cache_test', $$
510+
MATCH (a:Node {name: 'a'})-[:Edge*1..4]->(n:Node)
511+
RETURN n.name
512+
ORDER BY n.name
513+
$$) AS (name agtype);
514+
name
515+
--------------
516+
"b_modified"
517+
(1 row)
518+
519+
-- Test VLE with edge properties (exercises thin entry edge property fetch)
520+
SELECT * FROM drop_graph('vle_cache_test', true);
521+
NOTICE: drop cascades to 4 other objects
522+
DETAIL: drop cascades to table vle_cache_test._ag_label_vertex
523+
drop cascades to table vle_cache_test._ag_label_edge
524+
drop cascades to table vle_cache_test."Node"
525+
drop cascades to table vle_cache_test."Edge"
526+
NOTICE: graph "vle_cache_test" has been dropped
527+
drop_graph
528+
------------
529+
530+
(1 row)
531+
532+
SELECT * FROM create_graph('vle_cache_test2');
533+
NOTICE: graph "vle_cache_test2" has been created
534+
create_graph
535+
--------------
536+
537+
(1 row)
538+
539+
SELECT * FROM cypher('vle_cache_test2', $$
540+
CREATE (a:N {name: 'a'})-[:E {weight: 1}]->(b:N {name: 'b'})-[:E {weight: 2}]->(c:N {name: 'c'})
541+
$$) AS (v agtype);
542+
v
543+
---
544+
(0 rows)
545+
546+
-- VLE path output to verify edge properties are fetched correctly via
547+
-- thin entry lazy fetch. Returning the full path forces build_path()
548+
-- to call get_edge_entry_properties() for each edge in the result.
549+
-- The output must contain the correct weight values (1 and 2).
550+
SELECT * FROM cypher('vle_cache_test2', $$
551+
MATCH p=(a:N {name: 'a'})-[:E *1..2]->(n:N)
552+
RETURN p
553+
ORDER BY n.name
554+
$$) AS (p agtype);
555+
p
556+
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
557+
[{"id": 844424930131969, "label": "N", "properties": {"name": "a"}}::vertex, {"id": 1125899906842626, "label": "E", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"weight": 1}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "b"}}::vertex]::path
558+
[{"id": 844424930131969, "label": "N", "properties": {"name": "a"}}::vertex, {"id": 1125899906842626, "label": "E", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"weight": 1}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "b"}}::vertex, {"id": 1125899906842625, "label": "E", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {"weight": 2}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "c"}}::vertex]::path
559+
(2 rows)
560+
561+
-- VLE edge properties via UNWIND + relationships() to individually verify
562+
-- each edge's properties are correctly fetched from the heap via TID.
563+
SELECT * FROM cypher('vle_cache_test2', $$
564+
MATCH p=(a:N {name: 'a'})-[:E *1..2]->(n:N)
565+
WITH p, n
566+
UNWIND relationships(p) AS e
567+
RETURN n.name, e.weight
568+
ORDER BY n.name, e.weight
569+
$$) AS (name agtype, weight agtype);
570+
name | weight
571+
------+--------
572+
"b" | 1
573+
"c" | 1
574+
"c" | 2
575+
(3 rows)
576+
577+
-- Cleanup
578+
SELECT * FROM drop_graph('vle_cache_test2', true);
579+
NOTICE: drop cascades to 4 other objects
580+
DETAIL: drop cascades to table vle_cache_test2._ag_label_vertex
581+
drop cascades to table vle_cache_test2._ag_label_edge
582+
drop cascades to table vle_cache_test2."N"
583+
drop cascades to table vle_cache_test2."E"
584+
NOTICE: graph "vle_cache_test2" has been dropped
585+
drop_graph
586+
------------
587+
588+
(1 row)
589+
416590
-----------------------------------------------------------------------------------------------------------------------------
417591
--
418592
-- End of tests

regress/sql/age_global_graph.sql

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,103 @@ RESET client_min_messages;
146146
SELECT * FROM drop_graph('ag_graph_1', true);
147147
SELECT * FROM drop_graph('ag_graph_2', true);
148148
SELECT * FROM drop_graph('ag_graph_3', true);
149+
150+
-----------------------------------------------------------------------------------------------------------------------------
151+
--
152+
-- VLE cache invalidation tests
153+
--
154+
-- These tests verify that the graph version counter properly invalidates
155+
-- the VLE hash table cache when the graph is mutated, and that thin
156+
-- entry lazy property fetch returns correct data.
157+
--
158+
159+
-- Setup: create a graph with a chain a->b->c->d
160+
SELECT * FROM create_graph('vle_cache_test');
161+
162+
SELECT * FROM cypher('vle_cache_test', $$
163+
CREATE (a:Node {name: 'a'})-[:Edge]->(b:Node {name: 'b'})-[:Edge]->(c:Node {name: 'c'})-[:Edge]->(d:Node {name: 'd'})
164+
$$) AS (v agtype);
165+
166+
-- VLE query: find all paths from a's neighbors (should find b, b->c, b->c->d)
167+
SELECT * FROM cypher('vle_cache_test', $$
168+
MATCH (a:Node {name: 'a'})-[:Edge*1..3]->(n:Node)
169+
RETURN n.name
170+
ORDER BY n.name
171+
$$) AS (name agtype);
172+
173+
-- Now add a new node e connected to d. This should invalidate the cache.
174+
SELECT * FROM cypher('vle_cache_test', $$
175+
MATCH (d:Node {name: 'd'})
176+
CREATE (d)-[:Edge]->(:Node {name: 'e'})
177+
$$) AS (v agtype);
178+
179+
-- VLE query again: should now also find e via a->b->c->d->e (4 hops won't reach,
180+
-- but d->e is 1 hop from d, and a->b->c->d->e would be 4 hops from a).
181+
-- Increase range to *1..4 to include e
182+
SELECT * FROM cypher('vle_cache_test', $$
183+
MATCH (a:Node {name: 'a'})-[:Edge*1..4]->(n:Node)
184+
RETURN n.name
185+
ORDER BY n.name
186+
$$) AS (name agtype);
187+
188+
-- Test cache invalidation on DELETE: remove node c and its edges
189+
SELECT * FROM cypher('vle_cache_test', $$
190+
MATCH (c:Node {name: 'c'})
191+
DETACH DELETE c
192+
$$) AS (v agtype);
193+
194+
-- VLE query: should only find b now (c is gone, so b->c path is broken)
195+
SELECT * FROM cypher('vle_cache_test', $$
196+
MATCH (a:Node {name: 'a'})-[:Edge*1..4]->(n:Node)
197+
RETURN n.name
198+
ORDER BY n.name
199+
$$) AS (name agtype);
200+
201+
-- Test cache invalidation on SET: change b's name property
202+
SELECT * FROM cypher('vle_cache_test', $$
203+
MATCH (b:Node {name: 'b'})
204+
SET b.name = 'b_modified'
205+
RETURN b.name
206+
$$) AS (name agtype);
207+
208+
-- VLE query: verify the updated property is returned via lazy fetch
209+
SELECT * FROM cypher('vle_cache_test', $$
210+
MATCH (a:Node {name: 'a'})-[:Edge*1..4]->(n:Node)
211+
RETURN n.name
212+
ORDER BY n.name
213+
$$) AS (name agtype);
214+
215+
-- Test VLE with edge properties (exercises thin entry edge property fetch)
216+
SELECT * FROM drop_graph('vle_cache_test', true);
217+
SELECT * FROM create_graph('vle_cache_test2');
218+
219+
SELECT * FROM cypher('vle_cache_test2', $$
220+
CREATE (a:N {name: 'a'})-[:E {weight: 1}]->(b:N {name: 'b'})-[:E {weight: 2}]->(c:N {name: 'c'})
221+
$$) AS (v agtype);
222+
223+
-- VLE path output to verify edge properties are fetched correctly via
224+
-- thin entry lazy fetch. Returning the full path forces build_path()
225+
-- to call get_edge_entry_properties() for each edge in the result.
226+
-- The output must contain the correct weight values (1 and 2).
227+
SELECT * FROM cypher('vle_cache_test2', $$
228+
MATCH p=(a:N {name: 'a'})-[:E *1..2]->(n:N)
229+
RETURN p
230+
ORDER BY n.name
231+
$$) AS (p agtype);
232+
233+
-- VLE edge properties via UNWIND + relationships() to individually verify
234+
-- each edge's properties are correctly fetched from the heap via TID.
235+
SELECT * FROM cypher('vle_cache_test2', $$
236+
MATCH p=(a:N {name: 'a'})-[:E *1..2]->(n:N)
237+
WITH p, n
238+
UNWIND relationships(p) AS e
239+
RETURN n.name, e.weight
240+
ORDER BY n.name, e.weight
241+
$$) AS (name agtype, weight agtype);
242+
243+
-- Cleanup
244+
SELECT * FROM drop_graph('vle_cache_test2', true);
245+
149246
-----------------------------------------------------------------------------------------------------------------------------
150247
--
151248
-- End of tests

sql/age_main.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,14 @@ CREATE FUNCTION ag_catalog._extract_label_id(graphid)
381381
STABLE
382382
PARALLEL SAFE
383383
AS 'MODULE_PATHNAME';
384+
385+
--
386+
-- VLE cache invalidation trigger function.
387+
-- Installed on graph label tables to catch SQL-level mutations
388+
-- (INSERT/UPDATE/DELETE/TRUNCATE) and increment the graph's
389+
-- version counter so VLE caches are properly invalidated.
390+
--
391+
CREATE FUNCTION ag_catalog.age_invalidate_graph_cache()
392+
RETURNS trigger
393+
LANGUAGE c
394+
AS 'MODULE_PATHNAME';

src/backend/age.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,29 @@
2222
#include "optimizer/cypher_paths.h"
2323
#include "parser/cypher_analyze.h"
2424
#include "utils/ag_guc.h"
25+
#include "utils/age_global_graph.h"
26+
27+
#if PG_VERSION_NUM < 170000
28+
#include "miscadmin.h"
29+
30+
/* saved hook pointers for PG < 17 shmem path */
31+
static shmem_request_hook_type prev_shmem_request_hook = NULL;
32+
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
33+
34+
/*
 * Shared-memory request hook, compiled only when PG_VERSION_NUM < 170000.
 * Runs the previously saved request hook (if one was installed) and then
 * calls age_graph_version_shmem_request().
 */
static void age_shmem_request_hook(void)
35+
{
36+
if (prev_shmem_request_hook) /* chain to the hook saved before ours */
37+
prev_shmem_request_hook();
38+
age_graph_version_shmem_request();
39+
}
40+
41+
/*
 * Shared-memory startup hook, compiled only when PG_VERSION_NUM < 170000.
 * Runs the previously saved startup hook (if one was installed) and then
 * calls age_graph_version_shmem_startup().
 */
static void age_shmem_startup_hook(void)
42+
{
43+
if (prev_shmem_startup_hook) /* chain to the hook saved before ours */
44+
prev_shmem_startup_hook();
45+
age_graph_version_shmem_startup();
46+
}
47+
#endif /* PG_VERSION_NUM < 170000 */
2548

2649
PG_MODULE_MAGIC;
2750

@@ -35,6 +58,15 @@ void _PG_init(void)
3558
process_utility_hook_init();
3659
post_parse_analyze_init();
3760
define_config_params();
61+
62+
#if PG_VERSION_NUM < 170000
63+
/* Register shared memory hooks for graph version tracking.
64+
* On PG 17+, DSM is used instead (no hooks needed). */
65+
prev_shmem_request_hook = shmem_request_hook;
66+
shmem_request_hook = age_shmem_request_hook;
67+
prev_shmem_startup_hook = shmem_startup_hook;
68+
shmem_startup_hook = age_shmem_startup_hook;
69+
#endif
3870
}
3971

4072
void _PG_fini(void);

0 commit comments

Comments
 (0)