Skip to content

Commit b075f5a

Browse files
authored
fix(incremental): prevent duplicate edges on rebuild (#979) (#998)
* fix(incremental): prevent duplicate edges on rebuild (#979) Stage 6b (reparse_barrel_candidates) purged only 'imports' and 'reexports' edges for re-parsed barrel files before Stage 7 re-emitted every edge kind. As a result, every incremental rebuild appended duplicate 'calls', 'receiver', 'extends', 'implements', 'imports-type', and 'dynamic-imports' edges for any hybrid barrel file picked up via reverse-dep expansion, causing the edge count to grow by ~250 per rebuild. Extend the scoped DELETE to cover all 8 edge kinds that Stage 7 re-emits. 'contains' and 'parameter_of' are intentionally excluded because Stage 5 (insert_nodes) only runs on the original changed + reverse-dep file set, not on barrel candidates that are merged in here after Stage 5 — wiping them would permanently drop those edges. Add a regression test (tests/integration/incremental-edge-duplication.test.ts) that runs three consecutive incremental rebuilds against a hybrid-barrel fixture (reexports AND local definitions that call helpers) and asserts the edge count is stable and no new duplicates are introduced beyond the pre-existing full-build baseline. Before: 28487 -> 28699 -> 28949 -> 29199 (growing ~250/rebuild) After: 28487 -> 28445 -> 28445 -> 28445 (stable) Fixes #979 Impact: 7 functions changed, 5 affected * fix(incremental): use negative edge-kind filter for barrel re-parse (#998) Replace the positive allowlist in Stage 6b's barrel-candidate DELETE with a `NOT IN ('contains', 'parameter_of')` filter so any future edge kind added to Stage 7 is automatically purged on re-parse. Previously, adding a new kind to Stage 7 would silently reintroduce the duplicate-edge accumulation bug this PR fixes (#979). Also refresh the adjacent comment: "barrel files are re-export-only" was contradicted by the hybrid-barrel fixture this PR adds. 
Impact: 1 function changed, 1 affected * test(incremental): assert incremental edge total matches fresh full build (#998) Add Invariant 3 to the #979 regression test: after the 3 incremental bumps, the edge total must equal a clean full build over the same code state. The previous invariants only asserted stability across rebuilds and a duplicate-free (source, target, kind) set, which would miss stale edges pointing at orphaned node ids that escape the scoped DELETE.
1 parent 3adc971 commit b075f5a

6 files changed

Lines changed: 150 additions & 5 deletions

File tree

crates/codegraph-core/src/build_pipeline.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -658,8 +658,10 @@ fn reparse_barrel_candidates(
658658
if !barrel_paths_to_parse.is_empty() {
659659
barrel_paths_to_parse.sort();
660660
barrel_paths_to_parse.dedup();
661-
// Barrel files are re-export-only — no function bodies or dataflow,
662-
// so skip dataflow/AST analysis to avoid unnecessary overhead.
661+
// Re-parse barrel candidates — these may be hybrid barrels (reexports
662+
// AND local definitions / call sites, see #979). Dataflow/AST analysis
663+
// is skipped because the barrel is not itself a "changed" file; Stage 7
664+
// will reconstruct all outgoing edge kinds from the fresh parse.
663665
let barrel_parsed = parallel::parse_files_parallel(
664666
&barrel_paths_to_parse,
665667
root_dir,
@@ -669,11 +671,23 @@ fn reparse_barrel_candidates(
669671
for mut sym in barrel_parsed {
670672
let rel = relative_path(root_dir, &sym.file);
671673
sym.file = rel.clone();
672-
// Delete outgoing import/reexport edges for barrel files being re-parsed
673-
// (scoped to import-related kinds to avoid dropping calls edges)
674+
// Delete every outgoing edge kind that Stage 7 re-emits for re-parsed
675+
// barrel candidates. Previously only 'imports' and 'reexports' were
676+
// purged, so 'calls', 'receiver', 'extends', 'implements',
677+
// 'imports-type', and 'dynamic-imports' accumulated duplicates on
678+
// every incremental rebuild (#979).
679+
//
680+
// Use a negative filter (`NOT IN`) rather than an allowlist so any
681+
// future edge kind added to Stage 7 is automatically covered. Only
682+
// 'contains' and 'parameter_of' must be preserved: those are emitted
683+
// by Stage 5 (insert_nodes) which only runs on the original
684+
// file_symbols (changed + reverse-deps). Barrel candidates are
685+
// merged into file_symbols here in Stage 6b *after* Stage 5 has
686+
// already run, so wiping contains/parameter_of would permanently
687+
// drop them.
674688
let _ = conn.execute(
675689
"DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?1) \
676-
AND kind IN ('imports', 'reexports')",
690+
AND kind NOT IN ('contains', 'parameter_of')",
677691
rusqlite::params![&rel],
678692
);
679693
// Re-resolve imports for the barrel file
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import { run } from './consumers/driver.js';
2+
3+
export function main(inputs) {
4+
return run(inputs);
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import { processAll } from '../core/index.js';
2+
3+
export function run(inputs) {
4+
return processAll(inputs);
5+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
export function clampValue(v, lo, hi) {
2+
if (v < lo) return lo;
3+
if (v > hi) return hi;
4+
return v;
5+
}
6+
7+
export function doubleValue(v) {
8+
return v * 2;
9+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Hybrid barrel: re-exports from helpers.js AND has local definitions that call helpers.
2+
export { doubleValue } from './helpers.js';
3+
4+
import { clampValue, doubleValue } from './helpers.js';
5+
6+
export function processValue(v) {
7+
return doubleValue(clampValue(v, 0, 100));
8+
}
9+
10+
export function processAll(values) {
11+
return values.map((v) => processValue(v));
12+
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* Regression test for #979: incremental rebuilds leak duplicate edges.
3+
*
4+
* Root cause: when `reparse_barrel_candidates` (Stage 6b, native engine) picks
5+
* up a file imported by a reverse-dep, it used to purge only the 'imports' and
6+
* 'reexports' edge kinds before Stage 7 re-emitted every edge kind, so every
7+
* rebuild appended new copies of 'calls', 'receiver', 'extends', 'implements',
8+
* 'imports-type', and 'dynamic-imports' edges.
9+
*
10+
* This test modifies a source file multiple times in a row and asserts:
11+
* 1. The total edge count does not grow across incremental rebuilds.
12+
* 2. The count of `(source_id, target_id, kind)` rows never exceeds the
13+
* pre-existing duplicates from a fresh full build (i.e. incremental
14+
* does not introduce new duplicates).
15+
*/
16+
import fs from 'node:fs';
17+
import os from 'node:os';
18+
import path from 'node:path';
19+
import Database from 'better-sqlite3';
20+
import { describe, expect, it } from 'vitest';
21+
import { buildGraph } from '../../src/domain/graph/builder.js';
22+
23+
const FIXTURE_DIR = path.join(import.meta.dirname, '..', 'fixtures', 'issue-979-hybrid-barrel');
24+
25+
function copyDirSync(src: string, dest: string) {
26+
fs.mkdirSync(dest, { recursive: true });
27+
for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
28+
const s = path.join(src, entry.name);
29+
const d = path.join(dest, entry.name);
30+
if (entry.isDirectory()) copyDirSync(s, d);
31+
else fs.copyFileSync(s, d);
32+
}
33+
}
34+
35+
function edgeStats(dbPath: string) {
36+
const db = new Database(dbPath, { readonly: true });
37+
try {
38+
const total = (db.prepare('SELECT COUNT(*) AS c FROM edges').get() as { c: number }).c;
39+
const duplicates = (
40+
db
41+
.prepare(
42+
`SELECT source_id, target_id, kind, COUNT(*) AS c FROM edges
43+
GROUP BY source_id, target_id, kind HAVING c > 1`,
44+
)
45+
.all() as Array<{ c: number }>
46+
).reduce((sum, row) => sum + row.c - 1, 0);
47+
return { total, duplicates };
48+
} finally {
49+
db.close();
50+
}
51+
}
52+
53+
describe('Issue #979: incremental edges do not duplicate', () => {
54+
it('3 incremental rebuilds produce stable edge counts with no new duplicates', async () => {
55+
const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-979-'));
56+
const fullDir = path.join(tmpBase, 'full');
57+
const incrDir = path.join(tmpBase, 'incr');
58+
59+
try {
60+
copyDirSync(FIXTURE_DIR, fullDir);
61+
copyDirSync(FIXTURE_DIR, incrDir);
62+
63+
// Baseline full build on the incr copy so subsequent rebuilds are truly incremental.
64+
await buildGraph(incrDir, { incremental: false, skipRegistry: true });
65+
66+
// Apply 3 rounds of "change one file" + incremental rebuild, recording
67+
// edge totals and duplicate counts after each rebuild.
68+
const history: Array<{ total: number; duplicates: number }> = [];
69+
for (let i = 0; i < 3; i++) {
70+
fs.appendFileSync(path.join(incrDir, 'consumers', 'driver.js'), `\n// bump ${i}\n`);
71+
await buildGraph(incrDir, { incremental: true, skipRegistry: true });
72+
history.push(edgeStats(path.join(incrDir, '.codegraph', 'graph.db')));
73+
}
74+
75+
// Mirror all 3 mutations on the full copy, then do a single clean full build.
76+
for (let i = 0; i < 3; i++) {
77+
fs.appendFileSync(path.join(fullDir, 'consumers', 'driver.js'), `\n// bump ${i}\n`);
78+
}
79+
await buildGraph(fullDir, { incremental: false, skipRegistry: true });
80+
const freshFull = edgeStats(path.join(fullDir, '.codegraph', 'graph.db'));
81+
82+
// Invariant 1: incremental edge count must not grow across rebuilds.
83+
expect(history[1].total).toBe(history[0].total);
84+
expect(history[2].total).toBe(history[0].total);
85+
86+
// Invariant 2: incremental must not introduce new duplicates beyond the
87+
// pre-existing duplicates present in a clean full build.
88+
expect(history[2].duplicates).toBeLessThanOrEqual(freshFull.duplicates);
89+
90+
// Invariant 3: after applying all 3 bumps, both dirs describe the same
91+
// code, so the incremental edge total must match a clean full build.
92+
// This catches stale edges that survive the scoped DELETE (e.g. edges
93+
// pointing at orphaned node ids) which would not be flagged as
94+
// (source, target, kind) duplicates.
95+
expect(history[2].total).toBe(freshFull.total);
96+
} finally {
97+
fs.rmSync(tmpBase, { recursive: true, force: true });
98+
}
99+
}, 60_000);
100+
});

0 commit comments

Comments (0)