Skip to content

Commit 30c8564

Browse files
authored
perf(native): scope node loading in call-edge builder for incremental builds (#976)
* perf(native): scope node loading in call-edge builder for incremental builds On incremental builds, `build_and_insert_call_edges` was loading every node in the graph (~13k rows in codegraph itself) just to resolve a handful of changed files' call sites. The deserialization cost dominates the actual edge-building work for small edits. Scope the SQL to only the files being processed plus their resolved import targets. The import context already knows the resolved target paths, so we stage them in a temp table and `INNER JOIN` against the nodes table. Full builds still load every node (there is no smaller set to work with). Guarded on `is_incremental && file_symbols.len() < 200` so the scoped path only kicks in when it actually wins — very large incremental builds fall through to the full-load path to avoid temp-table overhead. No behavior change: the scoped set is a strict superset of the nodes the call-edge builder needs (changed files + their import targets). Cross-file call resolution still sees every target it could previously. * fix(native): gate scoped call-edge node loading on codebase size Mirror the JS `loadNodes` gate in `src/domain/graph/builder/stages/build-edges.ts`: only scope call-edge node loading when the codebase is large enough (existing file-node count > 20) and the incremental change set is small (<= 5 files). Tiny fixtures skip the scoped path entirely — the savings are negligible at that scale and the scoped set can miss transitively-required nodes needed for receiver-type resolution, breaking the incremental-edge-parity tests (e.g. `main -> Calculator.compute`). Also include `barrel_only_files` in the scoped set to match JS exactly. Fixes the "new export added" parity regression introduced by #976. Impact: 1 function changed, 1 affected * fix(native): include barrel-ultimate targets in scoped edge-build set Addresses review feedback on #976: - greptile P1: relevant_files only included direct import targets. 
Barrel re-exports resolve `imported_names[].file` to the ultimate definition file 2+ hops away, so that file's nodes were absent from `all_nodes` and the A->C call edge was silently dropped. Now we walk the barrel chain during scoping and add every resolved ultimate file. - greptile P2: align temp-table usage with the existing `_analysis_files` pattern (qualified `temp.` on all non-CREATE ops) and add a covering index on `_edge_files.file` so the INNER JOIN is a lookup. Impact: 1 function changed, 1 affected
1 parent 725d61d commit 30c8564

1 file changed

Lines changed: 137 additions & 18 deletions

File tree

crates/codegraph-core/src/build_pipeline.rs

Lines changed: 137 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ pub fn run_pipeline(
328328
// Build call edges using existing Rust edge_builder (internal path)
329329
// For now, call edges are built via the existing napi-exported function's
330330
// internal logic. We load nodes from DB and pass to the edge builder.
331-
build_and_insert_call_edges(conn, &file_symbols, &import_ctx);
331+
build_and_insert_call_edges(conn, &file_symbols, &import_ctx, !change_result.is_full_build);
332332

333333
timing.edges_ms = t0.elapsed().as_secs_f64() * 1000.0;
334334

@@ -842,32 +842,151 @@ fn build_file_hash_entries(
842842
}
843843

844844
/// Build call edges using the Rust edge_builder and insert them.
845+
///
846+
/// `is_incremental`: when true, the set of nodes loaded from the DB may be
847+
/// scoped to the files being processed plus their resolved import targets.
848+
/// Scoping is gated on:
849+
/// - small incremental change set (`file_symbols.len() <= FAST_PATH_MAX_CHANGED_FILES`)
850+
/// - large-enough existing codebase (`file-node count > FAST_PATH_MIN_EXISTING_FILES`)
851+
/// Both gates mirror the JS path in `build-edges.ts` (#976) to avoid
852+
/// exercising the scoped path on tiny fixtures where the scoped set can
853+
/// miss transitively-required nodes (e.g. a call site whose receiver type
854+
/// is declared in a file that isn't a direct import target).
855+
///
856+
/// Full builds always load every node — there is no smaller set anyway.
845857
fn build_and_insert_call_edges(
846858
conn: &Connection,
847859
file_symbols: &HashMap<String, FileSymbols>,
848860
import_ctx: &ImportEdgeContext,
861+
is_incremental: bool,
849862
) {
850863
use crate::edge_builder::*;
851864

852-
// Load all callable nodes from DB
853865
let node_kind_filter = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')";
854-
let sql = format!("SELECT id, name, kind, file, line FROM nodes WHERE {node_kind_filter}");
855-
let mut stmt = match conn.prepare(&sql) {
856-
Ok(s) => s,
857-
Err(_) => return,
866+
867+
// Gate parity with `loadNodes` in `src/domain/graph/builder/stages/build-edges.ts`:
868+
// isFullBuild = false
869+
// && fileSymbols.size <= smallFilesThreshold (5)
870+
// && existingFileCount > FAST_PATH_MIN_EXISTING_FILES (20)
871+
// Small fixtures skip the scoped path entirely — the savings are
872+
// negligible at that scale and the scoped set can miss nodes that the
873+
// edge builder needs for receiver-type resolution (#976).
874+
let existing_file_count: i64 = conn
875+
.query_row(
876+
"SELECT COUNT(*) FROM nodes WHERE kind = 'file'",
877+
[],
878+
|row| row.get(0),
879+
)
880+
.unwrap_or(0);
881+
let scope_eligible = is_incremental
882+
&& file_symbols.len() <= crate::constants::FAST_PATH_MAX_CHANGED_FILES
883+
&& existing_file_count > crate::constants::FAST_PATH_MIN_EXISTING_FILES;
884+
885+
let all_nodes: Vec<NodeInfo> = if scope_eligible {
886+
// Build the scoped set: changed/reverse-dep files + their resolved
887+
// import targets + any barrel files on the path + the **ultimate**
888+
// source files that barrel chains resolve to. The FileEdgeInput
889+
// construction below (see `imported_names` at ~L1035) rewrites
890+
// `target_file` to the ultimate definition file via
891+
// `resolve_barrel_export`; if that file isn't in `relevant_files`
892+
// the edge builder's `nodes_by_name_and_file` lookup returns
893+
// nothing and the call edge is silently dropped (greptile P1).
894+
let mut relevant_files: HashSet<String> = file_symbols.keys().cloned().collect();
895+
for (rel_path, symbols) in file_symbols {
896+
let abs_file = Path::new(&import_ctx.root_dir).join(rel_path);
897+
let abs_str = abs_file.to_str().unwrap_or("");
898+
for imp in &symbols.imports {
899+
let resolved = import_ctx.get_resolved(abs_str, &imp.source);
900+
if resolved.is_empty() {
901+
continue;
902+
}
903+
relevant_files.insert(resolved.clone());
904+
// If the resolved target is a barrel, walk the re-export
905+
// chain and add every ultimate definition file that a
906+
// named import could resolve to.
907+
if import_ctx.is_barrel_file(&resolved) {
908+
for name in &imp.names {
909+
let clean_name = name.strip_prefix("* as ").unwrap_or(name);
910+
let mut visited = HashSet::new();
911+
if let Some(ultimate) = import_ctx.resolve_barrel_export(
912+
&resolved,
913+
clean_name,
914+
&mut visited,
915+
) {
916+
relevant_files.insert(ultimate);
917+
}
918+
}
919+
}
920+
}
921+
}
922+
for barrel_path in &import_ctx.barrel_only_files {
923+
relevant_files.insert(barrel_path.clone());
924+
}
925+
926+
if relevant_files.is_empty() {
927+
Vec::new()
928+
} else {
929+
// Schema qualification matches the existing `_analysis_files`
930+
// pattern below: unqualified CREATE (temp schema is the
931+
// default for TEMP tables), qualified `temp.` for every
932+
// subsequent op. Index the file column so the INNER JOIN is
933+
// a lookup rather than a table scan (greptile P2).
934+
let _ = conn.execute_batch(
935+
"CREATE TEMP TABLE IF NOT EXISTS _edge_files (file TEXT NOT NULL);\n CREATE INDEX IF NOT EXISTS _edge_files_file_idx ON _edge_files (file);",
936+
);
937+
let _ = conn.execute("DELETE FROM temp._edge_files", []);
938+
{
939+
let mut ins =
940+
match conn.prepare("INSERT INTO temp._edge_files (file) VALUES (?1)") {
941+
Ok(s) => s,
942+
Err(_) => return,
943+
};
944+
for f in &relevant_files {
945+
let _ = ins.execute(rusqlite::params![f]);
946+
}
947+
}
948+
949+
let sql = format!(
950+
"SELECT n.id, n.name, n.kind, n.file, n.line FROM nodes n \
951+
INNER JOIN temp._edge_files ef ON n.file = ef.file \
952+
WHERE n.{node_kind_filter}",
953+
);
954+
let nodes: Vec<NodeInfo> = match conn.prepare(&sql) {
955+
Ok(mut stmt) => stmt
956+
.query_map([], |row| {
957+
Ok(NodeInfo {
958+
id: row.get::<_, i64>(0)? as u32,
959+
name: row.get(1)?,
960+
kind: row.get(2)?,
961+
file: row.get(3)?,
962+
line: row.get::<_, i64>(4)? as u32,
963+
})
964+
})
965+
.map(|rows| rows.filter_map(|r| r.ok()).collect())
966+
.unwrap_or_default(),
967+
Err(_) => Vec::new(),
968+
};
969+
let _ = conn.execute("DROP TABLE IF EXISTS temp._edge_files", []);
970+
nodes
971+
}
972+
} else {
973+
let sql = format!("SELECT id, name, kind, file, line FROM nodes WHERE {node_kind_filter}");
974+
match conn.prepare(&sql) {
975+
Ok(mut stmt) => stmt
976+
.query_map([], |row| {
977+
Ok(NodeInfo {
978+
id: row.get::<_, i64>(0)? as u32,
979+
name: row.get(1)?,
980+
kind: row.get(2)?,
981+
file: row.get(3)?,
982+
line: row.get::<_, i64>(4)? as u32,
983+
})
984+
})
985+
.map(|rows| rows.filter_map(|r| r.ok()).collect())
986+
.unwrap_or_default(),
987+
Err(_) => Vec::new(),
988+
}
858989
};
859-
let all_nodes: Vec<NodeInfo> = stmt
860-
.query_map([], |row| {
861-
Ok(NodeInfo {
862-
id: row.get::<_, i64>(0)? as u32,
863-
name: row.get(1)?,
864-
kind: row.get(2)?,
865-
file: row.get(3)?,
866-
line: row.get::<_, i64>(4)? as u32,
867-
})
868-
})
869-
.map(|rows| rows.filter_map(|r| r.ok()).collect())
870-
.unwrap_or_default();
871990

872991
if all_nodes.is_empty() {
873992
return;

0 commit comments

Comments
 (0)