Skip to content

Commit ef7c834

Browse files
authored
fix(native): type map confidence-aware dedup for engine parity (#885)
* fix(native): type map confidence-aware dedup for engine parity

The native engine's type map used HashMap::collect() (last-wins) while the JS engine used setTypeMapEntry (highest-confidence, first-wins on tie). When the same variable name appeared in multiple function scopes with different type annotations (e.g. `node: TreeSitterNode` and `node: NodeRow` in cfg.ts), the engines disagreed on the resolved type, producing different receiver/calls edges.

Additionally, the Rust JS extractor returned early after finding a type annotation, skipping constructor inference — unlike the JS extractor, which lets constructors (confidence 1.0) override annotations (0.9).

Changes:
- Add `confidence` field to Rust TypeMapEntry and TypeMapInput structs
- Fix Rust edge builder to keep highest-confidence entry per name
- Remove early return in Rust JS extractor; emit both annotation and constructor entries with appropriate confidence levels
- Add confidence values to all 12 Rust language extractors
- Add JS-side type map dedup in buildCallEdgesNative for immediate parity without requiring a native addon rebuild

Verified: receiver edge counts now identical between engines (735=735), cfg.ts correctly resolves to TreeSitterNode on both engines.
* feat(ci): gate release workflow on resolution precision/recall thresholds (#875)

Add resolution quality gates to the benchmark pipeline so regressions are caught before publishing:
- benchmark.yml: run vitest resolution test after the benchmark script, failing the workflow if any language drops below its threshold
- update-benchmark-report.ts: warn on precision >5pp or recall >10pp drop per language between releases
- regression-guard.test.ts: hard-fail CI on precision/recall regressions across releases, with KNOWN_REGRESSIONS exemption support

* style: fix biome formatting in regression guard

* fix: remove invalid #[napi(default = 0.9)] attributes causing Rust compile failure

napi-rs v3 does not support the `default` attribute on `#[napi(object)]` struct fields — only on function parameters. The macro expansion failed, preventing TypeMapEntry and TypeMapInput from being generated, which cascaded into "not found in scope" errors across all extractors and the build pipeline. Removing the attribute is safe because all call sites (JS buildCallEdgesNative and Rust build_pipeline) always provide the confidence value explicitly.

* fix: clarify dedup comment per review — no-op for Map path, needed for Array path (#885)
1 parent d351fce commit ef7c834

20 files changed

Lines changed: 184 additions & 13 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ jobs:
106106
fi
107107
node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/resolution-benchmark.ts $ARGS > resolution-result.json
108108
109+
- name: Gate on resolution thresholds
110+
if: steps.existing.outputs.skip != 'true'
111+
run: npx vitest run tests/benchmarks/resolution/resolution-benchmark.test.ts --reporter=verbose
112+
109113
- name: Merge resolution into build result
110114
if: steps.existing.outputs.skip != 'true'
111115
run: |

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/codegraph-core/src/build_pipeline.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,7 @@ fn build_and_insert_call_edges(
877877
.map(|t| TypeMapInput {
878878
name: t.name.clone(),
879879
type_name: t.type_name.clone(),
880+
confidence: t.confidence,
880881
})
881882
.collect();
882883

crates/codegraph-core/src/edge_builder.rs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ pub struct TypeMapInput {
5757
pub name: String,
5858
#[napi(js_name = "typeName")]
5959
pub type_name: String,
60+
/// Confidence: 0.9 = type annotation, 1.0 = constructor, 0.7 = factory.
61+
pub confidence: f64,
6062
}
6163

6264
#[napi(object)]
@@ -153,10 +155,22 @@ fn process_file<'a>(
153155
.map(|im| (im.name.as_str(), im.file.as_str()))
154156
.collect();
155157

156-
let type_map: HashMap<&str, &str> = file_input
157-
.type_map.iter()
158-
.map(|tm| (tm.name.as_str(), tm.type_name.as_str()))
159-
.collect();
158+
// Build type map keeping the highest-confidence entry per name
159+
// (first-wins on tie), matching the JS setTypeMapEntry behaviour.
160+
let mut type_map: HashMap<&str, (&str, f64)> = HashMap::new();
161+
for tm in &file_input.type_map {
162+
let entry = type_map.entry(tm.name.as_str());
163+
match entry {
164+
std::collections::hash_map::Entry::Vacant(e) => {
165+
e.insert((tm.type_name.as_str(), tm.confidence));
166+
}
167+
std::collections::hash_map::Entry::Occupied(mut e) => {
168+
if tm.confidence > e.get().1 {
169+
e.insert((tm.type_name.as_str(), tm.confidence));
170+
}
171+
}
172+
}
173+
}
160174

161175
let file_nodes: Vec<&NodeInfo> = all_nodes.iter().filter(|n| n.file == *rel_path).collect();
162176
let defs_with_ids: Vec<DefWithId> = file_input.definitions.iter().map(|d| {
@@ -210,7 +224,7 @@ fn resolve_call_targets<'a>(
210224
call: &CallInfo,
211225
rel_path: &str,
212226
imported_from: Option<&str>,
213-
type_map: &HashMap<&str, &str>,
227+
type_map: &HashMap<&str, (&str, f64)>,
214228
) -> Vec<&'a NodeInfo> {
215229
// 1. Import-aware resolution
216230
if let Some(imp_file) = imported_from {
@@ -236,7 +250,7 @@ fn resolve_call_targets<'a>(
236250

237251
// 4. Type-aware resolution via receiver → type map
238252
if let Some(ref receiver) = call.receiver {
239-
if let Some(type_name) = type_map.get(receiver.as_str()) {
253+
if let Some(&(type_name, _conf)) = type_map.get(receiver.as_str()) {
240254
let qualified = format!("{}.{}", type_name, call.name);
241255
let typed: Vec<&NodeInfo> = ctx.nodes_by_name
242256
.get(qualified.as_str())
@@ -296,15 +310,15 @@ fn emit_call_edges(
296310
/// Emit a receiver edge from caller to the receiver's type node (if applicable).
297311
fn emit_receiver_edge(
298312
ctx: &EdgeContext, call: &CallInfo, caller_id: u32, rel_path: &str,
299-
type_map: &HashMap<&str, &str>,
313+
type_map: &HashMap<&str, (&str, f64)>,
300314
seen_edges: &mut HashSet<u64>, edges: &mut Vec<ComputedEdge>,
301315
) {
302316
let Some(ref receiver) = call.receiver else { return };
303317
if ctx.builtin_set.contains(receiver.as_str())
304318
|| receiver == "this" || receiver == "self" || receiver == "super"
305319
{ return; }
306320

307-
let effective_receiver = type_map.get(receiver.as_str()).copied().unwrap_or(receiver.as_str());
321+
let effective_receiver = type_map.get(receiver.as_str()).map(|&(t, _)| t).unwrap_or(receiver.as_str());
308322
let type_resolved = effective_receiver != receiver.as_str();
309323

310324
let samefile = ctx.nodes_by_name_and_file

crates/codegraph-core/src/extractors/c.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ fn match_c_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dept
3939
symbols.type_map.push(TypeMapEntry {
4040
name: final_name,
4141
type_name: type_name.to_string(),
42+
confidence: 0.9,
4243
});
4344
}
4445
}
@@ -55,6 +56,7 @@ fn match_c_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dept
5556
symbols.type_map.push(TypeMapEntry {
5657
name,
5758
type_name: node_text(&type_node, source).to_string(),
59+
confidence: 0.9,
5860
});
5961
}
6062
}

crates/codegraph-core/src/extractors/cpp.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ fn match_cpp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _de
3838
symbols.type_map.push(TypeMapEntry {
3939
name: final_name,
4040
type_name: type_name.to_string(),
41+
confidence: 0.9,
4142
});
4243
}
4344
}
@@ -54,6 +55,7 @@ fn match_cpp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _de
5455
symbols.type_map.push(TypeMapEntry {
5556
name,
5657
type_name: node_text(&type_node, source).to_string(),
58+
confidence: 0.9,
5759
});
5860
}
5961
}

crates/codegraph-core/src/extractors/csharp.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ fn match_csharp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols,
428428
symbols.type_map.push(TypeMapEntry {
429429
name: node_text(&name_node, source).to_string(),
430430
type_name: type_name.to_string(),
431+
confidence: 0.9,
431432
});
432433
}
433434
}
@@ -445,6 +446,7 @@ fn match_csharp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols,
445446
symbols.type_map.push(TypeMapEntry {
446447
name: node_text(&name_node, source).to_string(),
447448
type_name: type_name.to_string(),
449+
confidence: 0.9,
448450
});
449451
}
450452
}

crates/codegraph-core/src/extractors/go.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ fn collect_go_typed_identifiers(node: &Node, source: &[u8], type_map: &mut Vec<T
330330
type_map.push(TypeMapEntry {
331331
name: node_text(&child, source).to_string(),
332332
type_name: type_name.to_string(),
333+
confidence: 0.9,
333334
});
334335
}
335336
}

crates/codegraph-core/src/extractors/java.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ fn match_java_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _d
3939
symbols.type_map.push(TypeMapEntry {
4040
name: node_text(&name_node, source).to_string(),
4141
type_name: type_name.to_string(),
42+
confidence: 0.9,
4243
});
4344
}
4445
}
@@ -54,6 +55,7 @@ fn match_java_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _d
5455
symbols.type_map.push(TypeMapEntry {
5556
name: node_text(&name_node, source).to_string(),
5657
type_name: type_name.to_string(),
58+
confidence: 0.9,
5759
});
5860
}
5961
}

crates/codegraph-core/src/extractors/javascript.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,24 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep
5858
if let Some(name_n) = node.child_by_field_name("name") {
5959
if name_n.kind() == "identifier" {
6060
let var_name = node_text(&name_n, source);
61-
// Type annotation takes priority
61+
// Type annotation: confidence 0.9
6262
if let Some(type_anno) = find_child(node, "type_annotation") {
6363
if let Some(type_name) = extract_simple_type_name(&type_anno, source) {
6464
symbols.type_map.push(TypeMapEntry {
6565
name: var_name.to_string(),
6666
type_name: type_name.to_string(),
67+
confidence: 0.9,
6768
});
68-
return; // Skip new_expression check — annotation wins
6969
}
7070
}
71-
// Fall back to new expression inference
71+
// Constructor: confidence 1.0 (overrides annotation in edge builder)
7272
if let Some(value_n) = node.child_by_field_name("value") {
7373
if value_n.kind() == "new_expression" {
7474
if let Some(type_name) = extract_new_expr_type_name(&value_n, source) {
7575
symbols.type_map.push(TypeMapEntry {
7676
name: var_name.to_string(),
7777
type_name: type_name.to_string(),
78+
confidence: 1.0,
7879
});
7980
}
8081
}
@@ -93,6 +94,7 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep
9394
symbols.type_map.push(TypeMapEntry {
9495
name: node_text(&name_node, source).to_string(),
9596
type_name: type_name.to_string(),
97+
confidence: 0.9,
9698
});
9799
}
98100
}

0 commit comments

Comments
 (0)