Skip to content

Commit 725d61d

Browse files
authored
fix(extractors): guard empty-text identifiers and isolate extractor crashes (#972)
* fix(extractors): guard empty-text identifiers and isolate extractor crashes The JavaScript extractor's factory-call type-map path called `objName[0]!.toLowerCase()` without guarding for empty strings. When tree-sitter produces a partial or corrupted tree the identifier's `text` can be `''`, which makes `objName[0]` undefined and crashes with "Cannot read properties of undefined (reading 'toLowerCase')". The Python extractor already uses the correct short-circuit pattern; mirror it in JS. Additionally wrap the WASM extractor invocation in `wasmExtractSymbols` in a try/catch that logs via `warn()` and returns `null`, mirroring the existing parse-error handling directly above. Previously any extractor runtime error would bubble up and kill the whole graph build; now the build continues past the broken file. Includes a regression test exercising a mock tree with an empty-text identifier to verify the guard no longer throws. Fixes #964 Impact: 2 functions changed, 16 affected * fix(parser): free WASM tree on extractor error path (#972) Addresses Greptile review feedback: when entry.extractor() throws, the tree object allocated by parser.parse was never deleted. Since web-tree-sitter trees are backed by WASM linear memory and not garbage-collected, each extractor crash leaked one tree for the process lifetime. Matches the finally/delete pattern already used in backfillTypeMap and backfillTypeMapBatch. Impact: 1 functions changed, 12 affected
1 parent 0ecd0fc commit 725d61d

3 files changed

Lines changed: 88 additions & 2 deletions

File tree

src/domain/parser.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,16 @@ function wasmExtractSymbols(
786786
if (!entry) return null;
787787
const query = _queryCache.get(entry.id) ?? undefined;
788788
// Query (web-tree-sitter) is structurally compatible with TreeSitterQuery at runtime
789-
const symbols = entry.extractor(tree as any, filePath, query as any);
789+
let symbols: ExtractorOutput | null;
790+
try {
791+
symbols = entry.extractor(tree as any, filePath, query as any);
792+
} catch (e: unknown) {
793+
warn(`Extractor error in ${filePath}: ${(e as Error).message}`);
794+
// Free WASM tree to prevent memory leak — web-tree-sitter trees are backed
795+
// by WASM linear memory and are not garbage-collected automatically.
796+
if (typeof (tree as any).delete === 'function') (tree as any).delete();
797+
return null;
798+
}
790799
return symbols ? { symbols, tree, langId: entry.id } : null;
791800
}
792801

src/extractors/javascript.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1182,7 +1182,11 @@ function handleVarDeclaratorTypeMap(
11821182
const obj = fn.childForFieldName('object');
11831183
if (obj && obj.type === 'identifier') {
11841184
const objName = obj.text;
1185-
if (objName[0]! !== objName[0]!.toLowerCase() && !BUILTIN_GLOBALS.has(objName)) {
1185+
if (
1186+
objName[0] &&
1187+
objName[0] !== objName[0].toLowerCase() &&
1188+
!BUILTIN_GLOBALS.has(objName)
1189+
) {
11861190
setTypeMapEntry(typeMap, nameN.text, objName, 0.7);
11871191
}
11881192
}

tests/parsers/javascript.test.ts

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,79 @@ describe('JavaScript parser', () => {
184184
expect(symbols.typeMap.has('d')).toBe(false);
185185
expect(symbols.typeMap.has('p')).toBe(false);
186186
});
187+
188+
// Regression: GH #964 — tree-sitter can produce partial/corrupted trees in
189+
// which an identifier node has empty `text`. Previously the factory path
190+
// crashed with "Cannot read properties of undefined (reading 'toLowerCase')"
191+
// because `objName[0]` is undefined for an empty string. The guard now
192+
// mirrors the Python extractor's short-circuit check.
193+
it('does not crash when factory call has an empty-text identifier', () => {
194+
// Build a mock tree that mimics `const x = <empty-identifier>.create()`.
195+
// The walk path calls handleVarDeclaratorTypeMap → factory branch, which
196+
// reads `obj.text` ("") and would previously call "".toLowerCase() via
197+
// `objName[0]!.toLowerCase()`. The fix's `objName[0] &&` guard short-circuits.
198+
const pos = { row: 0, column: 0 };
199+
const makeNode = (
200+
type: string,
201+
text = '',
202+
fields: Record<string, any> = {},
203+
children: any[] = [],
204+
) => {
205+
const node: any = {
206+
type,
207+
text,
208+
startPosition: pos,
209+
endPosition: pos,
210+
childCount: children.length,
211+
child: (i: number) => children[i] ?? null,
212+
childForFieldName: (name: string) => fields[name] ?? null,
213+
parent: null,
214+
};
215+
for (const c of children) {
216+
c.parent = node;
217+
}
218+
return node;
219+
};
220+
221+
const emptyIdentifier = makeNode('identifier', '');
222+
const createName = makeNode('property_identifier', 'create');
223+
const memberExpr = makeNode(
224+
'member_expression',
225+
'.create',
226+
{
227+
object: emptyIdentifier,
228+
property: createName,
229+
},
230+
[emptyIdentifier, createName],
231+
);
232+
const callExpr = makeNode(
233+
'call_expression',
234+
'.create()',
235+
{
236+
function: memberExpr,
237+
},
238+
[memberExpr],
239+
);
240+
const nameIdent = makeNode('identifier', 'x');
241+
const declarator = makeNode(
242+
'variable_declarator',
243+
'x = .create()',
244+
{
245+
name: nameIdent,
246+
value: callExpr,
247+
},
248+
[nameIdent, callExpr],
249+
);
250+
const lexDecl = makeNode('lexical_declaration', 'const x = .create();', {}, [declarator]);
251+
const root = makeNode('program', '', {}, [lexDecl]);
252+
const fakeTree: any = { rootNode: root };
253+
254+
// Before the fix this would throw TypeError. Now it should complete and
255+
// simply leave `x` out of the typeMap (empty identifier is ignored).
256+
expect(() => extractSymbols(fakeTree, 'test.js')).not.toThrow();
257+
const symbols = extractSymbols(fakeTree, 'test.js');
258+
expect(symbols.typeMap.has('x')).toBe(false);
259+
});
187260
});
188261

189262
it('does not set receiver for .call()/.apply()/.bind() unwrapped calls', () => {

0 commit comments

Comments
 (0)