Skip to content

Commit a56a0c9

Browse files
authored
feat(code): port posthog detection logic to new pkg (#1562)
## Problem

This adds a new `@posthog/enricher` package that provides PostHog SDK usage detection and enrichment capabilities for source code analysis. The package can detect PostHog method calls, feature flag usage, variant branches, and initialization calls across multiple programming languages (JavaScript, TypeScript, Python, Go, Ruby) using tree-sitter parsers.

## Changes

- **New enricher package**: Created `packages/enricher/` with TypeScript source code for detecting PostHog SDK usage patterns
- **Tree-sitter integration**: Added WASM grammar files for JavaScript, TypeScript, Python, Go, and Ruby parsing
- **Detection capabilities**:
  - PostHog method calls (capture, getFeatureFlag, isFeatureEnabled, etc.)
  - Feature flag assignments and variant branches (if/else chains, switch statements)
  - PostHog initialization calls (posthog.init, new PostHog constructors)
  - Function definitions and client aliases
- **Flag classification**: Added utilities for classifying flags as boolean, multivariate, or remote config
- **Stale flag detection**: Added logic to identify potentially stale feature flags
- **Multi-language support**: Language-specific queries and method mappings for different PostHog SDKs
- **Build tooling**: Added tsup configuration, TypeScript setup, and Vitest for testing
- **Grammar fetching**: Added script to build tree-sitter WASM files from grammar packages
- **Development integration**: Added enricher to mprocs.yaml for local development
1 parent 50e8c06 commit a56a0c9

24 files changed

Lines changed: 4662 additions & 5 deletions

mprocs.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ procs:
1313
git:
1414
shell: 'node scripts/pnpm-run.mjs --filter @posthog/git run dev'
1515

16+
enricher:
17+
shell: 'node scripts/pnpm-run.mjs --filter @posthog/enricher run dev'
18+
1619
storybook:
1720
shell: 'node scripts/pnpm-run.mjs --filter code run storybook'
1821
autostart: false
207 KB
Binary file not shown.
355 KB
Binary file not shown.
448 KB
Binary file not shown.
2.03 MB
Binary file not shown.
1.38 MB
Binary file not shown.
1.35 MB
Binary file not shown.
186 KB
Binary file not shown.

packages/enricher/package.json

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"name": "@posthog/enricher",
3+
"version": "1.0.0",
4+
"description": "Detect and enrich PostHog SDK usage in source code",
5+
"type": "module",
6+
"exports": {
7+
".": {
8+
"types": "./dist/index.d.ts",
9+
"import": "./dist/index.js"
10+
}
11+
},
12+
"scripts": {
13+
"build": "tsup",
14+
"dev": "tsup --watch",
15+
"typecheck": "tsc --noEmit",
16+
"clean": "node ../../scripts/rimraf.mjs dist .turbo",
17+
"fetch-grammars": "node scripts/fetch-grammars.cjs",
18+
"test": "vitest run"
19+
},
20+
"dependencies": {
21+
"web-tree-sitter": "^0.24.7"
22+
},
23+
"devDependencies": {
24+
"tree-sitter-cli": "^0.26.6",
25+
"tsup": "^8.5.1",
26+
"typescript": "^5.5.0",
27+
"vitest": "^2.1.9"
28+
},
29+
"files": [
30+
"dist/**/*",
31+
"src/**/*",
32+
"grammars/**/*"
33+
]
34+
}
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Builds tree-sitter WASM grammar files for all supported languages.
5+
* Requires: tree-sitter-cli and emscripten (or docker).
6+
*
7+
* Usage: node scripts/fetch-grammars.cjs
8+
*
9+
* If tree-sitter CLI cannot build WASM (no emscripten), you can manually
10+
* place pre-built .wasm files in the grammars/ directory.
11+
*/
12+
13+
const { execSync } = require("node:child_process");
14+
const fs = require("node:fs");
15+
const path = require("node:path");
16+
17+
// Directory that receives the .wasm files: the `grammars` folder one level above this script's directory.
const GRAMMARS_DIR = path.join(__dirname, "..", "grammars");
18+
19+
/**
 * Reports whether the tree-sitter CLI can be invoked through npx.
 *
 * @returns {boolean} true when `npx tree-sitter --version` runs without
 *   execSync throwing, false otherwise.
 */
function hasCli() {
  let available = true;
  try {
    // Output is piped so the version banner does not reach the console.
    execSync("npx tree-sitter --version", { stdio: "pipe" });
  } catch {
    available = false;
  }
  return available;
}
27+
28+
/**
 * Builds one tree-sitter grammar into a WASM file inside GRAMMARS_DIR.
 *
 * An existing output file larger than 10000 bytes is treated as cached and is
 * reused without rebuilding. Otherwise the grammar package is installed with
 * npm into a temporary `.grammar-build` directory (unless it is already
 * present there) and compiled with `npx tree-sitter build --wasm`.
 *
 * @param {string} grammarPkg - npm package spec, optionally carrying a
 *   version or tag suffix (e.g. "tree-sitter-go@0.23.4").
 * @param {string} outputName - File name of the .wasm file to write.
 * @param {string} [subDir] - Sub-directory of the installed package to build
 *   from; the package root is used when omitted.
 * @returns {boolean} true when the output is cached or was built, false when
 *   installing or building failed.
 */
function buildGrammar(grammarPkg, outputName, subDir) {
  // Prints the FAILED marker followed by the most informative text carried by
  // a failed execSync call, so install and build failures are both diagnosable.
  const reportFailure = (err) => {
    const stderr = err.stderr ? err.stderr.toString().trim() : "";
    const stdout = err.stdout ? err.stdout.toString().trim() : "";
    const msg = stderr || stdout || err.message || "";
    console.log(` FAILED`);
    if (msg) {
      console.log(` → ${msg}`);
    }
  };

  const dest = path.join(GRAMMARS_DIR, outputName);
  if (fs.existsSync(dest)) {
    const cachedBytes = fs.statSync(dest).size;
    // Files at or below 10000 bytes are not trusted as a finished build.
    if (cachedBytes > 10000) {
      const size = (cachedBytes / 1024).toFixed(0);
      console.log(` ✓ ${outputName} (${size}KB, cached)`);
      return true;
    }
  }

  const tempDir = path.join(__dirname, "..", ".grammar-build");
  // recursive: true makes this a no-op when the directory already exists.
  fs.mkdirSync(tempDir, { recursive: true });

  // Strip the version/tag specifier from the package spec for the directory
  // path. Searching from index 1 keeps the leading "@" of a scoped package and
  // also handles non-numeric specifiers such as "@latest" or "@^0.23.1".
  const specifierAt = grammarPkg.indexOf("@", 1);
  const dirName =
    specifierAt === -1 ? grammarPkg : grammarPkg.slice(0, specifierAt);
  const grammarDir = path.join(tempDir, "node_modules", dirName);
  if (!fs.existsSync(grammarDir)) {
    process.stdout.write(` ↓ Installing ${grammarPkg}...`);
    try {
      // The spec is quoted like the path so range characters (^, ~, >) are
      // not interpreted by the shell.
      execSync(
        `npm install "${grammarPkg}" --prefix "${tempDir}" --ignore-scripts`,
        {
          stdio: "pipe",
          cwd: tempDir,
        },
      );
      console.log(" OK");
    } catch (err) {
      reportFailure(err);
      return false;
    }
  }

  const buildDir = subDir ? path.join(grammarDir, subDir) : grammarDir;
  process.stdout.write(` ⚙ Building ${outputName}...`);
  try {
    execSync(`npx tree-sitter build --wasm -o "${dest}"`, {
      stdio: "pipe",
      cwd: buildDir,
      timeout: 120000,
    });
    const size = (fs.statSync(dest).size / 1024).toFixed(0);
    console.log(` ${size}KB`);
    return true;
  } catch (err) {
    reportFailure(err);
    return false;
  }
}
83+
84+
/**
 * Script entry point.
 *
 * Ensures GRAMMARS_DIR exists, copies the web-tree-sitter runtime WASM into
 * it when one can be found, builds every supported grammar via buildGrammar,
 * removes the temporary `.grammar-build` directory and prints a summary.
 *
 * Exits with code 1 when the tree-sitter CLI is unavailable, and sets a
 * non-zero exit code when at least one grammar could not be produced so that
 * callers (CI, package scripts) can detect the failure.
 */
function main() {
  // recursive: true makes this a no-op when the directory already exists.
  fs.mkdirSync(GRAMMARS_DIR, { recursive: true });

  // Copy the core tree-sitter runtime WASM. The package-local node_modules is
  // tried first, then the node_modules three levels above this script.
  const runtimeCandidates = [
    path.join(
      __dirname,
      "..",
      "node_modules",
      "web-tree-sitter",
      "tree-sitter.wasm",
    ),
    path.join(
      __dirname,
      "..",
      "..",
      "..",
      "node_modules",
      "web-tree-sitter",
      "tree-sitter.wasm",
    ),
  ];
  const runtimeSrc = runtimeCandidates.find((candidate) =>
    fs.existsSync(candidate),
  );
  if (runtimeSrc) {
    const runtimeDest = path.join(GRAMMARS_DIR, "tree-sitter.wasm");
    fs.copyFileSync(runtimeSrc, runtimeDest);
    const size = (fs.statSync(runtimeDest).size / 1024).toFixed(0);
    console.log(` ✓ tree-sitter.wasm runtime (${size}KB)`);
  } else {
    // Previously skipped silently; say so, since nothing else reports it.
    console.log(
      " ⚠ tree-sitter.wasm runtime not found in node_modules (skipped)",
    );
  }

  console.log("\nBuilding tree-sitter grammar WASM files...\n");

  if (!hasCli()) {
    console.log("⚠ tree-sitter CLI not found. Install it:");
    console.log(" npm install -g tree-sitter-cli\n");
    console.log("Then re-run: node scripts/fetch-grammars.cjs");
    process.exit(1);
  }

  // [package spec, output file, optional sub-directory], built in this order.
  const grammars = [
    // JavaScript — pinned to 0.23.1 for ABI v14 compatibility with web-tree-sitter@0.24.x
    ["tree-sitter-javascript@0.23.1", "tree-sitter-javascript.wasm"],
    // TypeScript (has typescript/ and tsx/ sub-directories)
    // NOTE(review): unlike the other grammars this one is not version-pinned;
    // confirm whether it also needs a pin for ABI v14 compatibility.
    ["tree-sitter-typescript", "tree-sitter-typescript.wasm", "typescript"],
    ["tree-sitter-typescript", "tree-sitter-tsx.wasm", "tsx"],
    // Python — pinned to 0.23.5 for ABI v14 compatibility with web-tree-sitter@0.24.x
    ["tree-sitter-python@0.23.5", "tree-sitter-python.wasm"],
    // Go — pinned to 0.23.4 for ABI v14 compatibility with web-tree-sitter@0.24.x
    ["tree-sitter-go@0.23.4", "tree-sitter-go.wasm"],
    // Ruby — pinned to 0.23.1 for ABI v14 compatibility with web-tree-sitter@0.24.x
    ["tree-sitter-ruby@0.23.1", "tree-sitter-ruby.wasm"],
  ];

  let built = 0;
  for (const [grammarPkg, outputName, subDir] of grammars) {
    if (buildGrammar(grammarPkg, outputName, subDir)) {
      built++;
    }
  }

  // Cleanup temp dir. Best-effort: a leftover directory does not affect the
  // produced grammars, so a removal error is deliberately ignored.
  const tempDir = path.join(__dirname, "..", ".grammar-build");
  try {
    fs.rmSync(tempDir, { recursive: true, force: true });
  } catch {
    /* ignored on purpose, see above */
  }

  console.log(`\n${built} grammar(s) ready in grammars/`);

  if (built === 0) {
    console.log(
      "\n⚠ No grammars were built. You may need emscripten installed.",
    );
    console.log(
      " See: https://emscripten.org/docs/getting_started/downloads.html",
    );
    console.log(" Or use Docker: tree-sitter build --wasm --docker");
  }

  const failed = grammars.length - built;
  if (failed > 0) {
    console.log(`\n⚠ ${failed} grammar(s) could not be built.`);
    // exitCode (rather than process.exit) lets pending stdout writes flush.
    process.exitCode = 1;
  }
}
173+
174+
main();

0 commit comments

Comments
 (0)