Skip to content

Commit 5e4a00d

Browse files
committed
feat(health): surface file risk in search
1 parent 0458be8 commit 5e4a00d

15 files changed

Lines changed: 904 additions & 6 deletions

src/constants/codebase-context.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export const INDEX_META_FILENAME = 'index-meta.json' as const;
2020

2121
export const MEMORY_FILENAME = 'memory.json' as const;
2222
export const INTELLIGENCE_FILENAME = 'intelligence.json' as const;
23+
export const HEALTH_FILENAME = 'health.json' as const;
2324
export const KEYWORD_INDEX_FILENAME = 'index.json' as const;
2425
export const INDEXING_STATS_FILENAME = 'indexing-stats.json' as const;
2526
export const VECTOR_DB_DIRNAME = 'index' as const;

src/core/index-meta.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { z } from 'zod';
44

55
import {
66
CODEBASE_CONTEXT_DIRNAME,
7+
HEALTH_FILENAME,
78
INDEX_FORMAT_VERSION,
89
INDEX_META_FILENAME,
910
INDEX_META_VERSION,
@@ -41,6 +42,30 @@ const RelationshipsFileSchema = z
4142
})
4243
.passthrough();
4344

45+
/**
 * Zod schema for the optional health sidecar file.
 *
 * `validateIndexArtifacts` parses the sidecar against this schema when the
 * file exists, then compares `header.buildId` and `header.formatVersion`
 * with the index meta. `summary` and each entry of `files` are declared with
 * `.passthrough()`; the top-level object is not.
 */
const HealthFileSchema = z.object({
  header: ArtifactHeaderSchema,
  generatedAt: z.string().datetime(),
  // Aggregate counts per risk level.
  summary: z
    .object({
      files: z.number().int().nonnegative(),
      highRiskFiles: z.number().int().nonnegative(),
      mediumRiskFiles: z.number().int().nonnegative(),
      lowRiskFiles: z.number().int().nonnegative()
    })
    .passthrough(),
  // One entry per assessed file.
  files: z.array(
    z
      .object({
        file: z.string().min(1),
        level: z.enum(['low', 'medium', 'high']),
        score: z.number().nonnegative(),
        reasons: z.array(z.string()),
        signals: z.record(z.string(), z.number()).optional()
      })
      .passthrough()
  )
});
68+
4469
export const IndexMetaSchema = z.object({
4570
metaVersion: z.number().int().positive(),
4671
formatVersion: z.number().int().nonnegative(),
@@ -59,6 +84,11 @@ export const IndexMetaSchema = z.object({
5984
embeddingModel: z.string().optional()
6085
}),
6186
intelligence: z
87+
.object({
88+
path: z.string().min(1)
89+
})
90+
.optional(),
91+
health: z
6292
.object({
6393
path: z.string().min(1)
6494
})
@@ -270,4 +300,34 @@ export async function validateIndexArtifacts(rootDir: string, meta: IndexMeta):
270300
throw asIndexCorrupted('Relationships sidecar corrupted (rebuild required)', error);
271301
}
272302
}
303+
304+
// Optional health sidecar: validate if present, but do not require.
305+
const healthPath = path.join(contextDir, HEALTH_FILENAME);
306+
if (await pathExists(healthPath)) {
307+
try {
308+
const raw = await fs.readFile(healthPath, 'utf-8');
309+
const json = JSON.parse(raw);
310+
const parsed = HealthFileSchema.safeParse(json);
311+
if (!parsed.success) {
312+
throw new IndexCorruptedError(
313+
`Health schema mismatch (rebuild required): ${parsed.error.message}`
314+
);
315+
}
316+
317+
const { buildId, formatVersion } = parsed.data.header;
318+
if (formatVersion !== meta.formatVersion) {
319+
throw new IndexCorruptedError(
320+
`Health formatVersion mismatch (rebuild required): meta=${meta.formatVersion}, health.json=${formatVersion}`
321+
);
322+
}
323+
if (buildId !== meta.buildId) {
324+
throw new IndexCorruptedError(
325+
`Health buildId mismatch (rebuild required): meta=${meta.buildId}, health.json=${buildId}`
326+
);
327+
}
328+
} catch (error) {
329+
if (error instanceof IndexCorruptedError) throw error;
330+
throw asIndexCorrupted('Health sidecar corrupted (rebuild required)', error);
331+
}
332+
}
273333
}

src/core/indexer.ts

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import { getFileCommitDates } from '../utils/git-dates.js';
4242
import {
4343
CODEBASE_CONTEXT_DIRNAME,
4444
EXCLUDED_GLOB_PATTERNS,
45+
HEALTH_FILENAME,
4546
INDEX_FORMAT_VERSION,
4647
INDEXING_STATS_FILENAME,
4748
INDEX_META_FILENAME,
@@ -52,6 +53,7 @@ import {
5253
RELATIONSHIPS_FILENAME,
5354
VECTOR_DB_DIRNAME
5455
} from '../constants/codebase-context.js';
56+
import { deriveCodebaseHealth } from '../health/derive.js';
5557

5658
const STAGING_DIRNAME = '.staging';
5759
const PREVIOUS_DIRNAME = '.previous';
@@ -104,6 +106,7 @@ async function atomicSwapStagingToActive(
104106
const activeManifestPath = path.join(contextDir, MANIFEST_FILENAME);
105107
const activeStatsPath = path.join(contextDir, INDEXING_STATS_FILENAME);
106108
const activeRelationshipsPath = path.join(contextDir, RELATIONSHIPS_FILENAME);
109+
const activeHealthPath = path.join(contextDir, HEALTH_FILENAME);
107110

108111
const stagingMetaPath = path.join(stagingDir, INDEX_META_FILENAME);
109112
const stagingIndexPath = path.join(stagingDir, KEYWORD_INDEX_FILENAME);
@@ -112,6 +115,7 @@ async function atomicSwapStagingToActive(
112115
const stagingManifestPath = path.join(stagingDir, MANIFEST_FILENAME);
113116
const stagingStatsPath = path.join(stagingDir, INDEXING_STATS_FILENAME);
114117
const stagingRelationshipsPath = path.join(stagingDir, RELATIONSHIPS_FILENAME);
118+
const stagingHealthPath = path.join(stagingDir, HEALTH_FILENAME);
115119

116120
// Step 1: Create .previous directory and move current active there
117121
await fs.mkdir(previousDir, { recursive: true });
@@ -149,6 +153,7 @@ async function atomicSwapStagingToActive(
149153
await moveIfExists(activeManifestPath, path.join(previousDir, MANIFEST_FILENAME));
150154
await moveIfExists(activeStatsPath, path.join(previousDir, INDEXING_STATS_FILENAME));
151155
await moveIfExists(activeRelationshipsPath, path.join(previousDir, RELATIONSHIPS_FILENAME));
156+
await moveIfExists(activeHealthPath, path.join(previousDir, HEALTH_FILENAME));
152157
await moveDirIfExists(activeVectorDir, path.join(previousDir, VECTOR_DB_DIRNAME));
153158

154159
// Step 2: Move staging artifacts to active location
@@ -159,6 +164,7 @@ async function atomicSwapStagingToActive(
159164
await moveIfExists(stagingManifestPath, activeManifestPath);
160165
await moveIfExists(stagingStatsPath, activeStatsPath);
161166
await moveIfExists(stagingRelationshipsPath, activeRelationshipsPath);
167+
await moveIfExists(stagingHealthPath, activeHealthPath);
162168
await moveDirIfExists(stagingVectorDir, activeVectorDir);
163169

164170
// Step 3: Clean up .previous and staging directories
@@ -188,6 +194,7 @@ async function atomicSwapStagingToActive(
188194
await moveIfExists(path.join(previousDir, MANIFEST_FILENAME), activeManifestPath);
189195
await moveIfExists(path.join(previousDir, INDEXING_STATS_FILENAME), activeStatsPath);
190196
await moveIfExists(path.join(previousDir, RELATIONSHIPS_FILENAME), activeRelationshipsPath);
197+
await moveIfExists(path.join(previousDir, HEALTH_FILENAME), activeHealthPath);
191198
await moveDirIfExists(path.join(previousDir, VECTOR_DB_DIRNAME), activeVectorDir);
192199
console.error('Rollback successful');
193200
} catch (rollbackError) {
@@ -980,6 +987,16 @@ export class CodebaseIndexer {
980987
};
981988
await fs.writeFile(relationshipsPath, JSON.stringify(relationships, null, 2));
982989

990+
const healthPath = path.join(activeContextDir, HEALTH_FILENAME);
991+
const health = deriveCodebaseHealth({
992+
buildId,
993+
formatVersion: INDEX_FORMAT_VERSION,
994+
generatedAt,
995+
chunks: allChunks,
996+
graph: internalFileGraph
997+
});
998+
await fs.writeFile(healthPath, JSON.stringify(health, null, 2));
999+
9831000
// Write manifest (both full and incremental)
9841001
// For full rebuild, write to staging; for incremental, write to active
9851002
const activeManifestPath = path.join(activeContextDir, MANIFEST_FILENAME);
@@ -1021,7 +1038,8 @@ export class CodebaseIndexer {
10211038
intelligence: { path: INTELLIGENCE_FILENAME },
10221039
manifest: { path: MANIFEST_FILENAME },
10231040
indexingStats: { path: INDEXING_STATS_FILENAME },
1024-
relationships: { path: RELATIONSHIPS_FILENAME }
1041+
relationships: { path: RELATIONSHIPS_FILENAME },
1042+
health: { path: HEALTH_FILENAME }
10251043
}
10261044
},
10271045
null,

src/health/derive.ts

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
import type { CodeChunk, CodebaseHealthArtifact, CodebaseHealthFile } from '../types/index.js';
2+
import { InternalFileGraph } from '../utils/usage-tracker.js';
3+
4+
/** Inputs consumed by `deriveCodebaseHealth`. */
interface DeriveCodebaseHealthParams {
  /** Build identifier; copied into the artifact header. */
  buildId: string;
  /** Index format version; copied into the artifact header. */
  formatVersion: number;
  /** Timestamp string; copied unchanged into the artifact's `generatedAt`. */
  generatedAt: string;
  /** Indexed chunks; their complexity metadata feeds the per-file maximum. */
  chunks: CodeChunk[];
  /** Import graph; supplies import edges via `toJSON().imports` and cycles via `findCycles()`. */
  graph: InternalFileGraph;
}
11+
12+
/** Raw per-file signals gathered before a risk level is assigned. */
interface FileMetrics {
  /** Length of the file's import list in the graph. */
  importCount: number;
  /** Number of distinct files whose import list contains this file. */
  importerCount: number;
  /** Number of detected cycles that list this file. */
  cycleCount: number;
  /** Largest complexity value reported by any chunk of the file; 0 when none is reported. */
  maxCyclomaticComplexity: number;
  /**
   * 1-based position when files are ordered by `importCount + importerCount`
   * (descending). Left unset for files where that sum is 0.
   */
  hotspotRank?: number;
}
19+
20+
/** Metrics lookup keyed by normalized file path, as populated by `collectFileMetrics`. */
type FileMetricsMap = Map<string, FileMetrics>;
21+
22+
/**
 * Canonicalizes a path-like string so one file maps to one lookup key.
 *
 * Every backslash is turned into a forward slash, then a single leading `./`
 * is removed. No other normalization happens: inner `./` segments, `..`
 * segments and repeated leading `./` prefixes beyond the first are kept.
 *
 * @param filePath - Path as reported by the graph or a chunk.
 * @returns The path with forward slashes and without one leading `./`.
 */
function normalizePathLike(filePath: string): string {
  return filePath.replace(/\\/g, '/').replace(/^\.\//, '');
}
25+
26+
/**
 * Returns the metrics entry stored under `file`, inserting an all-zero entry
 * first when the map has none.
 */
function getOrCreateFileMetrics(metrics: FileMetricsMap, file: string): FileMetrics {
  let entry = metrics.get(file);
  if (!entry) {
    entry = {
      importCount: 0,
      importerCount: 0,
      cycleCount: 0,
      maxCyclomaticComplexity: 0
    };
    metrics.set(file, entry);
  }
  return entry;
}

/**
 * Gathers per-file signals from the import graph and the indexed chunks.
 *
 * Steps, in order:
 * 1. Forward edges: `importCount` is the length of each file's import list.
 * 2. Reverse edges: `importerCount` is the number of distinct importing files.
 * 3. Chunks: `maxCyclomaticComplexity` is the largest numeric
 *    `metadata.cyclomaticComplexity` (falling back to `metadata.complexity`).
 * 4. Hotspots: files with at least one edge are ranked by
 *    `importCount + importerCount` descending, ties broken by file name.
 * 5. Cycles: `cycleCount` is incremented once per cycle entry for the file.
 *
 * All keys pass through `normalizePathLike`.
 *
 * @param chunks - Indexed chunks; the file key is `relativePath`, or `filePath` when that is empty.
 * @param graph - Import graph exposing `toJSON().imports` and `findCycles()`.
 * @returns Map from normalized file path to its collected metrics.
 */
function collectFileMetrics(chunks: CodeChunk[], graph: InternalFileGraph): FileMetricsMap {
  const metrics = new Map<string, FileMetrics>();
  const graphJson = graph.toJSON();
  const reverseImports = new Map<string, Set<string>>();

  // 1. Forward edges, building the reverse index as we go.
  for (const [file, deps] of Object.entries(graphJson.imports)) {
    const normalizedFile = normalizePathLike(file);
    getOrCreateFileMetrics(metrics, normalizedFile).importCount = deps.length;

    for (const dependency of deps) {
      const normalizedDependency = normalizePathLike(dependency);
      const importers = reverseImports.get(normalizedDependency) ?? new Set<string>();
      importers.add(normalizedFile);
      reverseImports.set(normalizedDependency, importers);
    }
  }

  // 2. Reverse edges: a Set is used so each importer counts once.
  for (const [file, importers] of reverseImports.entries()) {
    getOrCreateFileMetrics(metrics, file).importerCount = importers.size;
  }

  // 3. Highest complexity seen across the chunks of each file.
  for (const chunk of chunks) {
    const file = normalizePathLike(chunk.relativePath || chunk.filePath);
    const fileMetrics = getOrCreateFileMetrics(metrics, file);
    const chunkComplexity =
      typeof chunk.metadata?.cyclomaticComplexity === 'number'
        ? chunk.metadata.cyclomaticComplexity
        : typeof chunk.metadata?.complexity === 'number'
          ? chunk.metadata.complexity
          : 0;
    fileMetrics.maxCyclomaticComplexity = Math.max(
      fileMetrics.maxCyclomaticComplexity,
      chunkComplexity
    );
  }

  // 4. Rank connected files; files without any edge get no rank.
  const hotspotRanks = Array.from(metrics.entries())
    .map(([file, fileMetrics]) => ({
      file,
      combined: fileMetrics.importCount + fileMetrics.importerCount
    }))
    .filter((entry) => entry.combined > 0)
    .sort((a, b) => b.combined - a.combined || a.file.localeCompare(b.file));

  hotspotRanks.forEach((entry, index) => {
    const fileMetrics = metrics.get(entry.file);
    if (fileMetrics) {
      fileMetrics.hotspotRank = index + 1;
    }
  });

  // 5. Cycle membership. The last entry of `cycle.files` is skipped —
  // presumably it repeats the first file to close the loop; verify against
  // `findCycles`.
  for (const cycle of graph.findCycles()) {
    for (const file of cycle.files.slice(0, -1)) {
      getOrCreateFileMetrics(metrics, normalizePathLike(file)).cycleCount += 1;
    }
  }

  return metrics;
}
113+
114+
/**
 * Turns one file's metrics into a risk assessment.
 *
 * Scoring (additive):
 * - any cycle membership: +3
 * - importers >= 8: +2, else importers >= 4: +1
 * - hotspot rank <= 5: +2, else rank <= 10: +1
 * - max complexity >= 18: +2, else >= 10: +1
 *
 * Level is `high` for a score of 4 or more, `medium` for 2 or more, `low`
 * otherwise. At most the first three reasons are kept, in the order above,
 * even when four rules fired. `signals` lists only the non-zero metrics.
 *
 * @param fileMetrics - Collected metrics for a single file.
 * @returns The assessment with `file` set to an empty string; the caller fills in the path.
 */
function getHealthLevel(fileMetrics: FileMetrics): CodebaseHealthFile {
  const { cycleCount, importerCount, importCount, hotspotRank, maxCyclomaticComplexity } =
    fileMetrics;
  const reasons: string[] = [];
  let score = 0;

  if (cycleCount > 0) {
    score += 3;
    reasons.push(
      `Participates in ${cycleCount} circular dependenc${cycleCount === 1 ? 'y' : 'ies'}`
    );
  }

  if (importerCount >= 8) {
    score += 2;
    reasons.push(`High fan-in: ${importerCount} files depend on it`);
  } else if (importerCount >= 4) {
    score += 1;
    reasons.push(`Shared dependency for ${importerCount} files`);
  }

  // The truthiness check skips files that have no rank assigned.
  if (hotspotRank && hotspotRank <= 5) {
    score += 2;
    reasons.push(`Hotspot rank #${hotspotRank} by graph centrality`);
  } else if (hotspotRank && hotspotRank <= 10) {
    score += 1;
    reasons.push('Top-10 hotspot by graph centrality');
  }

  if (maxCyclomaticComplexity >= 18) {
    score += 2;
    reasons.push(`Complex implementation (cyclomatic ${maxCyclomaticComplexity})`);
  } else if (maxCyclomaticComplexity >= 10) {
    score += 1;
    reasons.push(`Moderate code complexity (cyclomatic ${maxCyclomaticComplexity})`);
  }

  const level = score >= 4 ? 'high' : score >= 2 ? 'medium' : ('low' as const);

  return {
    file: '',
    level,
    score,
    reasons: reasons.slice(0, 3),
    signals: {
      ...(hotspotRank ? { hotspotRank } : {}),
      ...(importerCount > 0 ? { importerCount } : {}),
      ...(importCount > 0 ? { importCount } : {}),
      ...(cycleCount > 0 ? { cycleCount } : {}),
      ...(maxCyclomaticComplexity > 0 ? { maxCyclomaticComplexity } : {})
    }
  };
}
167+
168+
/**
 * Builds the codebase health artifact for one index build.
 *
 * Every file known to the import graph or the chunk list is assessed with
 * `getHealthLevel`. Entries are ordered by risk level (high, medium, low),
 * then by score descending, then by file path, and the summary counts files
 * per level.
 *
 * @param params - Build identity, timestamp, indexed chunks and import graph.
 * @returns The artifact: header, `generatedAt`, summary counts and the sorted file list.
 */
export function deriveCodebaseHealth({
  buildId,
  formatVersion,
  generatedAt,
  chunks,
  graph
}: DeriveCodebaseHealthParams): CodebaseHealthArtifact {
  // Built once here rather than inside the comparator, which runs per comparison.
  const levelPriority = { high: 0, medium: 1, low: 2 };

  const fileMetrics = collectFileMetrics(chunks, graph);
  const files = Array.from(fileMetrics.entries())
    .map(([file, metrics]) => {
      const health = getHealthLevel(metrics);
      // `getHealthLevel` leaves `file` empty; the real path is attached here.
      return {
        ...health,
        file
      };
    })
    .sort((a, b) => {
      const levelDelta = levelPriority[a.level] - levelPriority[b.level];
      if (levelDelta !== 0) return levelDelta;
      if (b.score !== a.score) return b.score - a.score;
      return a.file.localeCompare(b.file);
    });

  const highRiskFiles = files.filter((file) => file.level === 'high').length;
  const mediumRiskFiles = files.filter((file) => file.level === 'medium').length;
  const lowRiskFiles = files.length - highRiskFiles - mediumRiskFiles;

  return {
    header: { buildId, formatVersion },
    generatedAt,
    summary: {
      files: files.length,
      highRiskFiles,
      mediumRiskFiles,
      lowRiskFiles
    },
    files
  };
}

0 commit comments

Comments
 (0)