@@ -11,33 +11,25 @@ import { analyzerRegistry } from '../dist/core/analyzer-registry.js';
1111import { AngularAnalyzer } from '../dist/analyzers/angular/index.js' ;
1212import { GenericAnalyzer } from '../dist/analyzers/generic/index.js' ;
1313import { evaluateFixture , formatEvalReport } from '../dist/eval/harness.js' ;
14+ import {
15+ combineEditPreflightSummaries ,
16+ evaluateEditPreflightFixture ,
17+ formatEditPreflightReport
18+ } from '../dist/eval/edit-preflight-harness.js' ;
1419import {
1520 combineDiscoverySummaries ,
1621 evaluateDiscoveryGate ,
1722 evaluateDiscoveryFixture ,
1823 formatDiscoveryReport
1924} from '../dist/eval/discovery-harness.js' ;
25+ import { getDefaultFixturePaths , resolveEvalMode } from '../dist/eval/run-config.js' ;
2026
2127const __dirname = path . dirname ( fileURLToPath ( import . meta. url ) ) ;
2228const projectRoot = path . join ( __dirname , '..' ) ;
2329const packageJsonPath = path . join ( projectRoot , 'package.json' ) ;
2430
2531const packageJson = JSON . parse ( readFileSync ( packageJsonPath , 'utf-8' ) ) ;
2632
27- const defaultFixtureA = path . join ( projectRoot , 'tests' , 'fixtures' , 'eval-angular-spotify.json' ) ;
28- const defaultFixtureB = path . join ( projectRoot , 'tests' , 'fixtures' , 'eval-controlled.json' ) ;
29- const defaultDiscoveryFixtureA = path . join (
30- projectRoot ,
31- 'tests' ,
32- 'fixtures' ,
33- 'discovery-angular-spotify.json'
34- ) ;
35- const defaultDiscoveryFixtureB = path . join (
36- projectRoot ,
37- 'tests' ,
38- 'fixtures' ,
39- 'discovery-excalidraw.json'
40- ) ;
4133const defaultDiscoveryProtocol = path . join (
4234 projectRoot ,
4335 'tests' ,
@@ -49,7 +41,7 @@ const usage = [
4941 `Usage: node scripts/run-eval.mjs <codebaseA> [codebaseB] [options]` ,
5042 `` ,
5143 `Options:` ,
52- ` --mode=<retrieval|discovery> Select benchmark mode (default: retrieval)` ,
44+ ` --mode=<retrieval|discovery|edit-preflight > Select benchmark mode (default: retrieval)` ,
5345 ` --fixture-a=<path> Override fixture for codebaseA` ,
5446 ` --fixture-b=<path> Override fixture for codebaseB` ,
5547 ` --protocol=<path> Override discovery benchmark protocol` ,
@@ -151,6 +143,17 @@ async function runSingleEvaluation({
151143 fixturePath : resolvedFixture ,
152144 summary
153145 } ) ;
146+ } else if ( mode === 'edit-preflight' ) {
147+ console . log ( `\n--- Phase 2: Running ${ fixture . tasks . length } -task edit-preflight harness ---` ) ;
148+ summary = await evaluateEditPreflightFixture ( {
149+ fixture,
150+ rootPath : resolvedCodebase
151+ } ) ;
152+ report = formatEditPreflightReport ( {
153+ codebaseLabel : label ,
154+ fixturePath : resolvedFixture ,
155+ summary
156+ } ) ;
154157 } else {
155158 console . log ( `\n--- Phase 2: Running ${ fixture . queries . length } -query eval harness ---` ) ;
156159 const searcher = new CodebaseSearcher ( resolvedCodebase ) ;
@@ -202,6 +205,31 @@ function printCombinedSummary(summaries, mode) {
202205 return ;
203206 }
204207
208+ if ( mode === 'edit-preflight' ) {
209+ const combined = combineEditPreflightSummaries ( summaries ) ;
210+ console . log ( `\n=== Combined Edit Preflight Summary ===` ) ;
211+ console . log (
212+ `Top-target in top-3: ${ combined . topTargetInTop3Count } /${ combined . targetableTasks } (${ combined . topTargetInTop3Rate === null ? 'n/a' : ( combined . topTargetInTop3Rate * 100 ) . toFixed ( 0 ) + '%' } )`
213+ ) ;
214+ console . log (
215+ `Average first relevant hit: ${ combined . averageFirstRelevantHit === null ? 'n/a' : combined . averageFirstRelevantHit . toFixed ( 2 ) } `
216+ ) ;
217+ console . log (
218+ `Best-example hit rate: ${ combined . bestExampleHitCount } /${ combined . bestExampleTasks } (${ combined . bestExampleHitRate === null ? 'n/a' : ( combined . bestExampleHitRate * 100 ) . toFixed ( 0 ) + '%' } )`
219+ ) ;
220+ console . log (
221+ `Safe ready rate: ${ combined . safeTaskReadyCount } /${ combined . safeTasks } (${ combined . safeTaskReadyRate === null ? 'n/a' : ( combined . safeTaskReadyRate * 100 ) . toFixed ( 0 ) + '%' } )`
222+ ) ;
223+ console . log (
224+ `Unsafe abstain rate: ${ combined . unsafeTaskAbstainCount } /${ combined . unsafeTasks } (${ combined . unsafeTaskAbstainRate === null ? 'n/a' : ( combined . unsafeTaskAbstainRate * 100 ) . toFixed ( 0 ) + '%' } )`
225+ ) ;
226+ console . log (
227+ `Unsafe ready=true false positives: ${ combined . unsafeReadyFalsePositiveCount } /${ combined . unsafeTasks } (${ combined . unsafeReadyFalsePositiveRate === null ? 'n/a' : ( combined . unsafeReadyFalsePositiveRate * 100 ) . toFixed ( 0 ) + '%' } )`
228+ ) ;
229+ console . log ( `=======================================\n` ) ;
230+ return ;
231+ }
232+
205233 const total = summaries . reduce ( ( sum , summary ) => sum + summary . total , 0 ) ;
206234 const top1Correct = summaries . reduce ( ( sum , summary ) => sum + summary . top1Correct , 0 ) ;
207235 const top3RecallCount = summaries . reduce ( ( sum , summary ) => sum + summary . top3RecallCount , 0 ) ;
@@ -254,17 +282,14 @@ async function main() {
254282
255283 const codebaseA = positionals [ 0 ] ;
256284 const codebaseB = positionals [ 1 ] ;
257- const mode = values . mode === 'discovery' ? 'discovery' : 'retrieval' ;
285+ const mode = resolveEvalMode ( values . mode ) ;
286+ const defaultFixtures = getDefaultFixturePaths ( projectRoot , mode ) ;
258287 const fixtureA = values [ 'fixture-a' ]
259288 ? path . resolve ( values [ 'fixture-a' ] )
260- : mode === 'discovery'
261- ? defaultDiscoveryFixtureA
262- : defaultFixtureA ;
289+ : defaultFixtures . fixtureA ;
263290 const fixtureB = values [ 'fixture-b' ]
264291 ? path . resolve ( values [ 'fixture-b' ] )
265- : mode === 'discovery'
266- ? defaultDiscoveryFixtureB
267- : defaultFixtureB ;
292+ : defaultFixtures . fixtureB ;
268293 const protocolPath = values . protocol
269294 ? path . resolve ( values . protocol )
270295 : defaultDiscoveryProtocol ;
@@ -326,6 +351,25 @@ async function main() {
326351 process . exit ( gate . status === 'failed' ? 1 : 0 ) ;
327352 }
328353
354+ if ( mode === 'edit-preflight' ) {
355+ const combinedSummary = combineEditPreflightSummaries ( summaries ) ;
356+ printCombinedSummary ( summaries , mode ) ;
357+ console . log (
358+ formatEditPreflightReport ( {
359+ codebaseLabel : 'combined-suite' ,
360+ fixturePath : codebaseB ? `${ fixtureA } , ${ fixtureB } ` : fixtureA ,
361+ summary : combinedSummary
362+ } )
363+ ) ;
364+ if ( outputPath ) {
365+ const outputDir = path . dirname ( outputPath ) ;
366+ if ( ! existsSync ( outputDir ) ) mkdirSync ( outputDir , { recursive : true } ) ;
367+ writeFileSync ( outputPath , JSON . stringify ( combinedSummary , null , 2 ) ) ;
368+ console . log ( `\nResults written to: ${ outputPath } ` ) ;
369+ }
370+ process . exit ( 0 ) ;
371+ }
372+
329373 if ( outputPath && mode === 'discovery' && summaries . length === 1 ) {
330374 const outputDir = path . dirname ( outputPath ) ;
331375 if ( ! existsSync ( outputDir ) ) mkdirSync ( outputDir , { recursive : true } ) ;
0 commit comments