@@ -12,6 +12,7 @@ import { Builder } from "./builder.ts"
1212import { extractEvaluationReason , isComplete , parseEvaluation } from "./evaluator.ts"
1313import {
1414 ensureDirectories ,
15+ getISOTimestamp ,
1516 getTimestampForFilename ,
1617 initializePaths ,
1718 readFileOrNull ,
@@ -87,14 +88,48 @@ export async function runLoop(config: Config): Promise<void> {
8788 await runEvaluationPhase ( state , builder , paths , logger , config )
8889 break
8990 }
91+
92+ // Success - reset retry count
93+ state . retryCount = 0
94+ state . lastErrorTime = undefined
9095 } catch ( err ) {
91- logger . logError ( `Error in ${ state . phase } phase: ${ err } ` )
96+ // Track the error
97+ state . retryCount ++
98+ state . lastErrorTime = getISOTimestamp ( )
99+
100+ logger . logError (
101+ `Error in ${ state . phase } phase (attempt ${ state . retryCount } /${ config . maxRetries } ): ${ err } ` ,
102+ )
92103
93- // Retry with backoff
94- if ( ! shutdownRequested ) {
95- const backoffMs = config . backoffBase * 1000
96- logger . say ( `Retrying in ${ config . backoffBase } seconds...` )
97- await sleep ( backoffMs )
104+ // Check if we've exceeded max retries
105+ if ( state . retryCount >= config . maxRetries ) {
106+ logger . logError ( `Max retries (${ config . maxRetries } ) exceeded for ${ state . phase } phase` )
107+ logger . alert ( `CRITICAL: ${ state . phase } phase failed after ${ config . maxRetries } attempts` )
108+
109+ // Reset retry count and move to next phase or skip task
110+ state . retryCount = 0
111+ state . lastErrorTime = undefined
112+
113+ if ( state . phase === "build" ) {
114+ // Skip the failed task and continue
115+ logger . warn ( "Skipping failed task and continuing..." )
116+ await skipCurrentTask ( state , paths , logger )
117+ } else if ( state . phase === "plan" ) {
118+ // Clear any stuck idea and retry planning
119+ state . currentIdeaPath = undefined
120+ state . currentIdeaFilename = undefined
121+ logger . warn ( "Clearing idea state and retrying plan phase..." )
122+ }
123+ // For evaluation, just retry - it will eventually succeed or the user will intervene
124+ } else {
125+ // Retry with exponential backoff
126+ const backoffMs = calculateBackoff ( state . retryCount , config . backoffBase )
127+ const backoffSec = Math . round ( backoffMs / 1000 )
128+ logger . say ( `Retrying in ${ backoffSec } seconds...` )
129+
130+ if ( ! shutdownRequested ) {
131+ await sleep ( backoffMs )
132+ }
98133 }
99134 }
100135
@@ -460,6 +495,71 @@ export function sleep(ms: number): Promise<void> {
460495 return new Promise ( ( resolve ) => setTimeout ( resolve , ms ) )
461496}
462497
/**
 * Compute how long to wait before the next retry attempt.
 *
 * The delay doubles with every attempt (`baseSeconds * 2^(retryCount - 1)`)
 * and is capped at 300 seconds. Random jitter of up to 20% of the capped
 * delay is then added on top, so the returned value can exceed the cap by
 * as much as 20%.
 *
 * @param retryCount - Current retry attempt (1-based)
 * @param baseSeconds - Base delay in seconds
 * @returns Delay in milliseconds, rounded to the nearest integer
 */
export function calculateBackoff(retryCount: number, baseSeconds: number): number {
  const maxDelaySeconds = 300 // 5 minutes
  const jitterFraction = 0.2

  // The first attempt waits exactly the base delay; each later one doubles it.
  const doublings = retryCount - 1
  const delaySeconds = Math.min(baseSeconds * 2 ** doublings, maxDelaySeconds)

  // Randomise the wait so simultaneous retriers do not all fire together.
  const jitterSeconds = delaySeconds * jitterFraction * Math.random()

  return Math.round((delaySeconds + jitterSeconds) * 1000)
}
514+
/**
 * Give up on the first unfinished task of the current plan so the build
 * phase can make progress after repeated failures.
 *
 * The task is passed to `markTaskComplete`, a SKIPPED marker comment is
 * appended after its `- [x] <description>` text, and the plan file is
 * rewritten. If no unfinished task exists, either before or after the skip,
 * `state.phase` is switched to "evaluation". Does nothing when the plan file
 * cannot be read or is empty.
 */
async function skipCurrentTask(state: RuntimeState, paths: Paths, logger: Logger): Promise<void> {
  const plan = await readFileOrNull(paths.currentPlan)
  if (!plan) return

  // The first unfinished task is the one that kept failing; if there is
  // none, the build phase has nothing left to do.
  const pendingTasks = getUncompletedTasks(plan)
  const failedTask = pendingTasks[0]
  if (!failedTask) {
    state.phase = "evaluation"
    return
  }

  // Check the task off even though it failed, so the loop can move on.
  const checkedOffPlan = markTaskComplete(plan, failedTask.lineNumber)

  // Tag the first matching checked-off entry with a marker explaining the skip.
  const checkedOffEntry = new RegExp(`(- \\[x\\] ${escapeRegExp(failedTask.description)})`)
  const annotatedPlan = checkedOffPlan.replace(
    checkedOffEntry,
    "$1 <!-- SKIPPED: Task failed after max retries -->",
  )

  await writeFile(paths.currentPlan, annotatedPlan)
  logger.warn(`Skipped failed task: ${failedTask.description}`)

  // Skipping the final task means the plan is done; hand over to evaluation.
  if (getUncompletedTasks(annotatedPlan).length === 0) {
    state.phase = "evaluation"
  }
}
555+
/**
 * Backslash-escape every regular-expression metacharacter in `str`
 * (`. * + ? ^ $ { } ( ) | [ ] \`) so the result can be embedded in a
 * `RegExp` pattern and match the input text literally.
 */
function escapeRegExp(str: string): string {
  const metacharacters = /[\\^$.*+?()[\]{}|]/g
  return str.replace(metacharacters, (ch) => `\\${ch}`)
}
562+
463563/**
464564 * Check if shutdown has been requested.
465565 * Exported for testing and external shutdown checks.
0 commit comments