Skip to content

Commit 2e474b5

Browse files
authored
feat: plan recovery system — graceful degradation, touchSet enforcement, and early conflict detection (#41)
* Merge graceful-retry * Merge touchset-gate * feat: activate needs_rebase job status for early conflict detection Add trial merge check before jobs enter the merge train to detect conflicts early. Jobs that would conflict are marked needs_rebase and the plan pauses at an on_error checkpoint. - Add checkMergeability() to merge-train.ts for trial merges - Integrate trial merge in orchestrator reconcile loop - Add retry arg to plan-approve for needs_rebase jobs - Add ready_to_merge → needs_rebase transition in plan-types - Add tests for all new functionality Closes #36 * test: fix diagnostics in rebased test files * fix: update plan job state before reconcile on completion * test: mock createIntegrationBranch in plan-approve test for CI
1 parent b571242 commit 2e474b5

10 files changed

Lines changed: 782 additions & 83 deletions

src/lib/merge-train.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,71 @@ const INSTALL_COMMAND_BY_LOCKFILE = [
5050

5151

5252

53+
export async function validateTouchSet(
54+
jobBranch: string,
55+
baseBranch: string,
56+
touchSet: string[],
57+
opts?: { cwd?: string },
58+
): Promise<{ valid: boolean; violations?: string[]; changedFiles?: string[] }> {
59+
if (touchSet.length === 0) {
60+
return { valid: true };
61+
}
62+
63+
const diffResult = await gitCommand(['diff', '--name-only', `${baseBranch}...${jobBranch}`], opts);
64+
if (diffResult.exitCode !== 0) {
65+
return { valid: false, violations: [`Failed to diff: ${diffResult.stderr}`] };
66+
}
67+
68+
const changedFiles = diffResult.stdout.split('\n').map(f => f.trim()).filter(Boolean);
69+
if (changedFiles.length === 0) {
70+
return { valid: true, changedFiles: [] };
71+
}
72+
73+
const violations: string[] = [];
74+
for (const file of changedFiles) {
75+
const matchesAny = touchSet.some(pattern => {
76+
const glob = new Bun.Glob(pattern);
77+
return glob.match(file);
78+
});
79+
if (!matchesAny) {
80+
violations.push(file);
81+
}
82+
}
83+
84+
return {
85+
valid: violations.length === 0,
86+
violations: violations.length > 0 ? violations : undefined,
87+
changedFiles,
88+
};
89+
}
90+
91+
export async function checkMergeability(
92+
integrationWorktree: string,
93+
jobBranch: string,
94+
): Promise<{ canMerge: boolean; conflicts?: string[] }> {
95+
const testMerge = await gitCommand([
96+
'-C', integrationWorktree,
97+
'merge', '--no-commit', '--no-ff', jobBranch,
98+
]);
99+
100+
// Always clean up — leave worktree pristine
101+
await gitCommand(['-C', integrationWorktree, 'merge', '--abort']).catch(() => {});
102+
await gitCommand(['-C', integrationWorktree, 'reset', '--hard', 'HEAD']).catch(() => {});
103+
await gitCommand(['-C', integrationWorktree, 'clean', '-fd']).catch(() => {});
104+
105+
if (testMerge.exitCode !== 0) {
106+
const conflicts = extractConflicts(
107+
[testMerge.stdout, testMerge.stderr].filter(Boolean).join('\n'),
108+
);
109+
return {
110+
canMerge: false,
111+
conflicts: conflicts.length > 0 ? conflicts : undefined,
112+
};
113+
}
114+
115+
return { canMerge: true };
116+
}
117+
53118
async function rollbackMerge(worktreePath: string): Promise<void> {
54119
// Try merge --abort first
55120
await gitCommand(['-C', worktreePath, 'merge', '--abort']).catch(() => {});

src/lib/orchestrator.ts

Lines changed: 59 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import type { PlanSpec, JobSpec, PlanStatus, CheckpointType } from './plan-types
44
import { loadPlan, savePlan, updatePlanJob, clearPlan, validateGhAuth } from './plan-state';
55
import { getDefaultBranch } from './git';
66
import { createIntegrationBranch, deleteIntegrationBranch } from './integration';
7-
import { MergeTrain, type MergeTestReport } from './merge-train';
7+
import { MergeTrain, checkMergeability, type MergeTestReport, validateTouchSet } from './merge-train';
88
import { addJob, getRunningJobs, updateJob, loadJobState, removeJob, type Job } from './job-state';
99
import { JobMonitor } from './monitor';
1010
import { removeReport } from './reports';
@@ -473,6 +473,22 @@ export class Orchestrator {
473473

474474
for (const job of mergeOrder) {
475475
if (job.status === 'completed') {
476+
if (job.touchSet && job.touchSet.length > 0 && job.branch && plan.integrationBranch) {
477+
const validation = await validateTouchSet(job.branch, plan.integrationBranch, job.touchSet);
478+
if (!validation.valid && validation.violations) {
479+
await updatePlanJob(plan.id, job.name, {
480+
status: 'failed',
481+
error: `Modified files outside touchSet: ${validation.violations.join(', ')}. Expected only: ${job.touchSet.join(', ')}`,
482+
});
483+
job.status = 'failed';
484+
485+
this.showToast('Mission Control', `Job "${job.name}" touched files outside its touchSet. Plan paused.`, 'error');
486+
this.notify(`❌ Job "${job.name}" modified files outside its touchSet:\n Violations: ${validation.violations.join(', ')}\n Allowed: ${job.touchSet.join(', ')}\nFix the branch and retry with mc_plan_approve(checkpoint: "on_error", retry: "${job.name}").`);
487+
await this.setCheckpoint('on_error', plan);
488+
return;
489+
}
490+
}
491+
476492
await updatePlanJob(plan.id, job.name, { status: 'ready_to_merge' });
477493
job.status = 'ready_to_merge';
478494
}
@@ -494,6 +510,22 @@ export class Orchestrator {
494510
continue;
495511
}
496512

513+
if (job.branch && plan.integrationWorktree) {
514+
const mergeCheck = await checkMergeability(plan.integrationWorktree, job.branch);
515+
if (!mergeCheck.canMerge) {
516+
await updatePlanJob(plan.id, job.name, {
517+
status: 'needs_rebase',
518+
error: mergeCheck.conflicts?.join(', ') ?? 'merge conflict detected in trial merge',
519+
});
520+
job.status = 'needs_rebase';
521+
522+
this.showToast('Mission Control', `Job "${job.name}" has merge conflicts. Plan paused.`, 'error');
523+
this.notify(`❌ Job "${job.name}" would conflict with the integration branch.\n Files: ${mergeCheck.conflicts?.join(', ') ?? 'unknown'}\nRebase the job branch and retry with mc_plan_approve(checkpoint: "on_error", retry: "${job.name}").`);
524+
await this.setCheckpoint('on_error', plan);
525+
return;
526+
}
527+
}
528+
497529
if (this.isSupervisor(plan) && !this.approvedForMerge.has(job.name)) {
498530
await this.setCheckpoint('pre_merge', plan);
499531
return;
@@ -538,14 +570,10 @@ export class Orchestrator {
538570
error: mergeResult.files?.join(', ') ?? 'merge conflict',
539571
});
540572

541-
if (this.isSupervisor(plan)) {
542-
await this.setCheckpoint('on_error', plan);
543-
return;
544-
}
545-
546-
plan.status = 'failed';
547-
this.showToast('Mission Control', `Merge conflict in job "${nextJob.name}".`, 'error');
548-
this.notify(`❌ Merge conflict in job "${nextJob.name}". Files: ${mergeResult.files?.join(', ') ?? 'unknown'}. Plan failed.`);
573+
this.showToast('Mission Control', `Merge conflict in job "${nextJob.name}". Plan paused.`, 'error');
574+
this.notify(`❌ Merge conflict in job "${nextJob.name}". Files: ${mergeResult.files?.join(', ') ?? 'unknown'}. Fix the branch and retry with mc_plan_approve(checkpoint: "on_error", retry: "${nextJob.name}").`);
575+
await this.setCheckpoint('on_error', plan);
576+
return;
549577
} else {
550578
await updatePlanJob(plan.id, nextJob.name, {
551579
status: 'failed',
@@ -557,14 +585,10 @@ export class Orchestrator {
557585
this.notify(`🧪 ${nextJob.name}: ${testSummary}`);
558586
}
559587

560-
if (this.isSupervisor(plan)) {
561-
await this.setCheckpoint('on_error', plan);
562-
return;
563-
}
564-
565-
plan.status = 'failed';
566-
this.showToast('Mission Control', `Job "${nextJob.name}" failed during merge.`, 'error');
567-
this.notify(`❌ Job "${nextJob.name}" failed merge tests. Plan failed.`);
588+
this.showToast('Mission Control', `Job "${nextJob.name}" failed merge tests. Plan paused.`, 'error');
589+
this.notify(`❌ Job "${nextJob.name}" failed merge tests. Fix the branch and retry with mc_plan_approve(checkpoint: "on_error", retry: "${nextJob.name}").`);
590+
await this.setCheckpoint('on_error', plan);
591+
return;
568592
}
569593
}
570594

@@ -763,22 +787,24 @@ If your work needs human review before it can proceed: mc_report(status: "needs_
763787
}
764788

765789
private handleJobComplete = (job: Job): void => {
766-
if (job.planId && this.activePlanId && job.planId === this.activePlanId) {
767-
if (!this.firstJobCompleted) {
768-
this.firstJobCompleted = true;
769-
this.showToast('Mission Control', `First job completed: "${job.name}".`, 'success');
770-
}
790+
if (!job.planId || !this.activePlanId || job.planId !== this.activePlanId) {
791+
return;
792+
}
771793

772-
updatePlanJob(job.planId, job.name, {
773-
status: 'completed',
774-
}).catch((error) => {
775-
console.error('Failed to update completed job state:', error);
776-
});
794+
if (!this.firstJobCompleted) {
795+
this.firstJobCompleted = true;
796+
this.showToast('Mission Control', `First job completed: "${job.name}".`, 'success');
797+
}
777798

778-
this.reconcile().catch((error) => {
779-
console.error('Reconcile after completion failed:', error);
799+
const planId = job.planId;
800+
(async () => {
801+
await updatePlanJob(planId, job.name, {
802+
status: 'completed',
780803
});
781-
}
804+
await this.reconcile();
805+
})().catch((error) => {
806+
console.error('Failed to reconcile completed job state:', error);
807+
});
782808
}
783809

784810
private handleJobFailed = (job: Job): void => {
@@ -794,16 +820,9 @@ If your work needs human review before it can proceed: mc_report(status: "needs_
794820
return;
795821
}
796822

797-
if (this.isSupervisor(plan)) {
798-
await this.setCheckpoint('on_error', plan);
799-
return;
800-
}
801-
802-
plan.status = 'failed';
803-
plan.completedAt = new Date().toISOString();
804-
await savePlan(plan);
805-
this.showToast('Mission Control', `Plan failed: job "${job.name}" failed.`, 'error');
806-
this.notify(`❌ Plan failed: job "${job.name}" failed.`);
823+
this.showToast('Mission Control', `Job "${job.name}" failed. Plan paused.`, 'error');
824+
this.notify(`❌ Job "${job.name}" failed. Fix and retry with mc_plan_approve(checkpoint: "on_error", retry: "${job.name}").`);
825+
await this.setCheckpoint('on_error', plan);
807826
})
808827
.catch(() => {})
809828
.finally(() => {

src/lib/plan-types.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ export const VALID_JOB_TRANSITIONS: Record<JobStatus, JobStatus[]> = {
7575
waiting_deps: ['running', 'stopped', 'canceled'],
7676
running: ['completed', 'failed', 'stopped', 'canceled'],
7777
completed: ['ready_to_merge', 'stopped', 'canceled'],
78-
failed: ['stopped', 'canceled'],
79-
ready_to_merge: ['merging', 'stopped', 'canceled'],
78+
failed: ['ready_to_merge', 'stopped', 'canceled'],
79+
ready_to_merge: ['merging', 'needs_rebase', 'stopped', 'canceled'],
8080
merging: ['merged', 'conflict', 'stopped', 'canceled'],
8181
merged: ['needs_rebase'],
8282
conflict: ['ready_to_merge', 'stopped', 'canceled'],

src/tools/plan-approve.ts

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { tool, type ToolDefinition } from '@opencode-ai/plugin';
2-
import { loadPlan, savePlan } from '../lib/plan-state';
2+
import { loadPlan, savePlan, updatePlanJob } from '../lib/plan-state';
33
import { Orchestrator } from '../lib/orchestrator';
44
import { getSharedMonitor, getSharedNotifyCallback, setSharedOrchestrator } from '../lib/orchestrator-singleton';
55
import type { CheckpointType } from '../lib/plan-types';
@@ -10,21 +10,47 @@ import { resolvePostCreateHook } from '../lib/worktree-setup';
1010

1111
export const mc_plan_approve: ToolDefinition = tool({
1212
description:
13-
'Approve a pending copilot plan or clear a supervisor checkpoint to continue execution',
13+
'Approve a pending copilot plan, clear a supervisor checkpoint, or retry a failed job to continue execution',
1414
args: {
1515
checkpoint: tool.schema
1616
.enum(['pre_merge', 'on_error', 'pre_pr'])
1717
.optional()
1818
.describe('Specific checkpoint to clear (for supervisor mode)'),
19+
retry: tool.schema
20+
.string()
21+
.optional()
22+
.describe('Name of a failed, conflict, or needs_rebase job to retry'),
1923
},
2024
async execute(args) {
2125
const plan = await loadPlan();
2226
if (!plan) {
2327
throw new Error('No active plan to approve');
2428
}
2529

30+
if (args.retry) {
31+
const job = plan.jobs.find((j) => j.name === args.retry);
32+
if (!job) {
33+
throw new Error(`Job "${args.retry}" not found in plan`);
34+
}
35+
if (job.status !== 'failed' && job.status !== 'conflict' && job.status !== 'needs_rebase') {
36+
throw new Error(`Job "${args.retry}" is not in a retryable state (current: ${job.status}). Only failed, conflict, or needs_rebase jobs can be retried.`);
37+
}
38+
}
39+
2640
if (plan.status === 'paused' && plan.checkpoint) {
2741
const checkpoint = (args.checkpoint ?? plan.checkpoint) as CheckpointType;
42+
43+
if (args.retry) {
44+
const job = plan.jobs.find(j => j.name === args.retry);
45+
if (!job) {
46+
throw new Error(`Job "${args.retry}" not found in plan`);
47+
}
48+
if (job.status !== 'failed' && job.status !== 'conflict' && job.status !== 'needs_rebase') {
49+
throw new Error(`Job "${args.retry}" is not in a retryable state (current: ${job.status}). Only failed, conflict, or needs_rebase jobs can be retried.`);
50+
}
51+
await updatePlanJob(plan.id, args.retry, { status: 'ready_to_merge', error: undefined });
52+
}
53+
2854
plan.status = 'running';
2955
plan.checkpoint = null;
3056
await savePlan(plan);
@@ -35,8 +61,9 @@ export const mc_plan_approve: ToolDefinition = tool({
3561
orchestrator.setPlanModelSnapshot(getCurrentModel());
3662
await orchestrator.resumePlan();
3763

64+
const retryMsg = args.retry ? ` Job "${args.retry}" reset to ready_to_merge.` : '';
3865
return [
39-
`Checkpoint "${checkpoint}" cleared. Plan "${plan.name}" resuming.`,
66+
`Checkpoint "${checkpoint}" cleared.${retryMsg} Plan "${plan.name}" resuming.`,
4067
'',
4168
` ID: ${plan.id}`,
4269
` Mode: ${plan.mode}`,

tests/integration/orchestration.test.ts

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ describe('orchestration integration', () => {
447447
expect(tmux.sendKeys).toHaveBeenCalledTimes(0);
448448
}, 30000);
449449

450-
it('marks plan failed when a merge conflict occurs and leaves integration clean', async () => {
450+
it('pauses plan on merge conflict for retry instead of failing', async () => {
451451
const monitor = new FakeMonitor();
452452
mockTmux();
453453
const orchestrator = new Orchestrator(monitor as any, {
@@ -497,22 +497,23 @@ describe('orchestration integration', () => {
497497
await waitForCondition(async () => {
498498
const currentPlan = await loadPlan();
499499
const second = currentPlan?.jobs.find((job) => job.id === 'job-2');
500-
return currentPlan?.status === 'failed' && second?.status === 'conflict';
500+
return currentPlan?.status === 'paused' && second?.status === 'needs_rebase';
501501
}, 8000);
502502

503-
const failedPlan = await loadPlan();
504-
expect(failedPlan?.status).toBe('failed');
505-
expect(failedPlan?.jobs.find((job) => job.id === 'job-2')?.status).toBe('conflict');
503+
const pausedPlan = await loadPlan();
504+
expect(pausedPlan?.status).toBe('paused');
505+
expect(pausedPlan?.checkpoint).toBe('on_error');
506+
expect(pausedPlan?.jobs.find((job) => job.id === 'job-2')?.status).toBe('needs_rebase');
506507

507508
const status = await mustExec(
508509
['git', 'status', '--porcelain'],
509-
failedPlan!.integrationWorktree!,
510+
pausedPlan!.integrationWorktree!,
510511
);
511512
expect(status).toBe('');
512513

513514
const mergeHead = await exec(
514515
['git', 'rev-parse', '-q', '--verify', 'MERGE_HEAD'],
515-
failedPlan!.integrationWorktree!,
516+
pausedPlan!.integrationWorktree!,
516517
);
517518
expect(mergeHead.exitCode).not.toBe(0);
518519
}, 30000);

0 commit comments

Comments
 (0)