Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions src/action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import type { StateManager } from './state-manager.js';
import { extractCodeBlocks } from './utils/code-extractor.js';
import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js';
import { createDebug, log, setStepSpanParent, tag } from './utils/logger.js';
import { withRetry } from './utils/retry.js';
import { safeFilename } from './utils/strings.ts';
import { throttle } from './utils/throttle.ts';

Expand Down Expand Up @@ -79,12 +80,17 @@ class Action {
const page = this.playwrightHelper.page;
const frame = this.playwrightHelper.frame;
await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {});
const grabAll = () => Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]);
const [html, title, browserLogs] = await grabAll().catch(async (err: Error) => {
const msg = err instanceof Error ? err.message : String(err);
if (!/navigating and changing the content/i.test(msg)) throw err;
await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {});
return grabAll();
// Collects page source, title and browser logs in parallel.
// If any of the three rejects, the recorder is reset and started again before the
// original error is rethrown, so the caller (withRetry below) still sees the failure.
// NOTE(review): presumably the reset/start clears a failed recorder queue so a retry begins clean — confirm.
const grabAll = async () => {
try {
return await Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]);
} catch (err) {
await recorder.reset();
await recorder.start();
throw err;
}
};
// retryCondition approves a retry only when the error message matches
// "navigating and changing the content" (case-insensitive).
// NOTE(review): attempt count and delay come from withRetry's defaults — not visible here.
const [html, title, browserLogs] = await withRetry(grabAll, {
retryCondition: (err) => /navigating and changing the content/i.test(err.message),
});
const url = page?.url() || (await (this.actor as any).grabCurrentUrl?.());

Expand Down
103 changes: 102 additions & 1 deletion src/ai/captain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ import { HooksRunner } from '../utils/hooks-runner.ts';
import { startLogCapture, stopLogCapture, tag } from '../utils/logger.js';
import { loop } from '../utils/loop.js';
import { truncateJson } from '../utils/strings.ts';
import { Stats } from '../stats.ts';
import type { Agent } from './agent.js';
import { WithHealMode } from './captain/heal-mode.ts';
import { WithIdleMode } from './captain/idle-mode.ts';
import { type CaptainMode, type ModeContext, debugLog } from './captain/mixin.ts';
import { WithTestMode } from './captain/test-mode.ts';
Expand All @@ -22,8 +24,9 @@ import { Researcher } from './researcher.ts';
import { TaskAgent } from './task-agent.ts';

const MAX_STEPS = 15;
const HEAL_MAX_STEPS = 5;

const CaptainBase = WithTestMode(WithWebMode(WithIdleMode(TaskAgent as unknown as new (...args: any[]) => TaskAgent)));
const CaptainBase = WithHealMode(WithTestMode(WithWebMode(WithIdleMode(TaskAgent as unknown as new (...args: any[]) => TaskAgent))));

export class Captain extends CaptainBase implements Agent {
protected readonly ACTION_TOOLS = ['click', 'pressKey', 'form', 'navigate'];
Expand Down Expand Up @@ -492,6 +495,104 @@ export class Captain extends CaptainBase implements Agent {

return null;
}

/**
 * Runs a bounded heal-mode conversation with the agentic model.
 *
 * Builds a throwaway 'heal' task, combines the core tools with the heal-mode tools,
 * seeds the conversation with the heal prompt/rules and the context produced by
 * `buildHealContext()`, then loops for at most `HEAL_MAX_STEPS` iterations.
 * The loop ends early when the completion callback handed to `coreTools` has fired,
 * when `Stats.haltSession` is set, or when the provider returns no result.
 * Errors raised inside the loop are logged and stop the loop.
 */
async heal(): Promise<void> {
  const ctx: ModeContext = { explorBot: this.explorBot, task: new Task('heal', '') };

  let finished = false;
  const shouldStop = (): boolean => finished || Boolean(Stats.haltSession);

  const tools = {
    ...this.coreTools(ctx.task, () => {
      finished = true;
    }),
    ...this.healModeTools(ctx),
  };

  const provider = this.explorBot.getProvider();
  // Template body is kept flush-left so the text handed to dedent is unchanged.
  const systemPrompt = dedent`
<role>You are Captain in heal mode — diagnosing a cluster of failures during a long-running session.</role>
${this.healModePrompt()}
<rules>
${this.healModeRules()}
</rules>
`;
  const model = provider.getAgenticModel('captain');
  const conversation = provider.startConversation(systemPrompt, 'captain', model);
  conversation.addUserText(this.buildHealContext());

  await loop(
    async ({ stop, iteration }) => {
      debugLog(`Captain heal iteration ${iteration}`);

      // Skip the model call entirely once a stop condition already holds.
      const outcome = shouldStop()
        ? null
        : await provider.invokeConversation(conversation, tools, {
            maxToolRoundtrips: 3,
            toolChoice: 'auto',
          });

      if (!outcome) {
        stop();
        return;
      }

      this.trackToolExecutions(outcome.toolExecutions || []);

      // A tool run during this iteration may have finished the task or halted the session.
      if (shouldStop()) stop();
    },
    {
      maxAttempts: HEAL_MAX_STEPS,
      interruptible: false,
      observability: { agent: 'captain', name: 'captain.heal' },
      catch: async ({ error, stop }) => {
        tag('error').log(`Captain heal error: ${error.message}`);
        stop();
      },
    }
  );
}

/**
 * Assembles the user message that opens a heal conversation.
 *
 * The text has four sections: recent errors (with age in seconds and messages cut to
 * 200 characters), browser state (URL, title, whether the page object exists and is
 * not closed), plan progress (finished vs pending test counts) and the previous heal
 * verdict, if one is stored in `Stats.lastHealReason`.
 *
 * @returns the dedented context block.
 */
private buildHealContext(): string {
  const state = this.explorBot.getExplorer().getStateManager().getCurrentState();

  const now = Date.now();
  const errorLines: string[] = [];
  for (const entry of Stats.recentErrors) {
    const ageSec = Math.round((now - entry.at) / 1000);
    errorLines.push(`- (${ageSec}s ago) ${entry.message.substring(0, 200)}`);
  }
  const errorBlock = errorLines.join('\n') || '(empty)';

  const page = this.explorBot.getExplorer().playwrightHelper?.page;
  const pageAlive = page ? !page.isClosed?.() : false;

  const plan = this.explorBot.getCurrentPlan();
  const pending = plan?.getPendingTests().length ?? 0;
  let completed = 0;
  if (plan) {
    for (const t of plan.tests) {
      if (t.hasFinished) completed += 1;
    }
  }

  let previous = 'No previous heal verdict.';
  if (Stats.lastHealReason) {
    previous = `Previous heal verdict: ${Stats.lastHealReason}`;
  }

  const currentUrl = state?.url || '(none)';
  const currentTitle = state?.title || '(none)';

  // Template body is kept flush-left so the text handed to dedent is unchanged.
  return dedent`
<recent_errors>
Consecutive failures: ${Stats.consecutiveFailures}
${errorBlock}
</recent_errors>

<browser_state>
URL: ${currentUrl}
Title: ${currentTitle}
Page alive: ${pageAlive}
</browser_state>

<plan_progress>
Completed: ${completed}
Pending: ${pending}
</plan_progress>

<history>
${previous}
</history>

Diagnose and pick ONE recovery action (or halt). End with done(summary).
`;
}
}

export default Captain;
Expand Down
86 changes: 86 additions & 0 deletions src/ai/captain/heal-mode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import { tool } from 'ai';
import dedent from 'dedent';
import { z } from 'zod';
import { Stats } from '../../stats.ts';
import { tag } from '../../utils/logger.js';
import { type Constructor, type ModeContext } from './mixin.ts';
import { type WebModeMethods } from './web-mode.ts';

/**
 * Mixin that layers heal-mode behaviour on top of a class providing web-mode methods.
 *
 * The returned class adds:
 * - `healModeTools(ctx)`: the web-mode tools plus `wait`, `restartBrowser` and `halt`;
 * - `healModePrompt()`: the capabilities section of the heal system prompt;
 * - `healModeRules()`: the rule list of the heal system prompt.
 *
 * @param Base constructor of a class exposing the web-mode methods.
 * @returns an anonymous subclass of `Base` with the heal-mode methods.
 */
export function WithHealMode<T extends Constructor<WebModeMethods>>(Base: T) {
  return class extends Base {
    /** Builds the heal-mode tool map: every web-mode tool plus the three recovery tools. */
    healModeTools(ctx: ModeContext): Record<string, any> {
      const inherited = this.webModeTools(ctx);

      // Sleeps for the requested number of seconds (schema limits it to 5..120).
      const wait = tool({
        description: 'Pause for transient conditions (rate limit, slow backend, blank page that is still loading). Max 120s.',
        inputSchema: z.object({
          seconds: z.number().int().min(5).max(120),
          reason: z.string(),
        }),
        execute: async ({ seconds, reason }) => {
          tag('info').log(`Heal: waiting ${seconds}s — ${reason}`);
          const pauseMs = seconds * 1000;
          await new Promise((resume) => setTimeout(resume, pauseMs));
          return { waited: seconds, reason };
        },
      });

      // Delegates to the explorer's restartBrowser().
      const restartBrowser = tool({
        description: 'Full browser restart — stops Playwright, clears recorder state, reconnects. Use when reload/navigate cannot recover (target closed, frame detached, persistent context error).',
        inputSchema: z.object({ reason: z.string() }),
        execute: async ({ reason }) => {
          tag('info').log(`Heal: restartBrowser — ${reason}`);
          const explorer = ctx.explorBot.getExplorer();
          await explorer.restartBrowser();
          return { restarted: true, reason };
        },
      });

      // Records the halt reason on Stats; callers check Stats.haltSession to stop the session.
      const halt = tool({
        description: 'Systematic failure — remaining tests cannot succeed (server down, model unavailable, same error repeating across unrelated scenarios). Stops the session; remaining tests are skipped with this reason. Use this aggressively when you see the same root cause repeat.',
        inputSchema: z.object({ reason: z.string() }),
        execute: async ({ reason }) => {
          Stats.haltSession = reason;
          Stats.lastHealReason = reason;
          tag('error').log(`Heal: halt — ${reason}`);
          return { halted: true, reason };
        },
      });

      return { ...inherited, wait, restartBrowser, halt };
    }

    /** Returns the `<heal_capabilities>` section of the heal system prompt. */
    healModePrompt(): string {
      // Template body is kept flush-left so the text handed to dedent is unchanged.
      const capabilities = dedent`
<heal_capabilities>
You are diagnosing a cluster of failures during a long-running session.
You have the recent error history, current browser state, and plan progress.

Recovery escalates from cheap to expensive — try the lighter option first:
- probe the page with browser(evaluate) or codeceptjs see/context
- reload via browser(reload)
- ask Navigator to re-resolve expected state via navigate(destination)
- restartBrowser as a last resort

If errors look transient and time-only (rate limit, 429, blank page), pick wait.
If the same error keeps repeating across unrelated scenarios, pick halt — remaining work will fail the same way.
End with done(summary).
</heal_capabilities>
`;
      return capabilities;
    }

    /** Returns the bullet list of decision rules embedded in the heal system prompt. */
    healModeRules(): string {
      // Template body is kept flush-left so the text handed to dedent is unchanged.
      const rules = dedent`
- prefer halt when same root cause repeats across 2+ different scenarios
- prefer restartBrowser when error mentions Target closed / Frame was detached / context closed
- prefer navigate over browser(reload) when current URL differs from expected — session likely dropped
- prefer wait only for rate-limit / 429 / blank-page / "navigating and changing" patterns
- probe BEFORE acting: browser(evaluate, "window.location.href") or see() is cheap and informs the choice
- call done() once a recovery action has been taken or a halt verdict issued
`;
      return rules;
    }
  };
}

/** Method signatures that the class returned by WithHealMode defines. */
export interface HealModeMethods {
/** Returns the tool map available in heal mode for the given context. */
healModeTools(ctx: ModeContext): Record<string, any>;
/** Returns the capabilities section of the heal system prompt. */
healModePrompt(): string;
/** Returns the rule list of the heal system prompt. */
healModeRules(): string;
}
2 changes: 1 addition & 1 deletion src/ai/captain/mixin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export type Constructor<T = object> = new (...args: any[]) => T;

export const debugLog = createDebug('explorbot:captain');

export type CaptainMode = 'idle' | 'web' | 'test';
export type CaptainMode = 'idle' | 'web' | 'test' | 'heal';

export interface ModeContext {
explorBot: ExplorBot;
Expand Down
1 change: 0 additions & 1 deletion src/ai/session-analyst.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ export class SessionAnalyst implements Agent {
private serializeTest(test: Test, ref: number): string {
const log = test
.getLog()
.slice(-30)
.map((entry) => ` - [${entry.type}] ${entry.content}`)
.join('\n');

Expand Down
17 changes: 17 additions & 0 deletions src/commands/base-command.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import chalk from 'chalk';
import { Command } from 'commander';
import { isInteractive } from '../ai/task-agent.js';
import { ConfigParser } from '../config.js';
import type { ExplorBot } from '../explorbot.js';
import { Stats } from '../stats.js';
import { getCliName } from '../utils/cli-name.js';
import { tag } from '../utils/logger.js';

const DEFAULT_HEAL_ATTEMPTS = 2;

export interface CommandOption {
flags: string;
description: string;
Expand All @@ -31,6 +35,19 @@ export abstract class BaseCommand {

abstract execute(args: string): Promise<void>;

/**
 * Records a failure and, once the consecutive-failure threshold is reached,
 * asks the Captain to run heal mode.
 *
 * The threshold is the `healAttempts` config value, falling back to
 * `DEFAULT_HEAL_ATTEMPTS`; a value of 0 disables healing. Nothing further happens
 * when the session is already halted.
 *
 * This runs on the failure path of callers, so it must never throw itself:
 * an error raised by the heal run is logged and swallowed rather than aborting
 * the surrounding command. The failure streak is reset afterwards in either case,
 * so heal is attempted at most once per `attempts` failures.
 *
 * @param err the failure that was observed (Error or any other thrown value).
 */
protected healByCaptain = async (err: unknown): Promise<void> => {
  const msg = err instanceof Error ? err.message : String(err);
  tag('warning').log(`Failure observed: ${msg}`);
  Stats.recordError(msg);
  if (Stats.haltSession) return;

  const attempts = ConfigParser.getInstance().getConfig().healAttempts ?? DEFAULT_HEAL_ATTEMPTS;
  if (!attempts) return;
  if (Stats.consecutiveFailures < attempts) return;

  tag('warning').log(`Captain heal mode: ${Stats.consecutiveFailures} consecutive failures — investigating`);
  try {
    await this.explorBot.agentCaptain().heal();
  } catch (healErr) {
    // Heal is best-effort recovery; its own failure must not escalate the original one.
    const healMsg = healErr instanceof Error ? healErr.message : String(healErr);
    tag('error').log(`Captain heal failed: ${healMsg}`);
  } finally {
    Stats.consecutiveFailures = 0;
  }
};

matches(commandName: string): boolean {
return this.name === commandName || this.aliases.includes(commandName);
}
Expand Down
34 changes: 32 additions & 2 deletions src/commands/explore-command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,14 @@ export class ExploreCommand extends BaseCommand {
await this.runFreshMode(mainUrl, feature, cfg.styles);
}

this.skipRemainingTestsOnHalt();
const mainPlan = this.completedPlans[0];
if (mainPlan) this.explorBot.setCurrentPlan(mainPlan);
if (this.dryRun) {
this.printResults();
return;
}
if (mainUrl) await this.explorBot.visit(mainUrl);
if (mainUrl && !Stats.haltSession) await this.explorBot.visit(mainUrl);
const savedPath = this.explorBot.savePlans(this.completedPlans);
this.printResults();
await this.explorBot.printSessionAnalysis();
Expand Down Expand Up @@ -290,6 +291,7 @@ export class ExploreCommand extends BaseCommand {
await this.explorBot.plan(feature, opts);
if (this.explorBot.lastPlanError) {
tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`);
await this.healByCaptain(this.explorBot.lastPlanError);
return;
}
}
Expand Down Expand Up @@ -474,6 +476,7 @@ export class ExploreCommand extends BaseCommand {
}

/**
 * Tells whether no further tests should be started.
 *
 * @returns true when the session has been halted, or when `maxTests` is set
 *          and `testsRun` has reached it.
 */
private isLimitReached(): boolean {
  const halted = Boolean(Stats.haltSession);
  return halted || (this.maxTests != null && this.testsRun >= this.maxTests);
}

Expand All @@ -495,11 +498,38 @@ export class ExploreCommand extends BaseCommand {
if (this.dryRun) {
test.start();
test.finish(TestResult.SKIPPED);
} else {
this.testsRun++;
return;
}
try {
await this.explorBot.agentTester().test(test);
if (test.isSuccessful) Stats.recordSuccess();
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
tag('warning').log(`Test failed: ${test.scenario} — ${msg}`);
if (!test.hasFinished) {
test.addNote(`Aborted: ${msg}`, TestResult.FAILED);
test.finish(TestResult.FAILED);
}
await this.healByCaptain(err);
}
this.testsRun++;
}

/**
 * Marks every still-pending test of the completed plans as skipped when the
 * session was halted, attaching the halt reason as a note, and logs how many
 * tests were skipped. Does nothing when no halt reason is set.
 */
private skipRemainingTestsOnHalt(): void {
  const reason = Stats.haltSession;
  if (!reason) return;

  const note = `Session halted: ${reason}`;
  let skipped = 0;
  for (const plan of this.completedPlans) {
    for (const pendingTest of plan.getPendingTests()) {
      pendingTest.start();
      pendingTest.addNote(note, TestResult.SKIPPED);
      pendingTest.finish(TestResult.SKIPPED);
      skipped += 1;
    }
  }

  if (skipped === 0) return;
  tag('error').log(`Session halted: ${reason} — ${skipped} test(s) skipped`);
}
}

interface ConfigureSpec {
Expand Down
Loading