From ff97e0572d007d5dfe250ba739d8e234eb905d11 Mon Sep 17 00:00:00 2001 From: DavertMik Date: Tue, 12 May 2026 23:49:57 +0300 Subject: [PATCH 1/2] session improvements --- src/action.ts | 18 ++++++++++++------ src/ai/session-analyst.ts | 1 - src/commands/explore-command.ts | 12 +++++++++++- src/explorbot.ts | 2 +- src/explorer.ts | 2 +- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/action.ts b/src/action.ts index d273472..db1c2e6 100644 --- a/src/action.ts +++ b/src/action.ts @@ -22,6 +22,7 @@ import type { StateManager } from './state-manager.js'; import { extractCodeBlocks } from './utils/code-extractor.js'; import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js'; import { createDebug, log, setStepSpanParent, tag } from './utils/logger.js'; +import { withRetry } from './utils/retry.js'; import { safeFilename } from './utils/strings.ts'; import { throttle } from './utils/throttle.ts'; @@ -79,12 +80,17 @@ class Action { const page = this.playwrightHelper.page; const frame = this.playwrightHelper.frame; await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {}); - const grabAll = () => Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]); - const [html, title, browserLogs] = await grabAll().catch(async (err: Error) => { - const msg = err instanceof Error ? err.message : String(err); - if (!/navigating and changing the content/i.test(msg)) throw err; - await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {}); - return grabAll(); + const grabAll = async () => { + try { + return await Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]); + } catch (err) { + await recorder.reset(); + await recorder.start(); + throw err; + } + }; + const [html, title, browserLogs] = await withRetry(grabAll, { + retryCondition: (err) => /navigating and changing the content/i.test(err.message), }); const url = page?.url() || (await (this.actor as any).grabCurrentUrl?.()); diff --git a/src/ai/session-analyst.ts b/src/ai/session-analyst.ts index c87dc03..f1ac5a5 100644 --- a/src/ai/session-analyst.ts +++ b/src/ai/session-analyst.ts @@ -117,7 +117,6 @@ export class SessionAnalyst implements Agent { private serializeTest(test: Test, ref: number): string { const log = test .getLog() - .slice(-30) .map((entry) => ` - [${entry.type}] ${entry.content}`) .join('\n'); diff --git a/src/commands/explore-command.ts b/src/commands/explore-command.ts index 2c5b81b..20ed1ae 100644 --- a/src/commands/explore-command.ts +++ b/src/commands/explore-command.ts @@ -495,8 +495,18 @@ export class ExploreCommand extends BaseCommand { if (this.dryRun) { test.start(); test.finish(TestResult.SKIPPED); - } else { + this.testsRun++; + return; + } + try { await this.explorBot.agentTester().test(test); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + tag('warning').log(`Test failed: ${test.scenario} — ${msg}`); + if (!test.hasFinished) { + test.addNote(`Aborted: ${msg}`, TestResult.FAILED); + test.finish(TestResult.FAILED); + } } this.testsRun++; } diff --git a/src/explorbot.ts b/src/explorbot.ts index e377b5b..e4fa6a2 100644 --- a/src/explorbot.ts +++ b/src/explorbot.ts @@ -27,10 +27,10 @@ import { KnowledgeTracker } from './knowledge-tracker.ts'; import { WebPageState } from './state-manager.ts'; import type { Suite } from './suite.ts'; import { Plan, type Test } from './test-plan.ts'; -import { parsePlansFromMarkdown } from './utils/test-plan-markdown.ts'; import { setVerboseMode, tag } from './utils/logger.ts'; import { relativeToCwd } from './utils/next-steps.ts'; import { sanitizeFilename } from './utils/strings.ts'; +import { parsePlansFromMarkdown } from './utils/test-plan-markdown.ts'; export interface ExplorBotOptions { from?: string; diff --git a/src/explorer.ts b/src/explorer.ts index a8ef563..e4ac5de 100644 --- a/src/explorer.ts +++ b/src/explorer.ts @@ -39,7 +39,7 @@ declare namespace CodeceptJS { const debugLog = createDebug('explorbot:explorer'); const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i; -const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load/i; +const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load|Unable to retrieve content because the page is navigating/i; interface TabInfo { url: string; From 26e5bea435503e410dd0358d0973b65ff25a12c8 Mon Sep 17 00:00:00 2001 From: DavertMik Date: Thu, 14 May 2026 22:23:36 +0300 Subject: [PATCH 2/2] added healing mode to captain --- src/ai/captain.ts | 103 ++++++++++++++++++++++++++++++- src/ai/captain/heal-mode.ts | 86 ++++++++++++++++++++++++++ src/ai/captain/mixin.ts | 2 +- src/commands/base-command.ts | 17 +++++ src/commands/explore-command.ts | 22 ++++++- src/commands/freesail-command.ts | 10 ++- src/commands/init-command.ts | 4 ++ src/config.ts | 1 + src/explorer.ts | 12 ++++ src/stats.ts | 21 +++++++ 10 files changed, 272 insertions(+), 6 deletions(-) create mode 100644 src/ai/captain/heal-mode.ts diff --git a/src/ai/captain.ts b/src/ai/captain.ts index ddb5548..0a94001 100644 --- a/src/ai/captain.ts +++ b/src/ai/captain.ts @@ -10,7 +10,9 @@ import { HooksRunner } from '../utils/hooks-runner.ts'; import { startLogCapture, stopLogCapture, tag } from '../utils/logger.js'; import { loop } from '../utils/loop.js'; import { truncateJson } from '../utils/strings.ts'; +import { Stats } from '../stats.ts'; import type { Agent } from './agent.js'; +import { WithHealMode } from './captain/heal-mode.ts'; import { WithIdleMode } from './captain/idle-mode.ts'; import { type CaptainMode, type ModeContext, debugLog } from './captain/mixin.ts'; import { WithTestMode } from './captain/test-mode.ts'; @@ -22,8 +24,9 @@ import { Researcher } from './researcher.ts'; import { TaskAgent } from './task-agent.ts'; const MAX_STEPS = 15; +const HEAL_MAX_STEPS = 5; -const CaptainBase = WithTestMode(WithWebMode(WithIdleMode(TaskAgent as unknown as new (...args: any[]) => TaskAgent))); +const CaptainBase = WithHealMode(WithTestMode(WithWebMode(WithIdleMode(TaskAgent as unknown as new (...args: any[]) => TaskAgent)))); export class Captain extends CaptainBase implements Agent { protected readonly ACTION_TOOLS = ['click', 'pressKey', 'form', 'navigate']; @@ -492,6 +495,104 @@ export class Captain extends CaptainBase implements Agent { return null; } + + async heal(): Promise { + const ctx: ModeContext = { explorBot: this.explorBot, task: new Task('heal', '') }; + let isDone = false; + const onDone = () => { + isDone = true; + }; + const tools = { ...this.coreTools(ctx.task, onDone), ...this.healModeTools(ctx) }; + + const provider = this.explorBot.getProvider(); + const conversation = provider.startConversation( + dedent` + You are Captain in heal mode — diagnosing a cluster of failures during a long-running session. + ${this.healModePrompt()} + + ${this.healModeRules()} + + `, + 'captain', + provider.getAgenticModel('captain') + ); + conversation.addUserText(this.buildHealContext()); + + await loop( + async ({ stop, iteration }) => { + debugLog(`Captain heal iteration ${iteration}`); + if (isDone || Stats.haltSession) { + stop(); + return; + } + + const result = await provider.invokeConversation(conversation, tools, { + maxToolRoundtrips: 3, + toolChoice: 'auto', + }); + + if (!result) { + stop(); + return; + } + + this.trackToolExecutions(result?.toolExecutions || []); + + if (isDone || Stats.haltSession) stop(); + }, + { + maxAttempts: HEAL_MAX_STEPS, + interruptible: false, + observability: { agent: 'captain', name: 'captain.heal' }, + catch: async ({ error, stop }) => { + tag('error').log(`Captain heal error: ${error.message}`); + stop(); + }, + } + ); + } + + private buildHealContext(): string { + const stateManager = this.explorBot.getExplorer().getStateManager(); + const state = stateManager.getCurrentState(); + const now = Date.now(); + const errorLines = Stats.recentErrors.map((e) => { + const ageSec = Math.round((now - e.at) / 1000); + return `- (${ageSec}s ago) ${e.message.substring(0, 200)}`; + }); + const page = this.explorBot.getExplorer().playwrightHelper?.page; + const pageAlive = !!page && !page.isClosed?.(); + + const plan = this.explorBot.getCurrentPlan(); + const pending = plan?.getPendingTests().length ?? 0; + const completed = plan ? plan.tests.filter((t) => t.hasFinished).length : 0; + + const previous = Stats.lastHealReason ? `Previous heal verdict: ${Stats.lastHealReason}` : 'No previous heal verdict.'; + + return dedent` + + Consecutive failures: ${Stats.consecutiveFailures} + ${errorLines.join('\n') || '(empty)'} + + + + URL: ${state?.url || '(none)'} + Title: ${state?.title || '(none)'} + Page alive: ${pageAlive} + + + + Completed: ${completed} + Pending: ${pending} + + + + ${previous} + + + Diagnose and pick ONE recovery action (or halt). End with done(summary). + `; + } } export default Captain; diff --git a/src/ai/captain/heal-mode.ts b/src/ai/captain/heal-mode.ts new file mode 100644 index 0000000..d4f26fb --- /dev/null +++ b/src/ai/captain/heal-mode.ts @@ -0,0 +1,86 @@ +import { tool } from 'ai'; +import dedent from 'dedent'; +import { z } from 'zod'; +import { Stats } from '../../stats.ts'; +import { tag } from '../../utils/logger.js'; +import { type Constructor, type ModeContext } from './mixin.ts'; +import { type WebModeMethods } from './web-mode.ts'; + +export function WithHealMode>(Base: T) { + return class extends Base { + healModeTools(ctx: ModeContext): Record { + const webTools = this.webModeTools(ctx); + + return { + ...webTools, + wait: tool({ + description: 'Pause for transient conditions (rate limit, slow backend, blank page that is still loading). Max 120s.', + inputSchema: z.object({ + seconds: z.number().int().min(5).max(120), + reason: z.string(), + }), + execute: async ({ seconds, reason }) => { + tag('info').log(`Heal: waiting ${seconds}s — ${reason}`); + await new Promise((r) => setTimeout(r, seconds * 1000)); + return { waited: seconds, reason }; + }, + }), + restartBrowser: tool({ + description: 'Full browser restart — stops Playwright, clears recorder state, reconnects. Use when reload/navigate cannot recover (target closed, frame detached, persistent context error).', + inputSchema: z.object({ reason: z.string() }), + execute: async ({ reason }) => { + tag('info').log(`Heal: restartBrowser — ${reason}`); + await ctx.explorBot.getExplorer().restartBrowser(); + return { restarted: true, reason }; + }, + }), + halt: tool({ + description: 'Systematic failure — remaining tests cannot succeed (server down, model unavailable, same error repeating across unrelated scenarios). Stops the session; remaining tests are skipped with this reason. Use this aggressively when you see the same root cause repeat.', + inputSchema: z.object({ reason: z.string() }), + execute: async ({ reason }) => { + Stats.haltSession = reason; + Stats.lastHealReason = reason; + tag('error').log(`Heal: halt — ${reason}`); + return { halted: true, reason }; + }, + }), + }; + } + + healModePrompt(): string { + return dedent` + + You are diagnosing a cluster of failures during a long-running session. + You have the recent error history, current browser state, and plan progress. + + Recovery escalates from cheap to expensive — try the lighter option first: + - probe the page with browser(evaluate) or codeceptjs see/context + - reload via browser(reload) + - ask Navigator to re-resolve expected state via navigate(destination) + - restartBrowser as a last resort + + If errors look transient and time-only (rate limit, 429, blank page), pick wait. + If the same error keeps repeating across unrelated scenarios, pick halt — remaining work will fail the same way. + End with done(summary). + + `; + } + + healModeRules(): string { + return dedent` + - prefer halt when same root cause repeats across 2+ different scenarios + - prefer restartBrowser when error mentions Target closed / Frame was detached / context closed + - prefer navigate over browser(reload) when current URL differs from expected — session likely dropped + - prefer wait only for rate-limit / 429 / blank-page / "navigating and changing" patterns + - probe BEFORE acting: browser(evaluate, "window.location.href") or see() is cheap and informs the choice + - call done() once a recovery action has been taken or a halt verdict issued + `; + } + }; +} + +export interface HealModeMethods { + healModeTools(ctx: ModeContext): Record; + healModePrompt(): string; + healModeRules(): string; +} diff --git a/src/ai/captain/mixin.ts b/src/ai/captain/mixin.ts index a86afa9..342c12f 100644 --- a/src/ai/captain/mixin.ts +++ b/src/ai/captain/mixin.ts @@ -8,7 +8,7 @@ export type Constructor = new (...args: any[]) => T; export const debugLog = createDebug('explorbot:captain'); -export type CaptainMode = 'idle' | 'web' | 'test'; +export type CaptainMode = 'idle' | 'web' | 'test' | 'heal'; export interface ModeContext { explorBot: ExplorBot; diff --git a/src/commands/base-command.ts b/src/commands/base-command.ts index 6610249..8e6002c 100644 --- a/src/commands/base-command.ts +++ b/src/commands/base-command.ts @@ -1,10 +1,14 @@ import chalk from 'chalk'; import { Command } from 'commander'; import { isInteractive } from '../ai/task-agent.js'; +import { ConfigParser } from '../config.js'; import type { ExplorBot } from '../explorbot.js'; +import { Stats } from '../stats.js'; import { getCliName } from '../utils/cli-name.js'; import { tag } from '../utils/logger.js'; +const DEFAULT_HEAL_ATTEMPTS = 2; + export interface CommandOption { flags: string; description: string; @@ -31,6 +35,19 @@ export abstract class BaseCommand { abstract execute(args: string): Promise; + protected healByCaptain = async (err: unknown): Promise => { + const msg = err instanceof Error ? err.message : String(err); + tag('warning').log(`Failure observed: ${msg}`); + Stats.recordError(msg); + if (Stats.haltSession) return; + const attempts = ConfigParser.getInstance().getConfig().healAttempts ?? DEFAULT_HEAL_ATTEMPTS; + if (!attempts) return; + if (Stats.consecutiveFailures < attempts) return; + tag('warning').log(`Captain heal mode: ${Stats.consecutiveFailures} consecutive failures — investigating`); + await this.explorBot.agentCaptain().heal(); + Stats.consecutiveFailures = 0; + }; + matches(commandName: string): boolean { return this.name === commandName || this.aliases.includes(commandName); } diff --git a/src/commands/explore-command.ts b/src/commands/explore-command.ts index 20ed1ae..cfe0957 100644 --- a/src/commands/explore-command.ts +++ b/src/commands/explore-command.ts @@ -59,13 +59,14 @@ export class ExploreCommand extends BaseCommand { await this.runFreshMode(mainUrl, feature, cfg.styles); } + this.skipRemainingTestsOnHalt(); const mainPlan = this.completedPlans[0]; if (mainPlan) this.explorBot.setCurrentPlan(mainPlan); if (this.dryRun) { this.printResults(); return; } - if (mainUrl) await this.explorBot.visit(mainUrl); + if (mainUrl && !Stats.haltSession) await this.explorBot.visit(mainUrl); const savedPath = this.explorBot.savePlans(this.completedPlans); this.printResults(); await this.explorBot.printSessionAnalysis(); @@ -290,6 +291,7 @@ export class ExploreCommand extends BaseCommand { await this.explorBot.plan(feature, opts); if (this.explorBot.lastPlanError) { tag('warning').log(`Planning style '${opts.style}' failed after retry, skipping`); + await this.healByCaptain(this.explorBot.lastPlanError); return; } } @@ -474,6 +476,7 @@ export class ExploreCommand extends BaseCommand { } private isLimitReached(): boolean { + if (Stats.haltSession) return true; return this.maxTests != null && this.testsRun >= this.maxTests; } @@ -500,6 +503,7 @@ export class ExploreCommand extends BaseCommand { } try { await this.explorBot.agentTester().test(test); + if (test.isSuccessful) Stats.recordSuccess(); } catch (err) { const msg = err instanceof Error ? err.message : String(err); tag('warning').log(`Test failed: ${test.scenario} — ${msg}`); @@ -507,9 +511,25 @@ export class ExploreCommand extends BaseCommand { test.addNote(`Aborted: ${msg}`, TestResult.FAILED); test.finish(TestResult.FAILED); } + await this.healByCaptain(err); } this.testsRun++; } + + private skipRemainingTestsOnHalt(): void { + if (!Stats.haltSession) return; + const reason = Stats.haltSession; + let skipped = 0; + for (const plan of this.completedPlans) { + for (const t of plan.getPendingTests()) { + t.start(); + t.addNote(`Session halted: ${reason}`, TestResult.SKIPPED); + t.finish(TestResult.SKIPPED); + skipped++; + } + } + if (skipped > 0) tag('error').log(`Session halted: ${reason} — ${skipped} test(s) skipped`); + } } interface ConfigureSpec { diff --git a/src/commands/freesail-command.ts b/src/commands/freesail-command.ts index 8a64f44..1cfe14d 100644 --- a/src/commands/freesail-command.ts +++ b/src/commands/freesail-command.ts @@ -33,13 +33,15 @@ export class FreesailCommand extends BaseCommand { await loop( async (ctx) => { + if (Stats.haltSession) ctx.stop(); if (maxTests != null && testsRun >= maxTests) ctx.stop(); const stateManager = this.explorBot.getExplorer().getStateManager(); const state = stateManager.getCurrentState(); if (state && !Researcher.getCachedResearch(state)) { - await this.explorBot.agentResearcher().research(state, { deep: true, screenshot: true }); + await this.explorBot.agentResearcher().research(state, { deep: true, screenshot: true }).catch(this.healByCaptain); + if (Stats.haltSession) ctx.stop(); } const cachedPlan = state?.url ? Planner.getCachedPlan(state.url) : null; @@ -54,11 +56,13 @@ export class FreesailCommand extends BaseCommand { if (plan) testsRun += plan.tests.filter((t) => t.hasFinished).length; } + if (Stats.haltSession) ctx.stop(); if (maxTests != null && testsRun >= maxTests) ctx.stop(); const navigator = this.explorBot.agentNavigator(); const visitedUrls = stateManager.getAllVisitedUrls(); - const suggestion = await navigator.freeSail({ strategy, scope, visitedUrls }); + const suggestion = await navigator.freeSail({ strategy, scope, visitedUrls }).catch(this.healByCaptain); + if (Stats.haltSession) ctx.stop(); if (!suggestion) { tag('info').log('No navigation suggestion available'); return; @@ -71,7 +75,7 @@ export class FreesailCommand extends BaseCommand { tag('info').log(`Navigating to: ${suggestion.target} - ${suggestion.reason}`); await this.explorBot.openFreshTab(); - await this.explorBot.visit(suggestion.target); + await this.explorBot.visit(suggestion.target).catch(this.healByCaptain); this.explorBot.clearPlan(); }, { maxAttempts: Number.POSITIVE_INFINITY } diff --git a/src/commands/init-command.ts b/src/commands/init-command.ts index 14d09ad..213ec3f 100644 --- a/src/commands/init-command.ts +++ b/src/commands/init-command.ts @@ -30,6 +30,10 @@ const config = { // agentic model for decision making agenticModel: openrouter('minimax/minimax-m2.5:nitro'), }, + + // Captain heal mode: consecutive failures before Captain investigates and decides + // to recover / wait / halt the session. Set 0 or false to disable. + healAttempts: 2, }; export default config; diff --git a/src/config.ts b/src/config.ts index 105677e..2116163 100644 --- a/src/config.ts +++ b/src/config.ts @@ -218,6 +218,7 @@ interface ExplorbotConfig { stepsFile?: string; files?: Record; dynamicPageRegex?: string; + healAttempts?: number | false; } const config: ExplorbotConfig = { diff --git a/src/explorer.ts b/src/explorer.ts index 2a09219..08a7777 100644 --- a/src/explorer.ts +++ b/src/explorer.ts @@ -391,6 +391,18 @@ class Explorer { return FATAL_BROWSER_ERRORS.test(msg); } + async restartBrowser(): Promise { + tag('warning').log('Captain heal: restarting browser'); + try { + await this.playwrightHelper._stopBrowser(); + } catch (err) { + debugLog('restartBrowser: stop failed', err); + } + await codeceptjs.recorder.reset(); + await codeceptjs.recorder.start(); + await this.connectOrLaunchBrowser(); + } + async recoverFromBrowserError(): Promise { try { const url = this.stateManager.getCurrentState()?.url; diff --git a/src/stats.ts b/src/stats.ts index c852dfe..ded27bb 100644 --- a/src/stats.ts +++ b/src/stats.ts @@ -7,8 +7,15 @@ interface TokenUsage { cached?: number; } +interface ErrorRecord { + message: string; + at: number; +} + export type ExplorbotMode = 'explore' | 'test' | 'freesail' | 'tui'; +const MAX_RECENT_ERRORS = 8; + export class Stats { static startTime = Date.now(); static sessionName = uniqExplorationName(); @@ -18,6 +25,20 @@ export class Stats { static mode?: ExplorbotMode; static focus?: string; static models: Record = {}; + static recentErrors: ErrorRecord[] = []; + static consecutiveFailures = 0; + static haltSession: string | null = null; + static lastHealReason: string | null = null; + + static recordError(message: string): void { + Stats.recentErrors.push({ message, at: Date.now() }); + if (Stats.recentErrors.length > MAX_RECENT_ERRORS) Stats.recentErrors.shift(); + Stats.consecutiveFailures++; + } + + static recordSuccess(): void { + Stats.consecutiveFailures = 0; + } static recordTokens(_agent: string, model: string, usage: TokenUsage): void { if (!Stats.models[model]) {