diff --git a/PolyPilot.IntegrationTests/OrchestrationRelaunchTests.cs b/PolyPilot.IntegrationTests/OrchestrationRelaunchTests.cs
new file mode 100644
index 000000000..35ed47112
--- /dev/null
+++ b/PolyPilot.IntegrationTests/OrchestrationRelaunchTests.cs
@@ -0,0 +1,57 @@
+using PolyPilot.IntegrationTests.Fixtures;
+
+namespace PolyPilot.IntegrationTests;
+
+///
+/// Integration tests for orchestration relaunch resilience (issue #400).
+/// Verifies the multi-agent group UI and orchestration resume indicators
+/// are accessible through the live Blazor UI via DevFlow CDP.
+///
+[Collection("PolyPilot")]
+[Trait("Category", "OrchestrationRelaunch")]
+public class OrchestrationRelaunchTests : IntegrationTestBase
+{
+ public OrchestrationRelaunchTests(AppFixture app, ITestOutputHelper output)
+ : base(app, output) { }
+
+ [Fact]
+ public async Task Dashboard_MultiAgentGroupsRenderable()
+ {
+ await WaitForCdpReadyAsync();
+ // The dashboard should be able to render multi-agent groups.
+ // This doesn't require an active orchestration — just verifies the
+ // rendering pipeline handles the group list without errors.
+ var dashboardExists = await ExistsAsync("#dashboard");
+ Assert.True(dashboardExists, "Dashboard should load successfully — multi-agent group rendering must not crash");
+ await ScreenshotAsync("dashboard-multiagent-groups");
+ }
+
+ [Fact]
+ public async Task Dashboard_OrchestratorPhaseIndicatorRenderable()
+ {
+ await WaitForCdpReadyAsync();
+ // Verify the dashboard component that shows orchestrator phase
+ // (Planning → Dispatching → WaitingForWorkers → Synthesizing → Complete → Resuming)
+ // doesn't crash even with no active orchestration.
+ var dashboardExists = await ExistsAsync("#dashboard");
+ Assert.True(dashboardExists,
+ "Dashboard should load without errors — phase indicator rendering must not crash when no orchestration is active");
+ await ScreenshotAsync("dashboard-no-active-orchestration");
+ }
+
+ [Fact]
+ public async Task Settings_HasConnectionModeForReconnect()
+ {
+ await WaitForCdpReadyAsync();
+ var navigated = await NavigateToAsync("Settings", "#settings-page");
+ if (navigated)
+ {
+ await ScreenshotAsync("settings-reconnect-mode");
+ // Settings page should have reconnect capability — needed for
+ // orchestration resume after relaunch
+ var content = await GetTextAsync("#settings-page");
+ Assert.False(string.IsNullOrWhiteSpace(content),
+ "Settings page should have visible content for connection mode configuration");
+ }
+ }
+}
diff --git a/PolyPilot.Tests/OrchestrationRelaunchTests.cs b/PolyPilot.Tests/OrchestrationRelaunchTests.cs
new file mode 100644
index 000000000..1720cd237
--- /dev/null
+++ b/PolyPilot.Tests/OrchestrationRelaunchTests.cs
@@ -0,0 +1,318 @@
+using System.Reflection;
+using System.Text.Json;
+using Microsoft.Extensions.DependencyInjection;
+using PolyPilot.Models;
+using PolyPilot.Services;
+
+namespace PolyPilot.Tests;
+
+///
+/// Behavioral tests for the orchestration relaunch fix (issue #400).
+/// Verifies that MonitorAndSynthesizeAsync correctly detects active workers
+/// that are lazy placeholders (Session == null) after app relaunch, instead of
+/// prematurely considering them idle due to the InvokeOnUI race condition.
+///
+[Collection("BaseDir")]
+public class OrchestrationRelaunchTests
+{
+ private readonly StubChatDatabase _chatDb = new();
+ private readonly StubServerManager _serverManager = new();
+ private readonly StubWsBridgeClient _bridgeClient = new();
+ private readonly StubDemoService _demoService = new();
+ private readonly IServiceProvider _serviceProvider;
+
+ private static readonly BindingFlags NonPublic = BindingFlags.NonPublic | BindingFlags.Instance;
+ private static readonly BindingFlags NonPublicStatic = BindingFlags.NonPublic | BindingFlags.Static;
+ private static readonly BindingFlags AnyInstance = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;
+
+ public OrchestrationRelaunchTests()
+ {
+ _serviceProvider = new ServiceCollection().BuildServiceProvider();
+ }
+
+ private CopilotService CreateService() =>
+ new CopilotService(_chatDb, _serverManager, _bridgeClient, new RepoManager(), _serviceProvider, _demoService);
+
+ #region Helpers
+
+ /// Get the SessionStatePath used by CopilotService (redirected to test temp dir).
+ private static string GetSessionStatePath()
+ {
+ var prop = typeof(CopilotService).GetProperty("SessionStatePath", NonPublicStatic)!;
+ return (string)prop.GetValue(null)!;
+ }
+
+ /// Get the SessionState type (private nested class).
+ private static Type GetSessionStateType()
+ {
+ return typeof(CopilotService).GetNestedType("SessionState", BindingFlags.NonPublic)!;
+ }
+
+ /// Create a SessionState with the given AgentSessionInfo via reflection.
+ /// GetUninitializedObject bypasses constructors, so Session is null (lazy placeholder).
+ /// We manually initialize fields that would be set by field initializers.
+ private static object CreateSessionState(AgentSessionInfo info)
+ {
+ var stateType = GetSessionStateType();
+ var state = System.Runtime.CompilerServices.RuntimeHelpers.GetUninitializedObject(stateType);
+ stateType.GetProperty("Info")!.SetValue(state, info);
+
+ // Initialize readonly field that would normally be set by the field initializer
+ var signalField = stateType.GetField("PrematureIdleSignal", AnyInstance);
+ signalField?.SetValue(state, new ManualResetEventSlim(initialState: false));
+
+ return state;
+ }
+
+ /// Add a session to the CopilotService._sessions dictionary.
+ private static void AddSession(CopilotService svc, string name, object sessionState)
+ {
+ var field = typeof(CopilotService).GetField("_sessions", NonPublic)!;
+ var dict = field.GetValue(svc)!;
+ dict.GetType().GetMethod("TryAdd")!.Invoke(dict, new[] { name, sessionState });
+ }
+
+ /// Create an events.jsonl file for a given session ID in the test directory.
+ private static string CreateEventsFile(string sessionId, params string[] lines)
+ {
+ var sessionDir = Path.Combine(GetSessionStatePath(), sessionId);
+ Directory.CreateDirectory(sessionDir);
+ var eventsFile = Path.Combine(sessionDir, "events.jsonl");
+ File.WriteAllLines(eventsFile, lines);
+ return eventsFile;
+ }
+
+ /// Build a JSONL event line with the given type.
+ private static string BuildEventLine(string type, object data)
+ {
+ var obj = new { type, data, timestamp = DateTimeOffset.UtcNow.ToString("o") };
+ return JsonSerializer.Serialize(obj);
+ }
+
+ #endregion
+
+ #region IsWorkerIdleForMonitor behavioral tests
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_SessionNotFound_ReturnsTrue()
+ {
+ // Worker name not in _sessions → treat as idle (handled in result collection)
+ var svc = CreateService();
+ Assert.True(svc.IsWorkerIdleForMonitor("nonexistent-worker"));
+ }
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_ConnectedAndProcessing_ReturnsFalse()
+ {
+ // Worker has IsProcessing=true → not idle regardless of Session state
+ var svc = CreateService();
+ var sessionId = Guid.NewGuid().ToString();
+ var info = new AgentSessionInfo
+ {
+ Name = "worker-1",
+ Model = "gpt-4",
+ SessionId = sessionId,
+ WorkingDirectory = "/tmp"
+ };
+ info.IsProcessing = true;
+
+ var state = CreateSessionState(info);
+ AddSession(svc, "worker-1", state);
+
+ Assert.False(svc.IsWorkerIdleForMonitor("worker-1"));
+ }
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_ConnectedAndIdle_ReturnsTrue()
+ {
+ // Worker has IsProcessing=false and Session would be connected → idle
+ var svc = CreateService();
+ var sessionId = Guid.NewGuid().ToString();
+ var info = new AgentSessionInfo
+ {
+ Name = "worker-1",
+ Model = "gpt-4",
+ SessionId = sessionId,
+ WorkingDirectory = "/tmp"
+ };
+ info.IsProcessing = false;
+
+ var state = CreateSessionState(info);
+ AddSession(svc, "worker-1", state);
+
+ // No events.jsonl at all → IsSessionStillProcessing returns false
+ Assert.True(svc.IsWorkerIdleForMonitor("worker-1"));
+ }
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_LazyPlaceholderWithActiveCli_ReturnsFalse()
+ {
+ // KEY FIX TEST: Worker is a lazy placeholder (Session == null) with
+ // IsProcessing=false (InvokeOnUI hasn't fired yet), but the CLI
+ // events.jsonl shows active tool execution → NOT idle.
+ var svc = CreateService();
+ var sessionId = Guid.NewGuid().ToString();
+ var info = new AgentSessionInfo
+ {
+ Name = "worker-1",
+ Model = "gpt-4",
+ SessionId = sessionId,
+ WorkingDirectory = "/tmp"
+ };
+ info.IsProcessing = false; // InvokeOnUI hasn't set this yet
+
+ var state = CreateSessionState(info);
+ // Session is null (lazy placeholder) — GetUninitializedObject doesn't call constructor
+ AddSession(svc, "worker-1", state);
+
+ // Create events.jsonl showing active tool execution
+ CreateEventsFile(sessionId,
+ BuildEventLine("assistant.turn_start", new { }),
+ BuildEventLine("tool.execution_start", new { toolCallId = "tc1", name = "bash" }));
+
+ Assert.False(svc.IsWorkerIdleForMonitor("worker-1"));
+ }
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_LazyPlaceholderWithIdleCli_ReturnsTrue()
+ {
+ // Worker is a lazy placeholder but CLI has completed (terminal event) → idle
+ var svc = CreateService();
+ var sessionId = Guid.NewGuid().ToString();
+ var info = new AgentSessionInfo
+ {
+ Name = "worker-1",
+ Model = "gpt-4",
+ SessionId = sessionId,
+ WorkingDirectory = "/tmp"
+ };
+ info.IsProcessing = false;
+
+ var state = CreateSessionState(info);
+ AddSession(svc, "worker-1", state);
+
+ // Create events.jsonl showing session shutdown (terminal)
+ CreateEventsFile(sessionId,
+ BuildEventLine("assistant.turn_start", new { }),
+ BuildEventLine("assistant.message", new { content = "Done!" }),
+ BuildEventLine("session.shutdown", new { }));
+
+ Assert.True(svc.IsWorkerIdleForMonitor("worker-1"));
+ }
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_LazyPlaceholderWithNoEvents_ReturnsTrue()
+ {
+ // Worker is a lazy placeholder with no events.jsonl → idle
+ var svc = CreateService();
+ var sessionId = Guid.NewGuid().ToString();
+ var info = new AgentSessionInfo
+ {
+ Name = "worker-1",
+ Model = "gpt-4",
+ SessionId = sessionId,
+ WorkingDirectory = "/tmp"
+ };
+ info.IsProcessing = false;
+
+ var state = CreateSessionState(info);
+ AddSession(svc, "worker-1", state);
+
+ // No events.jsonl created → IsSessionStillProcessing returns false
+ Assert.True(svc.IsWorkerIdleForMonitor("worker-1"));
+ }
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_LazyPlaceholderWithStaleEvents_ReturnsTrue()
+ {
+ // Worker is a lazy placeholder but events.jsonl is old → CLI finished long ago → idle
+ var svc = CreateService();
+ var sessionId = Guid.NewGuid().ToString();
+ var info = new AgentSessionInfo
+ {
+ Name = "worker-1",
+ Model = "gpt-4",
+ SessionId = sessionId,
+ WorkingDirectory = "/tmp"
+ };
+ info.IsProcessing = false;
+
+ var state = CreateSessionState(info);
+ AddSession(svc, "worker-1", state);
+
+ // Create events.jsonl showing active event BUT backdate the file
+ var eventsFile = CreateEventsFile(sessionId,
+ BuildEventLine("tool.execution_start", new { toolCallId = "tc1", name = "bash" }));
+
+ // Backdate to be older than the staleness threshold
+ var staleTime = DateTime.UtcNow.AddSeconds(-(CopilotService.WatchdogToolExecutionTimeoutSeconds + 60));
+ File.SetLastWriteTimeUtc(eventsFile, staleTime);
+
+ Assert.True(svc.IsWorkerIdleForMonitor("worker-1"));
+ }
+
+ [Fact]
+ public void IsWorkerIdleForMonitor_LazyPlaceholderNoSessionId_ReturnsTrue()
+ {
+ // Worker is a lazy placeholder with no SessionId → can't check events → idle
+ var svc = CreateService();
+ var info = new AgentSessionInfo
+ {
+ Name = "worker-1",
+ Model = "gpt-4",
+ SessionId = null!,
+ WorkingDirectory = "/tmp"
+ };
+ info.IsProcessing = false;
+
+ var state = CreateSessionState(info);
+ AddSession(svc, "worker-1", state);
+
+ Assert.True(svc.IsWorkerIdleForMonitor("worker-1"));
+ }
+
+ #endregion
+
+ #region MonitorAndSynthesizeAsync source structure (supplementary invariant guards)
+
+ private static string GetRepoRoot()
+ {
+ var dir = AppContext.BaseDirectory;
+ while (dir != null && !File.Exists(Path.Combine(dir, "PolyPilot.slnx")))
+ dir = Path.GetDirectoryName(dir);
+ return dir ?? throw new InvalidOperationException("Could not find repo root");
+ }
+
+ [Fact]
+ public void MonitorAndSynthesizeAsync_CallsIsWorkerIdleForMonitor()
+ {
+ // Structural guard: MonitorAndSynthesizeAsync must use IsWorkerIdleForMonitor
+ // instead of directly checking state.Info.IsProcessing.
+ var source = File.ReadAllText(
+ Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"));
+
+ var methodStart = source.IndexOf("private async Task MonitorAndSynthesizeAsync");
+ Assert.True(methodStart >= 0, "MonitorAndSynthesizeAsync method not found");
+
+ var methodBody = source.Substring(methodStart, Math.Min(2000, source.Length - methodStart));
+
+ Assert.Contains("IsWorkerIdleForMonitor", methodBody);
+ }
+
+ [Fact]
+ public void MonitorAndSynthesizeAsync_WaitsForRestoreToComplete()
+ {
+ // Structural guard: MonitorAndSynthesizeAsync must wait for IsRestoring to become false
+ var source = File.ReadAllText(
+ Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"));
+
+ var methodStart = source.IndexOf("private async Task MonitorAndSynthesizeAsync");
+ Assert.True(methodStart >= 0, "MonitorAndSynthesizeAsync method not found");
+
+ var methodBody = source.Substring(methodStart, Math.Min(2000, source.Length - methodStart));
+
+ Assert.Contains("IsRestoring", methodBody);
+ }
+
+ #endregion
+}
diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs
index 13d064685..22c4b95d6 100644
--- a/PolyPilot/Services/CopilotService.Organization.cs
+++ b/PolyPilot/Services/CopilotService.Organization.cs
@@ -3222,18 +3222,59 @@ internal async Task ResumeOrchestrationIfPendingAsync(CancellationToken ct = def
});
}
+ ///
+ /// Checks whether a worker session is idle for purposes of orchestration resume monitoring.
+ /// A worker is NOT idle if:
+ /// 1. Its in-memory IsProcessing flag is true, OR
+ /// 2. It's a lazy placeholder (Session == null) and the CLI events.jsonl shows active processing.
+ ///
+ /// Case 2 handles the race condition (issue #400) where IsProcessing is set via InvokeOnUI
+ /// (async UI thread dispatch) but MonitorAndSynthesizeAsync checks it from a background thread
+ /// before the callback fires.
+ ///
+ internal bool IsWorkerIdleForMonitor(string workerName)
+ {
+ if (!_sessions.TryGetValue(workerName, out var state))
+ return true; // session not found — will be handled in result collection
+
+ if (state.Info.IsProcessing)
+ return false;
+
+ // Lazy placeholder workers: check events.jsonl directly.
+ // IsProcessing may not be set yet (InvokeOnUI hasn't fired on UI thread).
+ if (state.Session == null && !string.IsNullOrEmpty(state.Info.SessionId)
+ && IsSessionStillProcessing(state.Info.SessionId))
+ {
+ Debug($"[DISPATCH] Worker '{workerName}' is a lazy placeholder with active CLI session — treating as not idle");
+ return false;
+ }
+
+ return true;
+ }
+
private async Task MonitorAndSynthesizeAsync(PendingOrchestration pending, CancellationToken ct)
{
var timeout = TimeSpan.FromMinutes(15);
var started = DateTime.UtcNow;
+ // Wait for session restore to complete before checking worker state.
+ // Worker IsProcessing flags are set via InvokeOnUI during restore, which is
+ // async (queued on UI thread). Without this wait, we may see workers as idle
+ // before the restore loop has set their processing state (issue #400).
+ var restoreWaitStart = DateTime.UtcNow;
+ while (IsRestoring && !ct.IsCancellationRequested && (DateTime.UtcNow - restoreWaitStart).TotalSeconds < 30)
+ await Task.Delay(500, ct);
+
+ // Extra settle time for queued InvokeOnUI callbacks to execute on the UI thread
+ await Task.Delay(2000, ct);
+
// Poll every 5 seconds until all workers are idle
while (!ct.IsCancellationRequested && (DateTime.UtcNow - started) < timeout)
{
var allIdle = true;
foreach (var workerName in pending.WorkerNames)
{
- if (_sessions.TryGetValue(workerName, out var state) && state.Info.IsProcessing)
+ if (!IsWorkerIdleForMonitor(workerName))
{
allIdle = false;
break;