From 345b1a0fb64f172c952b2d203aca19467245e40e Mon Sep 17 00:00:00 2001 From: User Date: Mon, 11 May 2026 13:41:27 -0700 Subject: [PATCH 1/4] feat: surface structured output in streams Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- examples/structured-output-stress-test.ts | 308 ++++++++++++++++++++++ src/index.ts | 2 + src/schemas/enums.ts | 1 + src/schemas/index.ts | 4 + src/schemas/server.ts | 28 +- src/session.ts | 20 +- src/stream.ts | 36 ++- tests/helpers.ts | 16 ++ tests/public-api.test.ts | 1 + tests/run.test.ts | 75 ++++++ tests/schemas.test.ts | 61 +++-- tests/session.test.ts | 126 +++++++++ tests/stream.test.ts | 170 +++++++++++- 13 files changed, 817 insertions(+), 31 deletions(-) create mode 100644 examples/structured-output-stress-test.ts diff --git a/examples/structured-output-stress-test.ts b/examples/structured-output-stress-test.ts new file mode 100644 index 0000000..4f2bdde --- /dev/null +++ b/examples/structured-output-stress-test.ts @@ -0,0 +1,308 @@ +/** + * Structured output stress test. + * + * Runs several structured-output schemas against one or more Droid models and + * verifies both `run(...)` results and streaming `TurnComplete` metadata. 
+ * + * Usage: + * npx tsx examples/structured-output-stress-test.ts + * DROID_EXEC_PATH=droid-dev npx tsx examples/structured-output-stress-test.ts + * DROID_STRUCTURED_OUTPUT_MODELS="claude-sonnet-4-5,gpt-5.2" npx tsx examples/structured-output-stress-test.ts + */ + +import assert from 'node:assert/strict'; + +import { + DroidMessageType, + OutputFormatType, + createSession, + run, +} from '@factory/droid-sdk'; +import type { + DroidMessage, + DroidResult, + MessageOptions, +} from '@factory/droid-sdk'; +import { z } from 'zod'; + +type OutputFormat = NonNullable; +type JsonObject = NonNullable; + +interface StressCase { + name: string; + prompt: string; + outputFormat: OutputFormat; + parse: (value: JsonObject) => unknown; +} + +const PersonSchema = z.object({ + name: z.literal('Ada Lovelace'), + language: z.literal('TypeScript'), + score: z.literal(99), +}); + +const PlanSchema = z.object({ + title: z.literal('Structured Output SDK Test'), + priority: z.enum(['low', 'medium', 'high']), + tasks: z.array( + z.object({ + id: z.string(), + done: z.boolean(), + }) + ), +}); + +const MetricsSchema = z.object({ + summary: z.object({ + passed: z.literal(3), + failed: z.literal(0), + }), + checks: z.array( + z.object({ + name: z.enum(['schema', 'stream', 'fallback']), + ok: z.literal(true), + }) + ), +}); + +const stressCases: StressCase[] = [ + { + name: 'flat-literals', + prompt: [ + 'Return a structured object for Ada Lovelace.', + 'Use exactly name "Ada Lovelace", language "TypeScript", and score 99.', + ].join(' '), + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + name: { type: 'string' }, + language: { type: 'string', enum: ['TypeScript'] }, + score: { type: 'number', enum: [99] }, + }, + required: ['name', 'language', 'score'], + additionalProperties: false, + }, + }, + parse: (value) => PersonSchema.parse(value), + }, + { + name: 'nested-array-enum', + prompt: [ + 'Return a project plan object.', + 'Use title 
"Structured Output SDK Test", priority "high",', + 'and exactly two tasks with ids "schema" and "stream".', + 'Set both task done values to true.', + ].join(' '), + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + title: { type: 'string', enum: ['Structured Output SDK Test'] }, + priority: { type: 'string', enum: ['low', 'medium', 'high'] }, + tasks: { + type: 'array', + minItems: 2, + maxItems: 2, + items: { + type: 'object', + properties: { + id: { type: 'string', enum: ['schema', 'stream'] }, + done: { type: 'boolean', enum: [true] }, + }, + required: ['id', 'done'], + additionalProperties: false, + }, + }, + }, + required: ['title', 'priority', 'tasks'], + additionalProperties: false, + }, + }, + parse: (value) => PlanSchema.parse(value), + }, + { + name: 'nested-metrics', + prompt: [ + 'Return validation metrics.', + 'The summary must have passed 3 and failed 0.', + 'The checks array must contain schema, stream, and fallback, each with ok true.', + ].join(' '), + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + summary: { + type: 'object', + properties: { + passed: { type: 'number', enum: [3] }, + failed: { type: 'number', enum: [0] }, + }, + required: ['passed', 'failed'], + additionalProperties: false, + }, + checks: { + type: 'array', + minItems: 3, + maxItems: 3, + items: { + type: 'object', + properties: { + name: { + type: 'string', + enum: ['schema', 'stream', 'fallback'], + }, + ok: { type: 'boolean', enum: [true] }, + }, + required: ['name', 'ok'], + additionalProperties: false, + }, + }, + }, + required: ['summary', 'checks'], + additionalProperties: false, + }, + }, + parse: (value) => MetricsSchema.parse(value), + }, +]; + +function parseModels(): Array { + const raw = process.env['DROID_STRUCTURED_OUTPUT_MODELS']; + if (!raw) return [undefined]; + return raw + .split(',') + .map((model) => model.trim()) + .filter(Boolean); +} + +function 
labelModel(modelId: string | undefined): string { + return modelId ?? 'default session model'; +} + +function assertStructuredResult( + result: DroidResult, + stressCase: StressCase +): void { + const diagnostic = JSON.stringify( + { + text: result.text, + error: result.error, + structuredOutputError: result.structuredOutputError, + messages: result.messages + .filter( + (message) => + message.type === DroidMessageType.CreateMessage || + message.type === DroidMessageType.Error || + message.type === DroidMessageType.StructuredOutput || + message.type === DroidMessageType.TurnComplete + ) + .map((message) => { + if (message.type !== DroidMessageType.CreateMessage) return message; + return { + type: message.type, + role: message.role, + content: message.content, + }; + }), + }, + null, + 2 + ); + + assert.equal( + result.structuredOutputError, + null, + `${stressCase.name}: expected no structured output error\n${diagnostic}` + ); + assert.ok( + result.structuredOutput, + `${stressCase.name}: expected structuredOutput\n${diagnostic}` + ); + stressCase.parse(result.structuredOutput); +} + +function findMessage( + messages: DroidMessage[], + type: T +): Extract | undefined { + return messages.find( + (message): message is Extract => + message.type === type + ); +} + +async function runCase( + modelId: string | undefined, + stressCase: StressCase +): Promise { + const result = await run(stressCase.prompt, { + execPath: process.env['DROID_EXEC_PATH'] ?? 
'droid', + cwd: process.cwd(), + ...(modelId !== undefined && { modelId }), + outputFormat: stressCase.outputFormat, + }); + + assertStructuredResult(result, stressCase); + + const notification = findMessage( + result.messages, + DroidMessageType.StructuredOutput + ); + assert.ok(notification, `${stressCase.name}: expected structured_output`); + + console.log( + ` ✓ ${stressCase.name}: ${JSON.stringify(result.structuredOutput)}` + ); +} + +async function runStreamingCase(modelId: string | undefined): Promise { + const stressCase = stressCases[0]; + const session = await createSession({ + execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', + cwd: process.cwd(), + ...(modelId !== undefined && { modelId }), + }); + + try { + const messages: DroidMessage[] = []; + for await (const message of ( + await session.send(stressCase.prompt, { + outputFormat: stressCase.outputFormat, + }) + ).stream()) { + messages.push(message); + } + + const structured = findMessage(messages, DroidMessageType.StructuredOutput); + const complete = findMessage(messages, DroidMessageType.TurnComplete); + + assert.ok(structured, 'streaming: expected structured_output message'); + assert.ok(complete, 'streaming: expected turn_complete message'); + assert.deepEqual( + complete.structuredOutput, + structured.structuredOutput, + 'streaming: TurnComplete should carry structured output' + ); + assert.equal(complete.structuredOutputError, null); + assert.ok(complete.structuredOutput); + stressCase.parse(complete.structuredOutput); + + console.log(' ✓ streaming TurnComplete carries structured output'); + } finally { + await session.close(); + } +} + +for (const modelId of parseModels()) { + console.log(`\n=== Testing ${labelModel(modelId)} ===`); + for (const stressCase of stressCases) { + await runCase(modelId, stressCase); + } + await runStreamingCase(modelId); +} + +console.log('\nStructured output stress test passed'); diff --git a/src/index.ts b/src/index.ts index 6e8bfbf..7835042 100644 --- 
a/src/index.ts +++ b/src/index.ts @@ -48,6 +48,8 @@ export type { MissionWorkerCompleted, McpAuthRequired, McpAuthCompleted, + StructuredOutput, + StructuredOutputFields, ErrorEvent, TurnComplete, DroidMessage, diff --git a/src/schemas/enums.ts b/src/schemas/enums.ts index 2117db7..7e5bb5e 100644 --- a/src/schemas/enums.ts +++ b/src/schemas/enums.ts @@ -66,6 +66,7 @@ export enum SessionNotificationType { MISSION_WORKER_COMPLETED = 'mission_worker_completed', MCP_AUTH_REQUIRED = 'mcp_auth_required', MCP_AUTH_COMPLETED = 'mcp_auth_completed', + STRUCTURED_OUTPUT = 'structured_output', } /** Tool confirmation outcome options (possible user responses to permission requests). */ diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 06eb8a3..0bd2df8 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -510,6 +510,8 @@ export { SettingsUpdatedNotificationSchema, SettingsUpdatedPayloadSchema, StartMissionRunConfirmationDetailsSchema, + StructuredOutputErrorSchema, + StructuredOutputNotificationSchema, ThinkingTextDeltaNotificationSchema, ToolConfirmationDetailsSchema, ToolConfirmationInfoSchema, @@ -564,6 +566,8 @@ export type { SettingsUpdatedNotification, SettingsUpdatedPayload, StartMissionRunConfirmationDetails, + StructuredOutputError, + StructuredOutputNotification, ThinkingTextDeltaNotification, ToolConfirmationDetails, ToolConfirmationInfo, diff --git a/src/schemas/server.ts b/src/schemas/server.ts index 2ac1046..1284cef 100644 --- a/src/schemas/server.ts +++ b/src/schemas/server.ts @@ -361,6 +361,31 @@ export type McpAuthCompletedNotification = z.infer< typeof McpAuthCompletedNotificationSchema >; +/** Structured output validation error emitted by Droid. */ +export const StructuredOutputErrorSchema = z + .object({ + code: z.string(), + message: z.string(), + details: z.unknown().optional(), + }) + .passthrough(); + +export type StructuredOutputError = z.infer; + +/** Backend-validated structured output for the completed turn. 
*/ +export const StructuredOutputNotificationSchema = z + .object({ + type: z.literal(SessionNotificationType.STRUCTURED_OUTPUT), + messageId: z.string(), + structuredOutput: JsonObjectSchema.nullable(), + structuredOutputError: StructuredOutputErrorSchema.nullable(), + }) + .passthrough(); + +export type StructuredOutputNotification = z.infer< + typeof StructuredOutputNotificationSchema +>; + /** List of all session notification schemas (for discriminatedUnion). */ export const SessionNotificationSchemaList = [ ToolResultNotificationSchema, @@ -383,9 +408,10 @@ export const SessionNotificationSchemaList = [ MissionWorkerCompletedNotificationSchema, McpAuthRequiredNotificationSchema, McpAuthCompletedNotificationSchema, + StructuredOutputNotificationSchema, ] as const; -/** Discriminated union over all 20 session notification types. */ +/** Discriminated union over all session notification types. */ export const SessionNotificationPayloadSchema = z.discriminatedUnion( 'type', SessionNotificationSchemaList diff --git a/src/session.ts b/src/session.ts index 1b257cf..b092c9b 100644 --- a/src/session.ts +++ b/src/session.ts @@ -49,6 +49,7 @@ import type { import { DroidInteractionMode } from './schemas/enums.js'; import type { Base64ImageSource, DocumentSource } from './schemas/messages.js'; import { FactoryDroidMessageRole } from './schemas/messages.js'; +import type { StructuredOutputError } from './schemas/server.js'; import { JsonObjectSchema, type JsonObject } from './schemas/shared.js'; import { DroidMessageType } from './stream.js'; import type { DroidMessage, ErrorEvent, TokenUsageUpdate } from './stream.js'; @@ -71,6 +72,8 @@ export interface DroidResult { error: ErrorEvent | null; /** Structured JSON object emitted by the turn, when requested. */ structuredOutput: JsonObject | null; + /** Backend structured output validation error, when reported. 
*/ + structuredOutputError: StructuredOutputError | null; /** True when the stream completed without an error event. */ success: boolean; } @@ -152,6 +155,8 @@ export function aggregateMessages( let lastTokenUsage: TokenUsageUpdate | null = null; let firstError: ErrorEvent | null = null; let structuredOutput: JsonObject | null = null; + let structuredOutputError: StructuredOutputError | null = null; + let receivedStructuredOutputNotification = false; let finalAssistantText = ''; let turnCount = 0; @@ -176,15 +181,27 @@ export function aggregateMessages( firstError = msg; } + if (msg.type === DroidMessageType.StructuredOutput) { + receivedStructuredOutputNotification = true; + structuredOutput = msg.structuredOutput; + structuredOutputError = msg.structuredOutputError; + } + if (msg.type === DroidMessageType.TurnComplete) { turnCount++; if (msg.tokenUsage) { lastTokenUsage = msg.tokenUsage; } + if (!receivedStructuredOutputNotification) { + structuredOutput = msg.structuredOutput; + structuredOutputError = msg.structuredOutputError; + receivedStructuredOutputNotification = + msg.structuredOutput !== null || msg.structuredOutputError !== null; + } } } - if (options?.outputFormat) { + if (options?.outputFormat && !receivedStructuredOutputNotification) { const textToParse = finalAssistantText || fullText; if (textToParse) { structuredOutput = parseJsonObject(textToParse); @@ -200,6 +217,7 @@ export function aggregateMessages( turnCount, error: firstError, structuredOutput, + structuredOutputError, success: firstError === null, }; } diff --git a/src/stream.ts b/src/stream.ts index 24dbdef..a8e0e8d 100644 --- a/src/stream.ts +++ b/src/stream.ts @@ -14,6 +14,7 @@ import { type ErrorNotification, type SessionNotificationPayload, type SettingsUpdatedPayload, + type StructuredOutputError as ServerStructuredOutputError, type ToolProgressUpdate, } from './schemas/server.js'; import type { JsonObject, JsonValue } from './schemas/shared.js'; @@ -39,6 +40,7 @@ export const 
DroidMessageType = { MissionWorkerCompleted: 'mission_worker_completed', McpAuthRequired: 'mcp_auth_required', McpAuthCompleted: 'mcp_auth_completed', + StructuredOutput: 'structured_output', Error: 'error', TurnComplete: 'turn_complete', } as const; @@ -168,6 +170,16 @@ export interface McpAuthCompleted { readonly message: string; } +export interface StructuredOutputFields { + readonly structuredOutput: JsonObject | null; + readonly structuredOutputError: ServerStructuredOutputError | null; +} + +export interface StructuredOutput extends StructuredOutputFields { + readonly type: 'structured_output'; + readonly messageId: string; +} + export interface ErrorEvent { readonly type: 'error'; readonly message: string; @@ -176,7 +188,7 @@ export interface ErrorEvent { } /** Sentinel yielded when the agent turn finishes (returns to Idle). */ -export interface TurnComplete { +export interface TurnComplete extends StructuredOutputFields { readonly type: 'turn_complete'; readonly tokenUsage: TokenUsageUpdate | null; } @@ -202,6 +214,7 @@ export type DroidMessage = | MissionWorkerCompleted | McpAuthRequired | McpAuthCompleted + | StructuredOutput | ErrorEvent | TurnComplete; @@ -389,6 +402,14 @@ export function convertNotificationToStreamMessage( message: notification.message, }; + case SessionNotificationType.STRUCTURED_OUTPUT: + return { + type: DroidMessageType.StructuredOutput, + messageId: notification.messageId, + structuredOutput: notification.structuredOutput, + structuredOutputError: notification.structuredOutputError, + }; + default: return null; } @@ -403,6 +424,10 @@ export class StreamStateTracker { private lastTokenUsage: TokenUsageUpdate | null = null; + private structuredOutput: JsonObject | null = null; + + private structuredOutputError: ServerStructuredOutputError | null = null; + private toolNameMap = new Map(); private getToolName(toolUseId: string): string { @@ -428,6 +453,11 @@ export class StreamStateTracker { this.lastTokenUsage = message; } + if 
(message.type === DroidMessageType.StructuredOutput) { + this.structuredOutput = message.structuredOutput; + this.structuredOutputError = message.structuredOutputError; + } + if (message.type === DroidMessageType.WorkingStateChanged) { if (message.state !== DroidWorkingState.Idle) { this.hasBeenNonIdle = true; @@ -435,8 +465,12 @@ export class StreamStateTracker { additional.push({ type: DroidMessageType.TurnComplete, tokenUsage: this.lastTokenUsage, + structuredOutput: this.structuredOutput, + structuredOutputError: this.structuredOutputError, }); this.hasBeenNonIdle = false; + this.structuredOutput = null; + this.structuredOutputError = null; } } diff --git a/tests/helpers.ts b/tests/helpers.ts index 9f99431..9957381 100644 --- a/tests/helpers.ts +++ b/tests/helpers.ts @@ -255,6 +255,9 @@ export function sendDefaultStreamSequence( initialState?: DroidWorkingState; finalState?: DroidWorkingState; includeTokenUsage?: boolean; + structuredOutput?: Record | null; + structuredOutputError?: Record | null; + structuredOutputMessageId?: string; } ): void { const { @@ -271,6 +274,9 @@ export function sendDefaultStreamSequence( initialState = DroidWorkingState.StreamingAssistantMessage, finalState = DroidWorkingState.Idle, includeTokenUsage = true, + structuredOutput, + structuredOutputError, + structuredOutputMessageId = messageId, } = options ?? {}; transport.injectMessage( @@ -302,6 +308,16 @@ export function sendDefaultStreamSequence( ); } + if (structuredOutput !== undefined || structuredOutputError !== undefined) { + transport.injectMessage( + makeSessionNotification(SessionNotificationType.STRUCTURED_OUTPUT, { + messageId: structuredOutputMessageId, + structuredOutput: structuredOutput ?? null, + structuredOutputError: structuredOutputError ?? 
null, + }) + ); + } + transport.injectMessage( makeSessionNotification( SessionNotificationType.DROID_WORKING_STATE_CHANGED, diff --git a/tests/public-api.test.ts b/tests/public-api.test.ts index 26b83ca..c9034e2 100644 --- a/tests/public-api.test.ts +++ b/tests/public-api.test.ts @@ -39,6 +39,7 @@ describe('public API barrel', () => { expect(DroidMessageType.AssistantTextDelta).toBe('assistant_text_delta'); expect(DroidMessageType.ToolUse).toBe('tool_use'); expect(DroidMessageType.TokenUsageUpdate).toBe('token_usage_update'); + expect(DroidMessageType.StructuredOutput).toBe('structured_output'); expect(DroidMessageType.TurnComplete).toBe('turn_complete'); }); }); diff --git a/tests/run.test.ts b/tests/run.test.ts index c351b02..c3cf8fe 100644 --- a/tests/run.test.ts +++ b/tests/run.test.ts @@ -285,6 +285,81 @@ describe('run()', () => { }); }); + it('prefers backend structured output notifications', async () => { + const transport = new InMemoryTransport(); + await transport.connect(); + + wireTransportSend(transport, ({ method, id }) => { + if (method === DroidServerMethod.INITIALIZE_SESSION) { + queueMicrotask(() => { + transport.injectMessage( + makeSuccessResponse(id, { + sessionId: 'sess-run-structured-output-notification', + session: {}, + settings: { modelId: 'test', reasoningEffort: 'medium' }, + }) + ); + }); + } else if (method === DroidServerMethod.ADD_USER_MESSAGE) { + queueMicrotask(() => { + transport.injectMessage(makeSuccessResponse(id, {})); + transport.injectMessage( + makeSessionNotification( + SessionNotificationType.DROID_WORKING_STATE_CHANGED, + { newState: DroidWorkingState.StreamingAssistantMessage } + ) + ); + transport.injectMessage( + makeSessionNotification(SessionNotificationType.CREATE_MESSAGE, { + message: { + id: 'msg-structured', + role: 'assistant', + content: [ + { + type: 'text', + text: JSON.stringify({ name: 'text-fallback' }), + }, + ], + createdAt: Date.now(), + updatedAt: Date.now(), + }, + }) + ); + transport.injectMessage( 
+ makeSessionNotification(SessionNotificationType.STRUCTURED_OUTPUT, { + messageId: 'msg-structured', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }) + ); + transport.injectMessage( + makeSessionNotification( + SessionNotificationType.DROID_WORKING_STATE_CHANGED, + { newState: DroidWorkingState.Idle } + ) + ); + }); + } + }); + + const result = await run('Return a person', { + transport, + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + name: { type: 'string' }, + }, + required: ['name'], + }, + }, + }); + + expect(result.structuredOutput).toEqual({ name: 'Ada' }); + expect(result.structuredOutputError).toBeNull(); + }); + it('concatenates multiple text deltas', async () => { const transport = new InMemoryTransport(); await transport.connect(); diff --git a/tests/schemas.test.ts b/tests/schemas.test.ts index 9679394..5a26fd0 100644 --- a/tests/schemas.test.ts +++ b/tests/schemas.test.ts @@ -107,6 +107,7 @@ import { MissionWorkerCompletedNotificationSchema, McpAuthRequiredNotificationSchema, McpAuthCompletedNotificationSchema, + StructuredOutputNotificationSchema, SessionNotificationPayloadSchema, RequestPermissionRequestParamsSchema, RequestPermissionResultSchema, @@ -174,29 +175,33 @@ describe('enums', () => { expect(DroidClientMethod.ASK_USER).toBe('droid.ask_user'); }); - it('SessionNotificationType has 20 types', () => { + it('SessionNotificationType has 21 types', () => { const values = Object.values(SessionNotificationType); - expect(values).toHaveLength(20); - expect(values).toContain('assistant_text_delta'); - expect(values).toContain('thinking_text_delta'); - expect(values).toContain('tool_result'); - expect(values).toContain('tool_progress_update'); - expect(values).toContain('create_message'); - expect(values).toContain('error'); - expect(values).toContain('droid_working_state_changed'); - expect(values).toContain('permission_resolved'); - 
expect(values).toContain('settings_updated'); - expect(values).toContain('session_title_updated'); - expect(values).toContain('mcp_status_changed'); - expect(values).toContain('session_token_usage_changed'); - expect(values).toContain('mission_state_changed'); - expect(values).toContain('mission_features_changed'); - expect(values).toContain('mission_progress_entry'); - expect(values).toContain('mission_heartbeat'); - expect(values).toContain('mission_worker_started'); - expect(values).toContain('mission_worker_completed'); - expect(values).toContain('mcp_auth_required'); - expect(values).toContain('mcp_auth_completed'); + const expectedValues = [ + 'assistant_text_delta', + 'thinking_text_delta', + 'tool_result', + 'tool_progress_update', + 'create_message', + 'error', + 'droid_working_state_changed', + 'permission_resolved', + 'settings_updated', + 'session_title_updated', + 'mcp_status_changed', + 'session_token_usage_changed', + 'mission_state_changed', + 'mission_features_changed', + 'mission_progress_entry', + 'mission_heartbeat', + 'mission_worker_started', + 'mission_worker_completed', + 'mcp_auth_required', + 'mcp_auth_completed', + 'structured_output', + ]; + expect(values).toHaveLength(expectedValues.length); + expect(values).toEqual(expect.arrayContaining(expectedValues)); }); it('ToolConfirmationOutcome has correct values', () => { @@ -1248,6 +1253,18 @@ describe('server notification schemas', () => { }; expect(McpAuthCompletedNotificationSchema.parse(n).outcome).toBe('success'); }); + + it('StructuredOutputNotificationSchema parses valid notification', () => { + const n = { + type: 'structured_output', + messageId: 'msg-1', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }; + expect( + StructuredOutputNotificationSchema.parse(n).structuredOutput + ).toEqual({ name: 'Ada' }); + }); }); describe('SessionNotificationPayloadSchema', () => { diff --git a/tests/session.test.ts b/tests/session.test.ts index a22442e..4b840f8 100644 --- 
a/tests/session.test.ts +++ b/tests/session.test.ts @@ -16,6 +16,7 @@ import { DroidWorkingState, JsonRpcErrorCode, McpServerType, + OutputFormatType, ReasoningEffort, SessionNotificationType, SettingsLevel, @@ -349,6 +350,131 @@ describe('DroidSession', () => { await session.close(); }); + it('streams backend structured output notifications', async () => { + const transport = new InMemoryTransport(); + await transport.connect(); + + wireTransportSend(transport, ({ method, id }) => { + if (method === DroidServerMethod.INITIALIZE_SESSION) { + queueMicrotask(() => { + transport.injectMessage( + makeSuccessResponse(id, { + sessionId: 'sess-structured-notification', + session: {}, + settings: { modelId: 'test-model', reasoningEffort: 'medium' }, + availableModels: [], + }) + ); + }); + } else if (method === DroidServerMethod.ADD_USER_MESSAGE) { + queueMicrotask(() => { + transport.injectMessage(makeSuccessResponse(id, {})); + sendDefaultStreamSequence(transport, { + deltas: [], + includeTokenUsage: false, + structuredOutputMessageId: 'msg-structured', + structuredOutput: { name: 'Ada' }, + }); + }); + } + }); + + const session = await createSession({ transport }); + const messages: DroidMessage[] = []; + for await (const msg of session.stream('Return a person', { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { name: { type: 'string' } }, + }, + }, + })) { + messages.push(msg); + } + + expect(messages).toContainEqual({ + type: 'structured_output', + messageId: 'msg-structured', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }); + expect(messages[messages.length - 1]).toMatchObject({ + type: 'turn_complete', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }); + + await session.close(); + }); + + it('streams backend structured output errors', async () => { + const transport = new InMemoryTransport(); + await transport.connect(); + + wireTransportSend(transport, ({ method, id 
}) => { + if (method === DroidServerMethod.INITIALIZE_SESSION) { + queueMicrotask(() => { + transport.injectMessage( + makeSuccessResponse(id, { + sessionId: 'sess-structured-error', + session: {}, + settings: { modelId: 'test-model', reasoningEffort: 'medium' }, + availableModels: [], + }) + ); + }); + } else if (method === DroidServerMethod.ADD_USER_MESSAGE) { + queueMicrotask(() => { + transport.injectMessage(makeSuccessResponse(id, {})); + sendDefaultStreamSequence(transport, { + deltas: [], + includeTokenUsage: false, + structuredOutputMessageId: 'msg-structured', + structuredOutputError: { + code: 'schema_validation_failed', + message: '/name must be string', + }, + }); + }); + } + }); + + const session = await createSession({ transport }); + const messages: DroidMessage[] = []; + for await (const msg of session.stream('Return a person', { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { name: { type: 'string' } }, + }, + }, + })) { + messages.push(msg); + } + + expect(messages).toContainEqual({ + type: 'structured_output', + messageId: 'msg-structured', + structuredOutput: null, + structuredOutputError: { + code: 'schema_validation_failed', + message: '/name must be string', + }, + }); + expect(messages[messages.length - 1]).toMatchObject({ + type: 'turn_complete', + structuredOutput: null, + structuredOutputError: { + code: 'schema_validation_failed', + message: '/name must be string', + }, + }); + + await session.close(); + }); + it('supports multiple stream calls (multi-turn)', async () => { const transport = new InMemoryTransport(); await transport.connect(); diff --git a/tests/stream.test.ts b/tests/stream.test.ts index 04e358d..0eabbdd 100644 --- a/tests/stream.test.ts +++ b/tests/stream.test.ts @@ -37,6 +37,7 @@ import type { MissionWorkerCompleted, McpAuthRequired, McpAuthCompleted, + StructuredOutput, ErrorEvent, TurnComplete, DroidMessage, @@ -67,6 +68,7 @@ const expectedDroidMessageTypes = [ 
'mission_worker_completed', 'mcp_auth_required', 'mcp_auth_completed', + 'structured_output', 'error', 'turn_complete', ] as const satisfies readonly DroidMessage['type'][]; @@ -340,9 +342,13 @@ describe('DroidMessage types', () => { const msg: TurnComplete = { type: 'turn_complete', tokenUsage: null, + structuredOutput: null, + structuredOutputError: null, }; expect(msg.type).toBe('turn_complete'); expect(msg.tokenUsage).toBeNull(); + expect(msg.structuredOutput).toBeNull(); + expect(msg.structuredOutputError).toBeNull(); }); it('TurnComplete with token usage', () => { @@ -357,12 +363,14 @@ describe('DroidMessage types', () => { const msg: TurnComplete = { type: 'turn_complete', tokenUsage, + structuredOutput: null, + structuredOutputError: null, }; expect(msg.tokenUsage).not.toBeNull(); expect(msg.tokenUsage!.inputTokens).toBe(100); }); - it('DroidMessage union type allows all 22 types', () => { + it('DroidMessage union type allows all 23 types', () => { const messages: DroidMessage[] = [ { type: 'assistant_text_delta', @@ -438,15 +446,26 @@ describe('DroidMessage types', () => { outcome: McpAuthOutcome.Success, message: 'm', }, + { + type: 'structured_output', + messageId: 'm1', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }, { type: 'error', message: 'err', errorType: DroidErrorType.ERROR, timestamp: 't', }, - { type: 'turn_complete', tokenUsage: null }, + { + type: 'turn_complete', + tokenUsage: null, + structuredOutput: null, + structuredOutputError: null, + }, ]; - expect(messages).toHaveLength(22); + expect(messages).toHaveLength(expectedDroidMessageTypes.length); }); }); @@ -1040,6 +1059,56 @@ describe('convertNotificationToStreamMessage', () => { }); }); + describe('structured_output', () => { + it('converts successful structured output', () => { + const notification = makeNotification( + SessionNotificationType.STRUCTURED_OUTPUT, + { + messageId: 'msg-structured', + structuredOutput: { name: 'Ada' }, + structuredOutputError: 
null, + } + ); + const result = convertNotificationToStreamMessage( + notification + ) as StructuredOutput; + + expect(result).toEqual({ + type: 'structured_output', + messageId: 'msg-structured', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }); + }); + + it('converts structured output errors', () => { + const notification = makeNotification( + SessionNotificationType.STRUCTURED_OUTPUT, + { + messageId: 'msg-structured', + structuredOutput: null, + structuredOutputError: { + code: 'schema_validation_failed', + message: '/name must be string', + }, + } + ); + const result = convertNotificationToStreamMessage( + notification + ) as StructuredOutput; + + expect(result).toEqual({ + type: 'structured_output', + messageId: 'msg-structured', + structuredOutput: null, + structuredOutputError: { + code: 'schema_validation_failed', + message: '/name must be string', + }, + }); + }); + }); + describe('unknown notification type', () => { it('returns null for unknown types', () => { const notification = makeNotification('completely_unknown_type', { @@ -1056,11 +1125,13 @@ describe('convertNotificationToStreamMessage', () => { }); }); - describe('all 20 notification types are handled', () => { + describe('all notification types are handled', () => { const allNotificationTypes = Object.values(SessionNotificationType); - it('covers all 20 SessionNotificationType values', () => { - expect(allNotificationTypes).toHaveLength(20); + it('covers all SessionNotificationType values', () => { + expect(allNotificationTypes).toContain( + SessionNotificationType.STRUCTURED_OUTPUT + ); }); it('every notification type returns a non-null result (with valid payloads)', () => { @@ -1150,6 +1221,11 @@ describe('convertNotificationToStreamMessage', () => { outcome: McpAuthOutcome.Success, message: 'm', }, + [SessionNotificationType.STRUCTURED_OUTPUT]: { + messageId: 'm', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }, }; for (const notifType of 
allNotificationTypes) { @@ -1261,6 +1337,88 @@ describe('StreamStateTracker', () => { }); }); + describe('StructuredOutput propagation to TurnComplete', () => { + it('attaches structured output to TurnComplete', () => { + tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.StreamingAssistantMessage, + }); + tracker.processMessage({ + type: 'structured_output', + messageId: 'msg-structured', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }); + + const result = tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.Idle, + }); + + const tc = result.additional[0] as TurnComplete; + expect(tc.structuredOutput).toEqual({ name: 'Ada' }); + expect(tc.structuredOutputError).toBeNull(); + }); + + it('attaches structured output errors to TurnComplete', () => { + tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.StreamingAssistantMessage, + }); + tracker.processMessage({ + type: 'structured_output', + messageId: 'msg-structured', + structuredOutput: null, + structuredOutputError: { + code: 'schema_validation_failed', + message: '/name must be string', + }, + }); + + const result = tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.Idle, + }); + + const tc = result.additional[0] as TurnComplete; + expect(tc.structuredOutput).toBeNull(); + expect(tc.structuredOutputError).toEqual({ + code: 'schema_validation_failed', + message: '/name must be string', + }); + }); + + it('does not leak structured output between turns', () => { + tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.StreamingAssistantMessage, + }); + tracker.processMessage({ + type: 'structured_output', + messageId: 'msg-structured', + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }); + tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.Idle, + }); + + 
tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.StreamingAssistantMessage, + }); + const result = tracker.processMessage({ + type: 'working_state_changed', + state: DroidWorkingState.Idle, + }); + + const tc = result.additional[0] as TurnComplete; + expect(tc.structuredOutput).toBeNull(); + expect(tc.structuredOutputError).toBeNull(); + }); + }); + describe('TokenUsage propagation to TurnComplete', () => { it('carries last-seen TokenUsageUpdate in TurnComplete', () => { const tokenUsage: TokenUsageUpdate = { From fdecdef66985dfdf84bacc01af7fd24fac4c140f Mon Sep 17 00:00:00 2001 From: User Date: Tue, 12 May 2026 17:09:44 -0700 Subject: [PATCH 2/4] feat: simplify streaming result API Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- examples/readme-structured-output.ts | 6 +- examples/session-stream.ts | 10 +- examples/stress/_harness.ts | 457 ++++++++++++++++++ examples/stress/all.ts | 48 ++ examples/stress/cancel-interrupt.ts | 92 ++++ examples/stress/default-streaming.ts | 79 +++ .../stress/default-vs-partial-consistency.ts | 87 ++++ examples/stress/error-paths.ts | 90 ++++ examples/stress/multi-turn.ts | 137 ++++++ examples/stress/partial-streaming.ts | 47 ++ examples/stress/structured-output.ts | 215 ++++++++ examples/stress/tool-use.ts | 101 ++++ examples/structured-output-stress-test.ts | 275 +++++++++-- examples/structured-output.ts | 10 +- examples/test-compact.ts | 6 +- src/helpers.ts | 51 +- src/index.ts | 12 +- src/run.ts | 17 +- src/schemas/enums.ts | 3 + src/schemas/index.ts | 6 + src/schemas/server.ts | 40 ++ src/session.ts | 160 ++---- src/stream.ts | 408 ++++++++++++++-- tests/helpers.test.ts | 9 +- tests/helpers.ts | 26 +- tests/integration.test.ts | 164 ++++--- tests/public-api.test.ts | 7 +- tests/run.test.ts | 15 +- tests/schemas.test.ts | 5 +- tests/session.test.ts | 57 +-- tests/stream.test.ts | 280 +++++++---- 31 files changed, 2472 insertions(+), 448 
deletions(-) create mode 100644 examples/stress/_harness.ts create mode 100644 examples/stress/all.ts create mode 100644 examples/stress/cancel-interrupt.ts create mode 100644 examples/stress/default-streaming.ts create mode 100644 examples/stress/default-vs-partial-consistency.ts create mode 100644 examples/stress/error-paths.ts create mode 100644 examples/stress/multi-turn.ts create mode 100644 examples/stress/partial-streaming.ts create mode 100644 examples/stress/structured-output.ts create mode 100644 examples/stress/tool-use.ts diff --git a/examples/readme-structured-output.ts b/examples/readme-structured-output.ts index 04b3cd9..b03d510 100644 --- a/examples/readme-structured-output.ts +++ b/examples/readme-structured-output.ts @@ -1,5 +1,7 @@ import { OutputFormatType, run } from '@factory/droid-sdk'; +type FavoriteNumber = { favoriteNumber: number }; + const result = await run('Pick a favorite number between 1 and 42.', { cwd: process.cwd(), outputFormat: { @@ -18,4 +20,6 @@ const result = await run('Pick a favorite number between 1 and 42.', { }, }); -console.log(result.structuredOutput?.favoriteNumber); +console.log( + (result.structuredOutput as FavoriteNumber | undefined)?.favoriteNumber +); diff --git a/examples/session-stream.ts b/examples/session-stream.ts index 70639c5..2033ddb 100644 --- a/examples/session-stream.ts +++ b/examples/session-stream.ts @@ -2,7 +2,7 @@ * Simple session streaming example. * * Demonstrates using `session.stream()` to send a prompt, streaming - * `AssistantTextDelta` messages to stdout, and handling `TurnComplete`. + * full assistant/tool messages, and handling the final `result`. 
* * Usage: * npx tsx examples/session-stream.ts @@ -20,19 +20,19 @@ async function main(): Promise { try { for await (const msg of session.stream(prompt)) { switch (msg.type) { - case DroidMessageType.AssistantTextDelta: + case DroidMessageType.Assistant: process.stdout.write(msg.text); break; - case DroidMessageType.ToolUse: - console.log(`\n[Tool] ${msg.toolName}`); + case DroidMessageType.ToolCall: + console.log(`\n[Tool] ${msg.toolUse.name}`); break; case DroidMessageType.ToolResult: console.log(`[Tool Result] ${msg.isError ? 'Error' : 'OK'}`); break; - case DroidMessageType.TurnComplete: + case DroidMessageType.Result: console.log('\n\n--- Turn complete ---'); if (msg.tokenUsage) { console.log( diff --git a/examples/stress/_harness.ts b/examples/stress/_harness.ts new file mode 100644 index 0000000..775bc38 --- /dev/null +++ b/examples/stress/_harness.ts @@ -0,0 +1,457 @@ +import assert from 'node:assert/strict'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { pathToFileURL } from 'node:url'; + +import { + DroidMessageType, + createSession, + run, + type CreateSessionOptions, + type DroidResult, + type DroidSession, + type DroidStreamEvent, + type MessageOptions, + type RunOptions, +} from '@factory/droid-sdk'; + +export interface CollectedStream { + name: string; + events: DroidStreamEvent[]; + counts: Record; + result: DroidResult; +} + +export const DEFAULT_PUBLIC_TYPES = new Set([ + DroidMessageType.Assistant, + DroidMessageType.User, + DroidMessageType.ToolCall, + DroidMessageType.ToolResult, + DroidMessageType.Error, + DroidMessageType.Result, +]); + +export const PARTIAL_ONLY_TYPES = new Set([ + DroidMessageType.AssistantTextDelta, + DroidMessageType.AssistantTextComplete, + DroidMessageType.ThinkingTextDelta, + DroidMessageType.ThinkingTextComplete, + DroidMessageType.ToolCallDelta, + DroidMessageType.ToolProgress, +]); + +export const INTERNAL_TYPES = new Set([ + 'create_message', + 
'structured_output', + 'tool_use', +]); + +export function stressExecPath(): string { + return process.env['DROID_EXEC_PATH'] ?? 'droid-dev'; +} + +export function stressModelOptions(): Pick { + const modelId = process.env['DROID_STRESS_MODEL']; + return modelId ? { modelId } : {}; +} + +export function stressRepeat(): number { + const parsed = Number.parseInt(process.env['DROID_STRESS_REPEAT'] ?? '1', 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : 1; +} + +export function createStressSession( + options: CreateSessionOptions = {} +): Promise { + return createSession({ + execPath: stressExecPath(), + cwd: process.cwd(), + ...stressModelOptions(), + ...options, + }); +} + +export function createStressRunOptions(options: RunOptions = {}): RunOptions { + return { + execPath: stressExecPath(), + cwd: process.cwd(), + ...stressModelOptions(), + ...options, + }; +} + +export async function runStress( + prompt: string, + options: RunOptions = {} +): Promise { + const result = await run(prompt, createStressRunOptions(options)); + validateResult(result, 'run()'); + await writeJsonlArtifact('run', [result]); + return result; +} + +export async function collectStream( + name: string, + session: DroidSession, + prompt: string, + options: MessageOptions = {} +): Promise { + return collect(name, session, prompt, { + ...options, + includePartialMessages: false, + }); +} + +export async function collectPartialStream( + name: string, + session: DroidSession, + prompt: string, + options: MessageOptions = {} +): Promise { + return collect(name, session, prompt, { + ...options, + includePartialMessages: true, + }); +} + +export function countByType( + events: DroidStreamEvent[] +): Record { + const counts: Record = {}; + for (const event of events) { + counts[event.type] = (counts[event.type] ?? 
0) + 1; + } + return counts; +} + +export function resultEvents(events: DroidStreamEvent[]): DroidResult[] { + return events.filter( + (event): event is DroidResult => event.type === DroidMessageType.Result + ); +} + +export function lastResult(events: DroidStreamEvent[]): DroidResult { + const results = resultEvents(events); + assert.equal( + results.length, + 1, + `expected exactly one result, saw ${results.length}\n${diagnose(events)}` + ); + assert.equal( + events.at(-1)?.type, + DroidMessageType.Result, + `result must be final event\n${diagnose(events)}` + ); + return results[0]!; +} + +export function validateResult(result: DroidResult, label: string): void { + assert.equal(result.type, DroidMessageType.Result, `${label}: not a result`); + assert.equal( + result.result, + result.text, + `${label}: result.result and result.text diverged` + ); + assert.ok(Array.isArray(result.messages), `${label}: messages missing`); + assertNoInternalMessages(result.messages, `${label}: result.messages`); + + if (!result.isError) { + assert.equal(result.error, null, `${label}: success result has error`); + assert.equal( + result.structuredOutputError ?? 
null, + null, + `${label}: success result has structuredOutputError` + ); + } +} + +export function assertDefaultStreamShape( + collected: CollectedStream, + label = collected.name +): void { + for (const event of collected.events) { + assert.ok( + DEFAULT_PUBLIC_TYPES.has(event.type), + `${label}: default stream leaked ${event.type}\n${diagnose( + collected.events + )}` + ); + } + assertNoInternalMessages(collected.events, label); + validateResult(collected.result, label); +} + +export function assertPartialStreamShape( + collected: CollectedStream, + label = collected.name +): void { + assertNoInternalMessages(collected.events, label); + validateResult(collected.result, label); +} + +export function assertNoInternalMessages( + events: DroidStreamEvent[], + label: string +): void { + const leaked = events.filter((event) => INTERNAL_TYPES.has(event.type)); + assert.equal( + leaked.length, + 0, + `${label}: internal message types leaked: ${leaked + .map((event) => event.type) + .join(', ')}` + ); +} + +export function assertAssistantOutput( + collected: CollectedStream, + label = collected.name +): void { + const assistantText = assistantTexts(collected.events).join(''); + assert.ok( + assistantText.length > 0 || collected.result.result.length > 0, + `${label}: expected non-empty assistant output\n${diagnose( + collected.events + )}` + ); +} + +export function assertPartialTextConsistency( + collected: CollectedStream, + label = collected.name +): void { + const deltas = assistantTextDeltas(collected.events); + const assistantText = assistantTexts(collected.events).join(''); + const resultText = collected.result.result; + + if (deltas.length > 0 && assistantText.length > 0) { + assertTextCompatible( + deltas, + assistantText, + `${label}: assistant deltas contradict full assistant message` + ); + } + + if (assistantText.length > 0 && resultText.length > 0) { + assertTextCompatible( + assistantText, + resultText, + `${label}: full assistant text contradicts result 
text` + ); + } + + if (deltas.length > 0 && resultText.length > 0) { + assertTextCompatible( + deltas, + resultText, + `${label}: assistant deltas contradict result text` + ); + } +} + +export function assertToolPairing( + events: DroidStreamEvent[], + label: string +): void { + const calls = new Set( + events + .filter((event) => event.type === DroidMessageType.ToolCall) + .map((event) => event.toolUse.id) + ); + const results = events.filter( + (event) => event.type === DroidMessageType.ToolResult + ); + + for (const result of results) { + assert.ok( + calls.has(result.toolUseId), + `${label}: tool_result ${result.toolUseId} has no prior tool_call` + ); + } +} + +export async function withTempDir( + prefix: string, + fn: (dir: string) => Promise +): Promise { + const dir = `/tmp/droid-sdk-stress-${prefix}-${process.pid}-${Date.now()}`; + await mkdir(dir, { recursive: true }); + try { + return await fn(dir); + } finally { + if (process.env['DROID_STRESS_KEEP_TEMP'] !== '1') { + await rm(dir, { recursive: true, force: true }); + } + } +} + +export async function runStressCase( + name: string, + fn: () => Promise +): Promise { + const repeat = stressRepeat(); + for (let index = 0; index < repeat; index++) { + const suffix = repeat > 1 ? 
` (${index + 1}/${repeat})` : ''; + console.log(`→ ${name}${suffix}`); + await fn(); + console.log(`✓ ${name}${suffix}`); + } +} + +export function assistantTextDeltas(events: DroidStreamEvent[]): string { + return events + .filter((event) => event.type === DroidMessageType.AssistantTextDelta) + .map((event) => event.text) + .join(''); +} + +export function assistantTexts(events: DroidStreamEvent[]): string[] { + return events + .filter((event) => event.type === DroidMessageType.Assistant) + .map((event) => event.text) + .filter(Boolean); +} + +export function diagnose(events: DroidStreamEvent[]): string { + return JSON.stringify( + events.map((event, index) => summarizeEvent(event, index)), + null, + 2 + ); +} + +export function isDirectRun(metaUrl: string): boolean { + const entry = process.argv[1]; + return entry ? metaUrl === pathToFileURL(entry).href : false; +} + +export async function writeJsonlArtifact( + name: string, + events: DroidStreamEvent[] +): Promise { + const root = process.env['DROID_STRESS_ARTIFACTS'] ?? 
'.stress-artifacts'; + await mkdir(root, { recursive: true }); + const file = join( + root, + `${new Date().toISOString().replace(/[:.]/g, '-')}-${sanitize(name)}.jsonl` + ); + const lines = events + .map((event, index) => JSON.stringify(summarizeEvent(event, index))) + .join('\n'); + await writeFile(file, `${lines}\n`); +} + +async function collect( + name: string, + session: DroidSession, + prompt: string, + options: MessageOptions +): Promise { + const events: DroidStreamEvent[] = []; + if (options.includePartialMessages === true) { + for await (const event of session.stream(prompt, { + ...options, + includePartialMessages: true, + })) { + events.push(event); + } + } else { + for await (const event of session.stream(prompt, { + ...options, + includePartialMessages: false, + })) { + events.push(event); + } + } + const result = lastResult(events); + const collected = { + name, + events, + counts: countByType(events), + result, + }; + validateResult(result, name); + await writeJsonlArtifact(name, events); + return collected; +} + +function assertTextCompatible( + left: string, + right: string, + message: string +): void { + const a = normalizeText(left); + const b = normalizeText(right); + assert.ok( + a === b || a.includes(b) || b.includes(a), + `${message}\nleft: ${left}\nright: ${right}` + ); +} + +function normalizeText(text: string): string { + return text.replace(/\s+/g, ' ').trim(); +} + +function sanitize(value: string): string { + return value.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-|-$/g, ''); +} + +function summarizeEvent( + event: DroidStreamEvent, + index: number +): Record { + const base: Record = { + index, + type: event.type, + }; + + switch (event.type) { + case DroidMessageType.Assistant: + return { + ...base, + textLength: event.text.length, + textPreview: event.text.slice(0, 160), + }; + case DroidMessageType.AssistantTextDelta: + case DroidMessageType.ThinkingTextDelta: + return { + ...base, + messageId: event.messageId, + blockIndex: 
event.blockIndex, + textLength: event.text.length, + textPreview: event.text.slice(0, 160), + }; + case DroidMessageType.ToolCall: + case DroidMessageType.ToolCallDelta: + return { + ...base, + toolUseId: event.toolUse.id, + toolName: event.toolUse.name, + }; + case DroidMessageType.ToolResult: + return { + ...base, + toolUseId: event.toolUseId, + toolName: event.toolName, + isError: event.isError, + }; + case DroidMessageType.Error: + return { + ...base, + message: event.message, + errorType: event.errorType, + }; + case DroidMessageType.Result: + return { + ...base, + subtype: event.subtype, + isError: event.isError, + resultLength: event.result.length, + messageCount: event.messages.length, + structuredOutput: event.structuredOutput ?? null, + structuredOutputError: event.structuredOutputError ?? null, + }; + default: + return base; + } +} diff --git a/examples/stress/all.ts b/examples/stress/all.ts new file mode 100644 index 0000000..b578d6b --- /dev/null +++ b/examples/stress/all.ts @@ -0,0 +1,48 @@ +import { isDirectRun } from './_harness.js'; +import { main as cancelInterrupt } from './cancel-interrupt.js'; +import { main as defaultStreaming } from './default-streaming.js'; +import { main as defaultVsPartialConsistency } from './default-vs-partial-consistency.js'; +import { main as errorPaths } from './error-paths.js'; +import { main as multiTurn } from './multi-turn.js'; +import { main as partialStreaming } from './partial-streaming.js'; +import { main as structuredOutput } from './structured-output.js'; +import { main as toolUse } from './tool-use.js'; + +const scripts = [ + ['default-streaming', defaultStreaming], + ['partial-streaming', partialStreaming], + ['default-vs-partial-consistency', defaultVsPartialConsistency], + ['structured-output', structuredOutput], + ['tool-use', toolUse], + ['multi-turn', multiTurn], + ['cancel-interrupt', cancelInterrupt], + ['error-paths', errorPaths], +] as const; + +export async function main(): Promise { + 
console.log( + [ + 'Droid SDK stress suite', + `DROID_EXEC_PATH=${process.env['DROID_EXEC_PATH'] ?? 'droid-dev'}`, + `DROID_STRESS_MODEL=${process.env['DROID_STRESS_MODEL'] ?? '(default)'}`, + `DROID_STRESS_REPEAT=${process.env['DROID_STRESS_REPEAT'] ?? '1'}`, + `DROID_STRESS_ARTIFACTS=${ + process.env['DROID_STRESS_ARTIFACTS'] ?? '.stress-artifacts' + }`, + ].join('\n') + ); + + for (const [name, script] of scripts) { + console.log(`\n## ${name}`); + await script(); + } + + console.log('\nAll stress scripts completed.'); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/cancel-interrupt.ts b/examples/stress/cancel-interrupt.ts new file mode 100644 index 0000000..a2b4ae7 --- /dev/null +++ b/examples/stress/cancel-interrupt.ts @@ -0,0 +1,92 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType } from '@factory/droid-sdk'; +import type { DroidStreamEvent } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertPartialStreamShape, + collectStream, + createStressSession, + isDirectRun, + lastResult, + runStressCase, + writeJsonlArtifact, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('abort controller lifecycle', async () => { + const session = await createStressSession(); + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(new Error('stress abort requested')), + 750 + ); + + try { + await assert.rejects(async () => { + for await (const event of session.stream( + 'Write a long, detailed essay about compiler construction.', + { abortSignal: controller.signal, includePartialMessages: true } + )) { + void event; + } + }, /stress abort requested|Operation aborted/); + + const recovered = await collectStream( + 'abort-controller-recovered', + session, + 'Reply with exactly: abort recovery complete' + ); + 
assertDefaultStreamShape(recovered); + assertAssistantOutput(recovered); + } finally { + clearTimeout(timeout); + await session.close(); + } + }); + + await runStressCase('session interrupt completion', async () => { + const session = await createStressSession(); + const events: DroidStreamEvent[] = []; + let interrupted = false; + + try { + for await (const event of session.stream( + 'Write a long numbered list about operating systems.', + { includePartialMessages: true } + )) { + events.push(event); + if ( + !interrupted && + event.type !== DroidMessageType.Result && + events.length >= 2 + ) { + interrupted = true; + await session.interrupt(); + } + } + + await writeJsonlArtifact('session-interrupt', events); + const result = lastResult(events); + assert.ok(interrupted, 'expected to send an interrupt'); + assert.equal(result.type, DroidMessageType.Result); + assertPartialStreamShape({ + name: 'session-interrupt', + events, + counts: {}, + result, + }); + } finally { + await session.close(); + } + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/default-streaming.ts b/examples/stress/default-streaming.ts new file mode 100644 index 0000000..50dbd53 --- /dev/null +++ b/examples/stress/default-streaming.ts @@ -0,0 +1,79 @@ +import assert from 'node:assert/strict'; +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +import { AutonomyLevel, DroidMessageType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertToolPairing, + collectStream, + createStressSession, + isDirectRun, + runStressCase, + withTempDir, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('default streaming text', async () => { + const session = await createStressSession(); + try { + const collected = await collectStream( + 'default-streaming-text', + session, + [ + 
'Reply with one short sentence.', + 'Include the exact phrase "default streaming stress".', + ].join(' ') + ); + + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + } finally { + await session.close(); + } + }); + + await runStressCase('default streaming tool use', async () => { + await withTempDir('default-streaming', async (dir) => { + const file = join(dir, 'input.txt'); + await writeFile(file, 'default stream tool fixture\n'); + + const session = await createStressSession({ + autonomyLevel: AutonomyLevel.Medium, + }); + try { + const collected = await collectStream( + 'default-streaming-tool-use', + session, + [ + `Read the file at ${file}.`, + 'Then reply with exactly: default stream read complete', + ].join('\n') + ); + + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + assert.ok( + collected.counts[DroidMessageType.ToolCall] ?? 0, + 'expected default stream to emit a tool_call' + ); + assert.ok( + collected.counts[DroidMessageType.ToolResult] ?? 
0, + 'expected default stream to emit a tool_result' + ); + assertToolPairing(collected.events, collected.name); + } finally { + await session.close(); + } + }); + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/default-vs-partial-consistency.ts b/examples/stress/default-vs-partial-consistency.ts new file mode 100644 index 0000000..e94fb65 --- /dev/null +++ b/examples/stress/default-vs-partial-consistency.ts @@ -0,0 +1,87 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType } from '@factory/droid-sdk'; + +import { + PARTIAL_ONLY_TYPES, + assertAssistantOutput, + assertDefaultStreamShape, + assertNoInternalMessages, + assertPartialStreamShape, + assertPartialTextConsistency, + collectPartialStream, + collectStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('default vs partial consistency', async () => { + const prompt = [ + 'Answer with a concise paragraph about stream aggregation.', + 'Include the phrase "stream consistency stress".', + ].join(' '); + + const defaultSession = await createStressSession(); + const partialSession = await createStressSession(); + try { + const defaultCollected = await collectStream( + 'default-vs-partial-default', + defaultSession, + prompt + ); + const partialCollected = await collectPartialStream( + 'default-vs-partial-partial', + partialSession, + prompt + ); + + assertDefaultStreamShape(defaultCollected); + assertPartialStreamShape(partialCollected); + assertAssistantOutput(defaultCollected); + assertAssistantOutput(partialCollected); + assertPartialTextConsistency(partialCollected); + assertNoInternalMessages( + defaultCollected.result.messages, + 'default result' + ); + assertNoInternalMessages( + partialCollected.result.messages, + 'partial result' + ); + + 
assert.equal(defaultCollected.result.isError, false); + assert.equal(partialCollected.result.isError, false); + assert.ok( + partialCollected.events.some((event) => + PARTIAL_ONLY_TYPES.has(event.type) + ), + 'expected partial stream to include at least one partial-only event' + ); + assert.equal( + defaultCollected.counts[DroidMessageType.Result], + 1, + 'default stream must emit one result' + ); + assert.equal( + partialCollected.counts[DroidMessageType.Result], + 1, + 'partial stream must emit one result' + ); + assert.ok( + partialCollected.counts[DroidMessageType.Assistant] ?? 0, + 'partial stream should still include full assistant messages' + ); + } finally { + await Promise.all([defaultSession.close(), partialSession.close()]); + } + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/error-paths.ts b/examples/stress/error-paths.ts new file mode 100644 index 0000000..2ada6c7 --- /dev/null +++ b/examples/stress/error-paths.ts @@ -0,0 +1,90 @@ +import assert from 'node:assert/strict'; + +import { + ConnectionError, + DroidMessageType, + OutputFormatType, + createSession, +} from '@factory/droid-sdk'; +import type { DroidClientTransport } from '@factory/droid-sdk'; + +import { + assertDefaultStreamShape, + collectStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('turn-level structured output error', async () => { + const session = await createStressSession(); + try { + const collected = await collectStream( + 'error-paths-structured-output', + session, + 'Return any object. 
This schema intentionally cannot be satisfied.', + { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + impossible: { type: 'string', enum: [] }, + }, + required: ['impossible'], + additionalProperties: false, + }, + }, + } + ); + + assertDefaultStreamShape(collected); + assert.equal(collected.result.type, DroidMessageType.Result); + assert.equal( + collected.result.isError, + true, + 'expected turn-level error to be captured on result' + ); + assert.ok( + collected.result.error || + collected.result.structuredOutputError || + collected.result.subtype === 'error_structured_output', + 'expected result to include error details or structured output subtype' + ); + } finally { + await session.close(); + } + }); + + await runStressCase('startup failure throws typed error', async () => { + await assert.rejects( + () => + createSession({ + transport: new FailingStartupTransport(), + }), + (error: unknown) => error instanceof ConnectionError + ); + }); +} + +class FailingStartupTransport implements DroidClientTransport { + readonly isConnected = false; + + send(): void { + throw new ConnectionError('Synthetic startup transport failure'); + } + + onMessage(): void {} + + onError(): void {} + + async close(): Promise {} +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/multi-turn.ts b/examples/stress/multi-turn.ts new file mode 100644 index 0000000..a9e0a62 --- /dev/null +++ b/examples/stress/multi-turn.ts @@ -0,0 +1,137 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType, OutputFormatType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertPartialStreamShape, + assertPartialTextConsistency, + collectPartialStream, + collectStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): 
Promise { + await runStressCase('multi-turn aggregation', async () => { + const session = await createStressSession(); + let previousNumTurns = 0; + let previousTurnCount = 0; + + try { + const first = await collectStream( + 'multi-turn-1', + session, + 'Remember this phrase for the next turn: multi turn stress anchor.' + ); + assertDefaultStreamShape(first); + assertAssistantOutput(first); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + first.result, + previousNumTurns, + previousTurnCount + )); + + const structured = await collectStream( + 'multi-turn-2-structured', + session, + 'Return a structured object with anchor "multi turn stress anchor" and turn 2.', + { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + anchor: { + type: 'string', + enum: ['multi turn stress anchor'], + }, + turn: { type: 'number', enum: [2] }, + }, + required: ['anchor', 'turn'], + additionalProperties: false, + }, + }, + } + ); + assertDefaultStreamShape(structured); + assert.equal(structured.result.isError, false); + assert.ok(structured.result.structuredOutput); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + structured.result, + previousNumTurns, + previousTurnCount + )); + + const third = await collectPartialStream( + 'multi-turn-3-partial', + session, + 'In one sentence, state the remembered anchor phrase.' + ); + assertPartialStreamShape(third); + assertAssistantOutput(third); + assertPartialTextConsistency(third); + assert.equal( + third.result.structuredOutput ?? null, + null, + 'structured output leaked into later turn' + ); + assert.equal( + third.result.structuredOutputError ?? 
null, + null, + 'structured output error leaked into later turn' + ); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + third.result, + previousNumTurns, + previousTurnCount + )); + + for (const index of [4, 5]) { + const collected = await collectStream( + `multi-turn-${index}`, + session, + `Reply with exactly: multi turn stress turn ${index}` + ); + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + assert.equal(collected.result.type, DroidMessageType.Result); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + collected.result, + previousNumTurns, + previousTurnCount + )); + } + } finally { + await session.close(); + } + }); +} + +function assertMonotonicTurns( + result: { numTurns: number; turnCount: number }, + previousNumTurns: number, + previousTurnCount: number +): { previousNumTurns: number; previousTurnCount: number } { + assert.ok( + result.numTurns >= previousNumTurns, + `numTurns regressed from ${previousNumTurns} to ${result.numTurns}` + ); + assert.ok( + result.turnCount >= previousTurnCount, + `turnCount regressed from ${previousTurnCount} to ${result.turnCount}` + ); + return { + previousNumTurns: result.numTurns, + previousTurnCount: result.turnCount, + }; +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/partial-streaming.ts b/examples/stress/partial-streaming.ts new file mode 100644 index 0000000..7bab3e8 --- /dev/null +++ b/examples/stress/partial-streaming.ts @@ -0,0 +1,47 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertPartialStreamShape, + assertPartialTextConsistency, + collectPartialStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('partial streaming text', async () => { + 
const session = await createStressSession(); + try { + const collected = await collectPartialStream( + 'partial-streaming-text', + session, + [ + 'Reply in exactly two short sentences.', + 'The first sentence must include "partial streaming stress".', + 'The second sentence must include "delta reconstruction".', + ].join(' ') + ); + + assertPartialStreamShape(collected); + assertAssistantOutput(collected); + assert.ok( + (collected.counts[DroidMessageType.AssistantTextDelta] ?? 0) > 0, + 'expected partial stream to include assistant_text_delta events' + ); + assertPartialTextConsistency(collected); + } finally { + await session.close(); + } + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/structured-output.ts b/examples/stress/structured-output.ts new file mode 100644 index 0000000..aa91f5b --- /dev/null +++ b/examples/stress/structured-output.ts @@ -0,0 +1,215 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType, OutputFormatType } from '@factory/droid-sdk'; +import type { DroidResult, MessageOptions } from '@factory/droid-sdk'; +import { z } from 'zod'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + collectStream, + createStressSession, + isDirectRun, + runStress, + runStressCase, +} from './_harness.js'; + +type OutputFormat = NonNullable; +type JsonOutput = NonNullable; + +interface StructuredCase { + name: string; + prompt: string; + outputFormat: OutputFormat; + validate: (value: JsonOutput) => void; +} + +const cases: StructuredCase[] = [ + { + name: 'flat-object', + prompt: + 'Return a JSON object with name "Ada", language "TypeScript", and score 7.', + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + name: { type: 'string', enum: ['Ada'] }, + language: { type: 'string', enum: ['TypeScript'] }, + score: { type: 'number', minimum: 7, maximum: 7 }, + }, 
+ required: ['name', 'language', 'score'], + additionalProperties: false, + }, + }, + validate: (value) => + z + .object({ + name: z.literal('Ada'), + language: z.literal('TypeScript'), + score: z.literal(7), + }) + .parse(value), + }, + { + name: 'nested-array-enum', + prompt: + 'Return a JSON object with summary.status "ok" and two checks named stream and result, both passed true.', + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + summary: { + type: 'object', + properties: { + status: { type: 'string', enum: ['ok'] }, + }, + required: ['status'], + additionalProperties: false, + }, + checks: { + type: 'array', + minItems: 2, + maxItems: 2, + items: { + type: 'object', + properties: { + name: { type: 'string', enum: ['stream', 'result'] }, + passed: { type: 'boolean', enum: [true] }, + }, + required: ['name', 'passed'], + additionalProperties: false, + }, + }, + }, + required: ['summary', 'checks'], + additionalProperties: false, + }, + }, + validate: (value) => + z + .object({ + summary: z.object({ status: z.literal('ok') }), + checks: z + .array( + z.object({ + name: z.enum(['stream', 'result']), + passed: z.literal(true), + }) + ) + .length(2), + }) + .parse(value), + }, + { + name: 'optional-pattern-bounds', + prompt: + 'Return a JSON object with code "stress-123", count 3, tags ["sdk"], and omit notes.', + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + code: { type: 'string', pattern: '^stress-[0-9]{3}$' }, + count: { type: 'number', minimum: 1, maximum: 3 }, + tags: { + type: 'array', + minItems: 1, + items: { type: 'string', enum: ['sdk'] }, + }, + notes: { type: 'string' }, + }, + required: ['code', 'count', 'tags'], + additionalProperties: false, + }, + }, + validate: (value) => + z + .object({ + code: z.string().regex(/^stress-[0-9]{3}$/), + count: z.number().min(1).max(3), + tags: z.array(z.literal('sdk')).min(1), + notes: z.string().optional(), + 
}) + .parse(value), + }, +]; + +export async function main(): Promise<void> { + await runStressCase('structured output run()', async () => { + for (const stressCase of cases) { + const result = await runStress(stressCase.prompt, { + outputFormat: stressCase.outputFormat, + }); + assertStructuredSuccess(result, stressCase); + } + }); + + await runStressCase('structured output streaming', async () => { + const session = await createStressSession(); + try { + for (const stressCase of cases) { + const collected = await collectStream( + `structured-output-${stressCase.name}`, + session, + stressCase.prompt, + { outputFormat: stressCase.outputFormat } + ); + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + assertStructuredSuccess(collected.result, stressCase); + } + } finally { + await session.close(); + } + }); + + await runStressCase('structured output invalid schema', async () => { + const result = await runStress( + 'Return any object. This schema intentionally cannot be satisfied.', + { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + impossible: { type: 'string', enum: [] }, + }, + required: ['impossible'], + additionalProperties: false, + }, + }, + } + ); + + assert.equal( + result.isError, + true, + 'expected invalid structured output to mark result as an error' + ); + assert.equal(result.structuredOutput ?? null, null); + assert.ok( + result.structuredOutputError, + 'expected invalid structured output error details' + ); + }); +} + +function assertStructuredSuccess( + result: DroidResult, + stressCase: StructuredCase +): void { + assert.equal(result.type, DroidMessageType.Result); + assert.equal(result.isError, false, `${stressCase.name}: unexpected error`); + assert.equal(result.structuredOutputError ?? 
null, null); + assert.ok(result.structuredOutput, `${stressCase.name}: missing output`); + stressCase.validate(result.structuredOutput as JsonOutput); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/tool-use.ts b/examples/stress/tool-use.ts new file mode 100644 index 0000000..fc4de0e --- /dev/null +++ b/examples/stress/tool-use.ts @@ -0,0 +1,101 @@ +import assert from 'node:assert/strict'; +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +import { AutonomyLevel, DroidMessageType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertPartialStreamShape, + assertToolPairing, + collectPartialStream, + collectStream, + createStressSession, + isDirectRun, + runStressCase, + withTempDir, +} from './_harness.js'; + +export async function main(): Promise<void> { + await runStressCase('tool use preservation', async () => { + await withTempDir('tool-use', async (dir) => { + const defaultFile = join(dir, 'default.txt'); + const partialFile = join(dir, 'partial.txt'); + await writeFile(defaultFile, 'default tool use fixture\n'); + await writeFile(partialFile, 'partial tool use fixture\n'); + + const defaultSession = await createStressSession({ + autonomyLevel: AutonomyLevel.Medium, + }); + const partialSession = await createStressSession({ + autonomyLevel: AutonomyLevel.Medium, + }); + + try { + const defaultCollected = await collectStream( + 'tool-use-default', + defaultSession, + toolPrompt(defaultFile, 'default') + ); + const partialCollected = await collectPartialStream( + 'tool-use-partial', + partialSession, + toolPrompt(partialFile, 'partial') + ); + + assertDefaultStreamShape(defaultCollected); + assertPartialStreamShape(partialCollected); + assertAssistantOutput(defaultCollected); + assertAssistantOutput(partialCollected); + assertToolPairing(defaultCollected.events, 
defaultCollected.name); + assertToolPairing(partialCollected.events, partialCollected.name); + + assert.ok( + defaultCollected.counts[DroidMessageType.ToolCall] ?? 0, + 'default stream must expose completed tool calls' + ); + assert.ok( + defaultCollected.counts[DroidMessageType.ToolResult] ?? 0, + 'default stream must expose tool results' + ); + assert.ok( + partialCollected.counts[DroidMessageType.ToolCall] ?? 0, + 'partial stream must include completed tool calls' + ); + assert.ok( + partialCollected.counts[DroidMessageType.ToolCallDelta] ?? 0, + 'partial stream must include tool_call_delta' + ); + assert.ok( + partialCollected.result.messages.some( + (message) => message.type === DroidMessageType.ToolCall + ), + 'result.messages must preserve tool_call' + ); + assert.ok( + partialCollected.result.messages.some( + (message) => message.type === DroidMessageType.ToolResult + ), + 'result.messages must preserve tool_result' + ); + } finally { + await Promise.all([defaultSession.close(), partialSession.close()]); + } + }); + }); +} + +function toolPrompt(file: string, mode: string): string { + return [ + `Use a file-reading tool to read ${file}.`, + `Then respond with exactly: ${mode} tool use stress complete`, + ].join('\n'); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/structured-output-stress-test.ts b/examples/structured-output-stress-test.ts index 4f2bdde..30b9956 100644 --- a/examples/structured-output-stress-test.ts +++ b/examples/structured-output-stress-test.ts @@ -1,8 +1,9 @@ /** * Structured output stress test. * - * Runs several structured-output schemas against one or more Droid models and - * verifies both `run(...)` results and streaming `TurnComplete` metadata. 
+ * Runs several structured-output schemas against one or more Droid models, + * verifies both `run(...)` results and streaming result metadata, and + * stress-tests tool use before structured output. * * Usage: * npx tsx examples/structured-output-stress-test.ts @@ -13,6 +14,7 @@ import assert from 'node:assert/strict'; import { + AutonomyLevel, DroidMessageType, OutputFormatType, createSession, @@ -65,6 +67,15 @@ const MetricsSchema = z.object({ ), }); +const PackageSchema = z.object({ + packageName: z.literal('@factory/droid-sdk'), + tmpDir: z.string().startsWith('/tmp/droid-sdk-structured-output-stress-'), + createdFile: z.string().endsWith('/notes.txt'), + finalContent: z.string().includes('edited by structured output stress'), + filesRead: z.array(z.enum(['package.json', 'notes.txt'])), + toolsUsed: z.array(z.enum(['read', 'write', 'edit'])), +}); + const stressCases: StressCase[] = [ { name: 'flat-literals', @@ -195,17 +206,16 @@ function assertStructuredResult( messages: result.messages .filter( (message) => - message.type === DroidMessageType.CreateMessage || + message.type === DroidMessageType.Assistant || message.type === DroidMessageType.Error || - message.type === DroidMessageType.StructuredOutput || - message.type === DroidMessageType.TurnComplete + message.type === DroidMessageType.Result ) .map((message) => { - if (message.type !== DroidMessageType.CreateMessage) return message; + if (message.type !== DroidMessageType.Assistant) return message; return { type: message.type, - role: message.role, - content: message.content, + role: message.message.role, + content: message.message.content, }; }), }, @@ -235,6 +245,27 @@ function findMessage( ); } +function findNormalToolUse( + messages: DroidMessage[] +): Extract | undefined { + return messages.find( + (message): message is Extract => + message.type === DroidMessageType.ToolCall && + message.toolUse.name !== 'StructuredOutput' + ); +} + +function findToolUses( + messages: DroidMessage[], + matches: 
(toolName: string) => boolean +): Array> { + return messages.filter( + (message): message is Extract => + message.type === DroidMessageType.ToolCall && + matches(message.toolUse.name) + ); +} + async function runCase( modelId: string | undefined, stressCase: StressCase @@ -248,12 +279,6 @@ async function runCase( assertStructuredResult(result, stressCase); - const notification = findMessage( - result.messages, - DroidMessageType.StructuredOutput - ); - assert.ok(notification, `${stressCase.name}: expected structured_output`); - console.log( ` ✓ ${stressCase.name}: ${JSON.stringify(result.structuredOutput)}` ); @@ -269,40 +294,222 @@ async function runStreamingCase(modelId: string | undefined): Promise { try { const messages: DroidMessage[] = []; - for await (const message of ( - await session.send(stressCase.prompt, { - outputFormat: stressCase.outputFormat, - }) - ).stream()) { + for await (const message of session.stream(stressCase.prompt, { + outputFormat: stressCase.outputFormat, + })) { messages.push(message); } - const structured = findMessage(messages, DroidMessageType.StructuredOutput); - const complete = findMessage(messages, DroidMessageType.TurnComplete); + const result = findMessage(messages, DroidMessageType.Result); + + assert.ok(result, 'streaming: expected result message'); + assert.equal(result.structuredOutputError, null); + assert.ok(result.structuredOutput); + stressCase.parse(result.structuredOutput as JsonObject); + + console.log(' ✓ streaming emits structured output on result'); + } finally { + await session.close(); + } +} + +async function runToolUseCase(modelId: string | undefined): Promise { + const tmpDir = `/tmp/droid-sdk-structured-output-stress-${process.pid}-${Date.now()}`; + const tmpFile = `${tmpDir}/notes.txt`; + const outputFormat: OutputFormat = { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + packageName: { type: 'string', enum: ['@factory/droid-sdk'] }, + tmpDir: { type: 'string', enum: 
[tmpDir] }, + createdFile: { type: 'string', enum: [tmpFile] }, + finalContent: { + type: 'string', + enum: [ + [ + 'created by structured output stress', + 'edited by structured output stress', + ].join('\n'), + ], + }, + filesRead: { + type: 'array', + minItems: 2, + items: { + type: 'string', + enum: ['package.json', 'notes.txt'], + }, + }, + toolsUsed: { + type: 'array', + minItems: 3, + items: { + type: 'string', + enum: ['read', 'write', 'edit'], + }, + }, + }, + required: [ + 'packageName', + 'tmpDir', + 'createdFile', + 'finalContent', + 'filesRead', + 'toolsUsed', + ], + additionalProperties: false, + }, + }; + const session = await createSession({ + execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', + autonomyLevel: AutonomyLevel.Medium, + cwd: process.cwd(), + ...(modelId !== undefined && { modelId }), + }); - assert.ok(structured, 'streaming: expected structured_output message'); - assert.ok(complete, 'streaming: expected turn_complete message'); - assert.deepEqual( - complete.structuredOutput, - structured.structuredOutput, - 'streaming: TurnComplete should carry structured output' + try { + const messages: DroidMessage[] = []; + for await (const message of session.stream( + [ + 'You must use multiple tools before producing structured output.', + 'Use the Read tool to read package.json in the current working directory.', + `Create the directory ${tmpDir}.`, + `Write ${tmpFile} with exactly this first line: created by structured output stress`, + `Then use an edit tool to modify ${tmpFile} so its full content is exactly:`, + 'created by structured output stress', + 'edited by structured output stress', + `Use the Read tool to read ${tmpFile} after editing it.`, + 'Only after all tool calls are done, return the structured object.', + 'Set packageName from package.json name.', + `Set tmpDir to ${tmpDir} and createdFile to ${tmpFile}.`, + 'Set filesRead to include package.json and notes.txt.', + 'Set toolsUsed to include read, write, and edit.', + 
].join('\n'), + { outputFormat } + )) { + messages.push(message); + } + + const normalToolUse = findNormalToolUse(messages); + const readToolUses = findToolUses( + messages, + (toolName) => toolName.toLowerCase() === 'read' + ); + const writeToolUses = findToolUses(messages, (toolName) => { + const normalized = toolName.toLowerCase(); + return ( + normalized.includes('write') || + normalized.includes('create') || + normalized.includes('execute') + ); + }); + const editToolUses = findToolUses(messages, (toolName) => { + const normalized = toolName.toLowerCase(); + return normalized.includes('edit') || normalized.includes('patch'); + }); + const result = findMessage(messages, DroidMessageType.Result); + const diagnostic = JSON.stringify( + messages + .filter( + (message) => + message.type === DroidMessageType.ToolCall || + message.type === DroidMessageType.ToolResult || + message.type === DroidMessageType.Result || + message.type === DroidMessageType.Error + ) + .map((message) => + message.type === DroidMessageType.ToolCall + ? 
{ + type: message.type, + toolName: message.toolUse.name, + toolInput: message.toolUse.input, + } + : message + ), + null, + 2 + ); + + assert.ok( + normalToolUse, + `tool-use: expected a normal tool call before structured output\n${diagnostic}` + ); + assert.ok( + readToolUses.length >= 2, + `tool-use: expected at least 2 Read tool calls, saw ${readToolUses.length}\n${diagnostic}` + ); + assert.ok( + writeToolUses.length >= 1, + `tool-use: expected at least 1 write/create tool call\n${diagnostic}` + ); + assert.ok( + editToolUses.length >= 1, + `tool-use: expected at least 1 edit tool call\n${diagnostic}` + ); + assert.ok(result, 'tool-use: expected result message'); + assert.equal( + result.structuredOutputError, + null, + `tool-use: expected no structured output error\n${diagnostic}` ); - assert.equal(complete.structuredOutputError, null); - assert.ok(complete.structuredOutput); - stressCase.parse(complete.structuredOutput); + assert.ok(result.structuredOutput); + PackageSchema.parse(result.structuredOutput); - console.log(' ✓ streaming TurnComplete carries structured output'); + console.log( + ` ✓ read/write/edit before structured output: ${readToolUses.length}/${writeToolUses.length}/${editToolUses.length} tool calls` + ); } finally { await session.close(); } } +interface Failure { + model: string; + step: string; + error: unknown; +} + +const failures: Failure[] = []; + +async function runStep( + modelId: string | undefined, + step: string, + callback: () => Promise +): Promise { + try { + await callback(); + } catch (error) { + failures.push({ model: labelModel(modelId), step, error }); + console.error( + ` ✗ ${step}: ${error instanceof Error ? 
error.message : String(error)}` + ); + } +} + for (const modelId of parseModels()) { console.log(`\n=== Testing ${labelModel(modelId)} ===`); for (const stressCase of stressCases) { - await runCase(modelId, stressCase); + await runStep(modelId, stressCase.name, () => runCase(modelId, stressCase)); } - await runStreamingCase(modelId); + await runStep(modelId, 'streaming', () => runStreamingCase(modelId)); + await runStep(modelId, 'read/write/edit tool-use', () => + runToolUseCase(modelId) + ); } -console.log('\nStructured output stress test passed'); +if (failures.length > 0) { + console.error('\nStructured output stress test failures:'); + for (const failure of failures) { + console.error( + `- ${failure.model} / ${failure.step}: ${ + failure.error instanceof Error + ? (failure.error.stack ?? failure.error.message) + : String(failure.error) + }` + ); + } + process.exitCode = 1; +} else { + console.log('\nStructured output stress test passed'); +} diff --git a/examples/structured-output.ts b/examples/structured-output.ts index 8e854cc..a1eb60e 100644 --- a/examples/structured-output.ts +++ b/examples/structured-output.ts @@ -13,6 +13,8 @@ import assert from 'node:assert/strict'; import { OutputFormatType, run } from '@factory/droid-sdk'; +type FavoriteNumber = { favoriteNumber: number }; + async function main(): Promise { const prompt = process.argv.slice(2).join(' ') || @@ -40,11 +42,13 @@ async function main(): Promise { outputFormat, }); - assert.ok(result.structuredOutput, 'Expected structuredOutput to be set'); - assert.equal(typeof result.structuredOutput['favoriteNumber'], 'number'); + const structuredOutput = result.structuredOutput as FavoriteNumber | null; + + assert.ok(structuredOutput, 'Expected structuredOutput to be set'); + assert.equal(typeof structuredOutput.favoriteNumber, 'number'); console.log('=== Structured output ==='); - console.log(JSON.stringify(result.structuredOutput, null, 2)); + console.log(JSON.stringify(structuredOutput, null, 2)); 
console.log('\nStructured output example passed'); } diff --git a/examples/test-compact.ts b/examples/test-compact.ts index 09e71fb..d5d7b16 100644 --- a/examples/test-compact.ts +++ b/examples/test-compact.ts @@ -27,11 +27,13 @@ async function main(): Promise { console.log(`=== Turn ${i + 1} ===`); console.log(`Prompt: "${prompts[i]}"\n`); - for await (const msg of session.stream(prompts[i])) { + for await (const msg of session.stream(prompts[i], { + includePartialMessages: true, + })) { if (msg.type === DroidMessageType.AssistantTextDelta) { process.stdout.write(msg.text); } - if (msg.type === DroidMessageType.TurnComplete) { + if (msg.type === DroidMessageType.Result) { console.log('\n'); } } diff --git a/src/helpers.ts b/src/helpers.ts index 17991b7..9642de3 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -8,6 +8,7 @@ import type { DroidMcpServerConfig } from './mcp.js'; import type { InitializeSessionRequestParams, McpServerConfig, + OutputFormat, SessionTag, } from './schemas/client.js'; import type { @@ -23,9 +24,11 @@ import type { ToolSelectionOverrides } from './schemas/shared.js'; import { convertNotificationToStreamMessage, DroidMessageType, + isDefaultStreamMessage, + isInternalMessage, StreamStateTracker, } from './stream.js'; -import type { DroidMessage } from './stream.js'; +import type { DroidStreamEvent, InternalDroidMessage } from './stream.js'; import { ProcessTransport } from './transport.js'; import type { DroidClientTransport, ProcessTransportOptions } from './types.js'; @@ -52,14 +55,27 @@ export function extractInnerNotification( } export class MessageBridge { - private readonly _queue: DroidMessage[] = []; + private readonly _queue: DroidStreamEvent[] = []; private readonly _onDone: (() => void) | undefined; private _resolveWaiting: (() => void) | null = null; private _done = false; - private readonly _stateTracker = new StreamStateTracker(); + private readonly _stateTracker: StreamStateTracker; - constructor(onDone?: () => void) { + 
constructor( + onDone?: () => void, + private readonly _options: { + includePartialMessages?: boolean; + sessionId?: string; + startedAt?: number; + outputFormat?: OutputFormat; + } = {} + ) { this._onDone = onDone; + this._stateTracker = new StreamStateTracker({ + sessionId: _options.sessionId, + startedAt: _options.startedAt, + hasOutputFormat: _options.outputFormat !== undefined, + }); } readonly notificationHandler = ( @@ -75,11 +91,15 @@ export class MessageBridge { for (const msg of messages) { const { message, additional } = this._stateTracker.processMessage(msg); - this._enqueue(message); + if (message && this._shouldYield(message)) { + this._enqueue(message); + } for (const extra of additional) { - this._enqueue(extra); - if (extra.type === DroidMessageType.TurnComplete) { + if (this._shouldYield(extra)) { + this._enqueue(extra); + } + if (extra.type === DroidMessageType.Result) { this._signalDone(); } } @@ -90,13 +110,13 @@ export class MessageBridge { this._signalDone(); } - async *messages(): AsyncGenerator { + async *messages(): AsyncGenerator { while (true) { while (this._queue.length > 0) { const msg = this._queue.shift()!; yield msg; - if (msg.type === DroidMessageType.TurnComplete) { + if (msg.type === DroidMessageType.Result) { return; } } @@ -111,7 +131,18 @@ export class MessageBridge { } } - private _enqueue(msg: DroidMessage): void { + private _shouldYield( + message: InternalDroidMessage + ): message is DroidStreamEvent { + if (isInternalMessage(message)) { + return false; + } + return this._options.includePartialMessages + ? 
true + : isDefaultStreamMessage(message); + } + + private _enqueue(msg: DroidStreamEvent): void { this._queue.push(msg); if (this._resolveWaiting) { const resolve = this._resolveWaiting; diff --git a/src/index.ts b/src/index.ts index 7835042..efe11ee 100644 --- a/src/index.ts +++ b/src/index.ts @@ -28,8 +28,19 @@ export { StreamStateTracker, } from './stream.js'; export type { + DroidAssistantMessage, + DroidUserMessage, + DroidToolCallMessage, + DroidToolResultMessage, + DroidErrorMessage, + DroidResultMessage, + DroidStreamMessage, + DroidStreamEvent, AssistantTextDelta, + AssistantTextComplete, ThinkingTextDelta, + ThinkingTextComplete, + ToolCallDelta, ToolUse, ToolResult, ToolProgress, @@ -51,7 +62,6 @@ export type { StructuredOutput, StructuredOutputFields, ErrorEvent, - TurnComplete, DroidMessage, } from './stream.js'; diff --git a/src/run.ts b/src/run.ts index a8578e8..b875f78 100644 --- a/src/run.ts +++ b/src/run.ts @@ -1,11 +1,10 @@ import { - aggregateMessages, createSession, type CreateSessionOptions, type DroidResult, type MessageOptions, } from './session.js'; -import type { DroidMessage } from './stream.js'; +import { DroidMessageType } from './stream.js'; export interface RunOptions extends CreateSessionOptions, MessageOptions {} @@ -16,12 +15,16 @@ export async function run( const session = await createSession(options); try { - const startedAt = Date.now(); - const messages: DroidMessage[] = []; - for await (const msg of session.stream(prompt, options)) { - messages.push(msg); + for await (const msg of session.stream(prompt, { + ...options, + includePartialMessages: false, + })) { + if (msg.type === DroidMessageType.Result) { + return msg; + } } - return aggregateMessages(session.sessionId, messages, startedAt, options); + + throw new Error('Stream completed without a result message'); } finally { await session.close(); } diff --git a/src/schemas/enums.ts b/src/schemas/enums.ts index 7e5bb5e..ca595a0 100644 --- a/src/schemas/enums.ts +++ 
b/src/schemas/enums.ts @@ -56,7 +56,10 @@ export enum SessionNotificationType { SESSION_TITLE_UPDATED = 'session_title_updated', MCP_STATUS_CHANGED = 'mcp_status_changed', ASSISTANT_TEXT_DELTA = 'assistant_text_delta', + ASSISTANT_TEXT_COMPLETE = 'assistant_text_complete', THINKING_TEXT_DELTA = 'thinking_text_delta', + THINKING_TEXT_COMPLETE = 'thinking_text_complete', + TOOL_CALL = 'tool_call', SESSION_TOKEN_USAGE_CHANGED = 'session_token_usage_changed', MISSION_STATE_CHANGED = 'mission_state_changed', MISSION_FEATURES_CHANGED = 'mission_features_changed', diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 0bd2df8..e06cbdd 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -510,9 +510,12 @@ export { SettingsUpdatedNotificationSchema, SettingsUpdatedPayloadSchema, StartMissionRunConfirmationDetailsSchema, + AssistantTextCompleteNotificationSchema, StructuredOutputErrorSchema, StructuredOutputNotificationSchema, + ThinkingTextCompleteNotificationSchema, ThinkingTextDeltaNotificationSchema, + ToolCallNotificationSchema, ToolConfirmationDetailsSchema, ToolConfirmationInfoSchema, ToolProgressUpdateNotificationSchema, @@ -530,6 +533,7 @@ export type { AskUserResponse, AskUserResult, ApplyPatchToolConfirmationDetails, + AssistantTextCompleteNotification, AssistantTextDeltaNotification, CliRequestOrNotification, CreateMessageNotification, @@ -568,7 +572,9 @@ export type { StartMissionRunConfirmationDetails, StructuredOutputError, StructuredOutputNotification, + ThinkingTextCompleteNotification, ThinkingTextDeltaNotification, + ToolCallNotification, ToolConfirmationDetails, ToolConfirmationInfo, ToolProgressUpdate, diff --git a/src/schemas/server.ts b/src/schemas/server.ts index 1284cef..7cfeb3b 100644 --- a/src/schemas/server.ts +++ b/src/schemas/server.ts @@ -232,6 +232,19 @@ export type AssistantTextDeltaNotification = z.infer< typeof AssistantTextDeltaNotificationSchema >; +/** Assistant text complete notification (streaming block finished). 
*/ +export const AssistantTextCompleteNotificationSchema = z + .object({ + type: z.literal(SessionNotificationType.ASSISTANT_TEXT_COMPLETE), + messageId: z.string(), + blockIndex: z.number(), + }) + .passthrough(); + +export type AssistantTextCompleteNotification = z.infer< + typeof AssistantTextCompleteNotificationSchema +>; + /** Thinking text delta notification (streaming thinking token). */ export const ThinkingTextDeltaNotificationSchema = z .object({ @@ -246,6 +259,30 @@ export type ThinkingTextDeltaNotification = z.infer< typeof ThinkingTextDeltaNotificationSchema >; +/** Thinking text complete notification (streaming thinking block finished). */ +export const ThinkingTextCompleteNotificationSchema = z + .object({ + type: z.literal(SessionNotificationType.THINKING_TEXT_COMPLETE), + messageId: z.string(), + blockIndex: z.number(), + durationMs: z.number().optional(), + }) + .passthrough(); + +export type ThinkingTextCompleteNotification = z.infer< + typeof ThinkingTextCompleteNotificationSchema +>; + +/** Tool call partial notification. */ +export const ToolCallNotificationSchema = z + .object({ + type: z.literal(SessionNotificationType.TOOL_CALL), + toolUse: ToolUseBlockSchema, + }) + .passthrough(); + +export type ToolCallNotification = z.infer<typeof ToolCallNotificationSchema>; + /** Session token usage changed notification. 
*/ export const SessionTokenUsageChangedNotificationSchema = z .object({ @@ -398,7 +435,10 @@ export const SessionNotificationSchemaList = [ SessionTitleUpdatedNotificationSchema, McpStatusChangedNotificationSchema, AssistantTextDeltaNotificationSchema, + AssistantTextCompleteNotificationSchema, ThinkingTextDeltaNotificationSchema, + ThinkingTextCompleteNotificationSchema, + ToolCallNotificationSchema, SessionTokenUsageChangedNotificationSchema, MissionStateChangedNotificationSchema, MissionFeaturesChangedNotificationSchema, diff --git a/src/session.ts b/src/session.ts index b092c9b..fdf1db0 100644 --- a/src/session.ts +++ b/src/session.ts @@ -48,35 +48,11 @@ import type { } from './schemas/client.js'; import { DroidInteractionMode } from './schemas/enums.js'; import type { Base64ImageSource, DocumentSource } from './schemas/messages.js'; -import { FactoryDroidMessageRole } from './schemas/messages.js'; -import type { StructuredOutputError } from './schemas/server.js'; -import { JsonObjectSchema, type JsonObject } from './schemas/shared.js'; import { DroidMessageType } from './stream.js'; -import type { DroidMessage, ErrorEvent, TokenUsageUpdate } from './stream.js'; +import type { DroidResultMessage, DroidStreamEvent } from './stream.js'; /** Aggregated result from a one-shot {@link run} call. */ -export interface DroidResult { - /** Session that produced this result. */ - sessionId: string; - /** Concatenated assistant text deltas emitted during the turn. */ - text: string; - /** All stream messages emitted during the turn. */ - messages: DroidMessage[]; - /** Latest token usage update for the turn, when reported by Droid. */ - tokenUsage: TokenUsageUpdate | null; - /** Wall-clock duration spent consuming the turn. */ - durationMs: number; - /** Number of completed turns observed while consuming the stream. */ - turnCount: number; - /** First error event emitted during the turn, if any. 
*/ - error: ErrorEvent | null; - /** Structured JSON object emitted by the turn, when requested. */ - structuredOutput: JsonObject | null; - /** Backend structured output validation error, when reported. */ - structuredOutputError: StructuredOutputError | null; - /** True when the stream completed without an error event. */ - success: boolean; -} +export type DroidResult = DroidResultMessage; export interface CreateSessionOptions extends SessionInitOptions, HandlerOptions, TransportCreationOptions { @@ -101,6 +77,7 @@ export interface MessageOptions { images?: Base64ImageSource[]; files?: DocumentSource[]; outputFormat?: OutputFormat; + includePartialMessages?: boolean; abortSignal?: AbortSignal; } @@ -120,105 +97,35 @@ function throwIfAborted(signal: AbortSignal | undefined): void { } } -function parseJsonObject(text: string): JsonObject | null { - try { - const parsed: unknown = JSON.parse(text); - const result = JsonObjectSchema.safeParse(parsed); - return result.success ? result.data : null; - } catch { - return null; - } -} - -function extractAssistantText(message: DroidMessage): string { - if (message.type !== DroidMessageType.CreateMessage) { - return ''; - } - - if (message.role !== FactoryDroidMessageRole.Assistant) { - return ''; - } - - return message.content - .filter((block) => block.type === 'text') - .map((block) => block.text) - .join(''); -} - export function aggregateMessages( sessionId: string, - messages: DroidMessage[], + messages: DroidStreamEvent[], startedAt: number, - options?: MessageOptions + _options?: MessageOptions ): DroidResult { - let fullText = ''; - let lastTokenUsage: TokenUsageUpdate | null = null; - let firstError: ErrorEvent | null = null; - let structuredOutput: JsonObject | null = null; - let structuredOutputError: StructuredOutputError | null = null; - let receivedStructuredOutputNotification = false; - let finalAssistantText = ''; - let turnCount = 0; - - for (const msg of messages) { - if (msg.type === 
DroidMessageType.AssistantTextDelta) { - fullText += msg.text; - } - - const assistantText = extractAssistantText(msg); - if (assistantText) { - finalAssistantText = assistantText; - if (options?.outputFormat && fullText.length === 0) { - fullText = assistantText; - } - } - - if (msg.type === DroidMessageType.TokenUsageUpdate) { - lastTokenUsage = msg; - } - - if (msg.type === DroidMessageType.Error && firstError === null) { - firstError = msg; - } - - if (msg.type === DroidMessageType.StructuredOutput) { - receivedStructuredOutputNotification = true; - structuredOutput = msg.structuredOutput; - structuredOutputError = msg.structuredOutputError; - } - - if (msg.type === DroidMessageType.TurnComplete) { - turnCount++; - if (msg.tokenUsage) { - lastTokenUsage = msg.tokenUsage; - } - if (!receivedStructuredOutputNotification) { - structuredOutput = msg.structuredOutput; - structuredOutputError = msg.structuredOutputError; - receivedStructuredOutputNotification = - msg.structuredOutput !== null || msg.structuredOutputError !== null; - } - } - } - - if (options?.outputFormat && !receivedStructuredOutputNotification) { - const textToParse = finalAssistantText || fullText; - if (textToParse) { - structuredOutput = parseJsonObject(textToParse); - } + const result = messages.find( + (message): message is DroidResultMessage => + message.type === DroidMessageType.Result + ); + if (result) { + return result; } return { + type: DroidMessageType.Result, + subtype: 'error_during_execution', sessionId, - text: fullText, - messages, - tokenUsage: lastTokenUsage, durationMs: Date.now() - startedAt, - turnCount, - error: firstError, - structuredOutput, - structuredOutputError, - success: firstError === null, + isError: true, + numTurns: 0, + result: '', + tokenUsage: null, + errors: ['Stream completed without a result message'], + messages, + text: '', + turnCount: 0, + success: false, + error: null, }; } @@ -260,15 +167,30 @@ export class DroidSession { 
this._cleanupCallbacks.push(cleanup); } - /** Yields {@link DroidMessage} events until `turn_complete`. */ + /** Yields message-level events until the final `result` message. */ + stream( + prompt: string, + options?: MessageOptions & { includePartialMessages?: false } + ): AsyncGenerator; + /** Yields message-level events plus partial chunks until the final `result` message. */ + stream( + prompt: string, + options: MessageOptions & { includePartialMessages: true } + ): AsyncGenerator; async *stream( prompt: string, options?: MessageOptions - ): AsyncGenerator { + ): AsyncGenerator { this._ensureNotClosed(); throwIfAborted(options?.abortSignal); - const bridge = new MessageBridge(); + const startedAt = Date.now(); + const bridge = new MessageBridge(undefined, { + includePartialMessages: options?.includePartialMessages, + sessionId: this._sessionId, + startedAt, + outputFormat: options?.outputFormat, + }); const unsubscribe = this._client.onNotification(bridge.notificationHandler); let resolveAbort: () => void = () => {}; const abortPromise = new Promise((resolve) => { diff --git a/src/stream.ts b/src/stream.ts index a8e0e8d..eecd3a4 100644 --- a/src/stream.ts +++ b/src/stream.ts @@ -7,6 +7,11 @@ import { ToolConfirmationOutcome, } from './schemas/enums.js'; import type { McpServerStatusInfo, McpStatusSummary } from './schemas/mcp.js'; +import { + FactoryDroidMessageRole, + type FactoryDroidMessage, + type ToolUseBlock, +} from './schemas/messages.js'; import type { MissionFeature, ProgressLogEntry } from './schemas/mission.js'; import { SessionNotificationPayloadSchema, @@ -17,17 +22,25 @@ import { type StructuredOutputError as ServerStructuredOutputError, type ToolProgressUpdate, } from './schemas/server.js'; -import type { JsonObject, JsonValue } from './schemas/shared.js'; +import { + JsonObjectSchema, + type JsonObject, + type JsonValue, +} from './schemas/shared.js'; export const DroidMessageType = { + Assistant: 'assistant', + User: 'user', + ToolCall: 
'tool_call', AssistantTextDelta: 'assistant_text_delta', + AssistantTextComplete: 'assistant_text_complete', ThinkingTextDelta: 'thinking_text_delta', - ToolUse: 'tool_use', + ThinkingTextComplete: 'thinking_text_complete', + ToolCallDelta: 'tool_call_delta', ToolResult: 'tool_result', ToolProgress: 'tool_progress', WorkingStateChanged: 'working_state_changed', TokenUsageUpdate: 'token_usage_update', - CreateMessage: 'create_message', PermissionResolved: 'permission_resolved', SettingsUpdated: 'settings_updated', SessionTitleUpdated: 'session_title_updated', @@ -40,9 +53,8 @@ export const DroidMessageType = { MissionWorkerCompleted: 'mission_worker_completed', McpAuthRequired: 'mcp_auth_required', McpAuthCompleted: 'mcp_auth_completed', - StructuredOutput: 'structured_output', Error: 'error', - TurnComplete: 'turn_complete', + Result: 'result', } as const; export interface AssistantTextDelta { @@ -52,6 +64,12 @@ export interface AssistantTextDelta { readonly text: string; } +export interface AssistantTextComplete { + readonly type: 'assistant_text_complete'; + readonly messageId: string; + readonly blockIndex: number; +} + export interface ThinkingTextDelta { readonly type: 'thinking_text_delta'; readonly messageId: string; @@ -59,6 +77,18 @@ export interface ThinkingTextDelta { readonly text: string; } +export interface ThinkingTextComplete { + readonly type: 'thinking_text_complete'; + readonly messageId: string; + readonly blockIndex: number; + readonly durationMs?: number; +} + +export interface ToolCallDelta { + readonly type: 'tool_call_delta'; + readonly toolUse: ToolUseBlock; +} + export interface ToolUse { readonly type: 'tool_use'; readonly toolName: string; @@ -66,6 +96,22 @@ export interface ToolUse { readonly toolUseId: string; } +export interface DroidToolCallMessage { + readonly type: 'tool_call'; + readonly toolUse: ToolUseBlock; +} + +export interface DroidAssistantMessage { + readonly type: 'assistant'; + readonly message: FactoryDroidMessage; + 
readonly text: string; +} + +export interface DroidUserMessage { + readonly type: 'user'; + readonly message: FactoryDroidMessage; +} + export interface ToolResult { readonly type: 'tool_result'; readonly toolUseId: string; @@ -187,21 +233,68 @@ export interface ErrorEvent { readonly timestamp: string; } -/** Sentinel yielded when the agent turn finishes (returns to Idle). */ -export interface TurnComplete extends StructuredOutputFields { - readonly type: 'turn_complete'; - readonly tokenUsage: TokenUsageUpdate | null; +export type DroidErrorMessage = ErrorEvent; + +export type DroidToolResultMessage = ToolResult; + +export type DroidResultSubtype = + | 'success' + | 'error_during_execution' + | 'error_structured_output'; + +interface DroidResultBase { + readonly type: 'result'; + readonly sessionId: string; + readonly durationMs: number; + readonly numTurns: number; + readonly result: string; + readonly tokenUsage: TokenUsage | null; + /** Stream messages emitted before this result. */ + readonly messages: DroidStreamEvent[]; + readonly text: string; + readonly turnCount: number; + readonly success: boolean; +} + +export interface DroidResultSuccessMessage extends DroidResultBase { + readonly subtype: 'success'; + readonly isError: false; + readonly structuredOutput?: unknown; + readonly structuredOutputError?: null; + readonly error: null; +} + +export interface DroidResultErrorMessage extends DroidResultBase { + readonly subtype: 'error_during_execution' | 'error_structured_output'; + readonly isError: true; + readonly errors: string[]; + readonly structuredOutput?: unknown; + readonly structuredOutputError?: ServerStructuredOutputError | null; + readonly error: ErrorEvent | null; } -export type DroidMessage = +export type DroidResultMessage = + | DroidResultSuccessMessage + | DroidResultErrorMessage; + +export type DroidStreamMessage = + | DroidAssistantMessage + | DroidUserMessage + | DroidToolCallMessage + | ToolResult + | ErrorEvent + | DroidResultMessage; + 
+export type DroidStreamEvent = + | DroidStreamMessage | AssistantTextDelta + | AssistantTextComplete | ThinkingTextDelta - | ToolUse - | ToolResult + | ThinkingTextComplete + | ToolCallDelta | ToolProgress | WorkingStateChanged | TokenUsageUpdate - | CreateMessage | PermissionResolved | SettingsUpdated | SessionTitleUpdated @@ -213,17 +306,22 @@ export type DroidMessage = | MissionWorkerStarted | MissionWorkerCompleted | McpAuthRequired - | McpAuthCompleted - | StructuredOutput - | ErrorEvent - | TurnComplete; + | McpAuthCompleted; + +export type InternalDroidMessage = + | DroidStreamEvent + | ToolUse + | CreateMessage + | StructuredOutput; + +export type DroidMessage = DroidStreamEvent; export type DroidMessageType = (typeof DroidMessageType)[keyof typeof DroidMessageType]; export function convertNotificationToStreamMessage( raw: unknown -): DroidMessage | DroidMessage[] | null { +): InternalDroidMessage | InternalDroidMessage[] | null { const parsed = SessionNotificationPayloadSchema.safeParse(raw); if (!parsed.success) { return null; @@ -240,6 +338,13 @@ export function convertNotificationToStreamMessage( text: notification.textDelta, }; + case SessionNotificationType.ASSISTANT_TEXT_COMPLETE: + return { + type: DroidMessageType.AssistantTextComplete, + messageId: notification.messageId, + blockIndex: notification.blockIndex, + }; + case SessionNotificationType.THINKING_TEXT_DELTA: return { type: DroidMessageType.ThinkingTextDelta, @@ -248,6 +353,20 @@ export function convertNotificationToStreamMessage( text: notification.textDelta, }; + case SessionNotificationType.THINKING_TEXT_COMPLETE: + return { + type: DroidMessageType.ThinkingTextComplete, + messageId: notification.messageId, + blockIndex: notification.blockIndex, + durationMs: notification.durationMs, + }; + + case SessionNotificationType.TOOL_CALL: + return { + type: DroidMessageType.ToolCallDelta, + toolUse: notification.toolUse, + }; + case SessionNotificationType.TOOL_RESULT: return { type: 
DroidMessageType.ToolResult, @@ -289,26 +408,37 @@ export function convertNotificationToStreamMessage( case SessionNotificationType.CREATE_MESSAGE: { const msg = notification.message; - const messages: DroidMessage[] = []; + const messages: InternalDroidMessage[] = []; for (const block of msg.content) { - if (block.type === DroidMessageType.ToolUse) { + if (block.type === 'tool_use') { messages.push({ - type: DroidMessageType.ToolUse, - toolName: block.name, - toolInput: block.input, - toolUseId: block.id, + type: DroidMessageType.ToolCall, + toolUse: block, }); } } - messages.push({ - type: DroidMessageType.CreateMessage, - messageId: msg.id, - role: msg.role, - content: msg.content, - parentId: notification.parentId, - }); + if (msg.role === FactoryDroidMessageRole.Assistant) { + messages.push({ + type: DroidMessageType.Assistant, + message: msg, + text: extractTextFromMessage(msg), + }); + } else if (msg.role === FactoryDroidMessageRole.User) { + messages.push({ + type: DroidMessageType.User, + message: msg, + }); + } else { + messages.push({ + type: 'create_message', + messageId: msg.id, + role: msg.role, + content: msg.content, + parentId: notification.parentId, + }); + } return messages; } @@ -404,7 +534,7 @@ export function convertNotificationToStreamMessage( case SessionNotificationType.STRUCTURED_OUTPUT: return { - type: DroidMessageType.StructuredOutput, + type: 'structured_output', messageId: notification.messageId, structuredOutput: notification.structuredOutput, structuredOutputError: notification.structuredOutputError, @@ -424,58 +554,237 @@ export class StreamStateTracker { private lastTokenUsage: TokenUsageUpdate | null = null; + private fullText = ''; + + private finalAssistantText = ''; + private structuredOutput: JsonObject | null = null; private structuredOutputError: ServerStructuredOutputError | null = null; + private errors: ErrorEvent[] = []; + + private numTurns = 0; + + private emittedMessages: DroidStreamEvent[] = []; + private 
toolNameMap = new Map(); + constructor( + private readonly options: { + sessionId?: string; + startedAt?: number; + hasOutputFormat?: boolean; + } = {} + ) {} + private getToolName(toolUseId: string): string { return this.toolNameMap.get(toolUseId) ?? ''; } - processMessage(message: DroidMessage): { - message: DroidMessage; - additional: DroidMessage[]; + processMessage(message: InternalDroidMessage): { + message: InternalDroidMessage | null; + additional: InternalDroidMessage[]; } { - const additional: DroidMessage[] = []; + const additional: InternalDroidMessage[] = []; - if (message.type === DroidMessageType.ToolUse) { + if (message.type === 'tool_use') { this.toolNameMap.set(message.toolUseId, message.toolName); } + if (message.type === DroidMessageType.ToolCall) { + this.toolNameMap.set(message.toolUse.id, message.toolUse.name); + } + + if (message.type === DroidMessageType.ToolCallDelta) { + this.toolNameMap.set(message.toolUse.id, message.toolUse.name); + } + // Enrich tool_result with toolName from prior tool_use if (message.type === DroidMessageType.ToolResult) { message = { ...message, toolName: this.getToolName(message.toolUseId) }; } + if (message.type === DroidMessageType.AssistantTextDelta) { + this.fullText += message.text; + } + + if (message.type === DroidMessageType.Assistant) { + this.finalAssistantText = message.text; + if (this.fullText.length === 0) { + this.fullText = message.text; + } + } + if (message.type === DroidMessageType.TokenUsageUpdate) { this.lastTokenUsage = message; } - if (message.type === DroidMessageType.StructuredOutput) { + if (message.type === DroidMessageType.Error) { + this.errors.push(message); + } + + if (message.type === 'structured_output') { this.structuredOutput = message.structuredOutput; this.structuredOutputError = message.structuredOutputError; + return { message: null, additional }; } if (message.type === DroidMessageType.WorkingStateChanged) { if (message.state !== DroidWorkingState.Idle) { this.hasBeenNonIdle 
= true; } else if (this.hasBeenNonIdle) { - additional.push({ - type: DroidMessageType.TurnComplete, - tokenUsage: this.lastTokenUsage, - structuredOutput: this.structuredOutput, - structuredOutputError: this.structuredOutputError, - }); + this.numTurns++; + additional.push(this.createResultMessage()); this.hasBeenNonIdle = false; - this.structuredOutput = null; - this.structuredOutputError = null; + this.resetTurnState(); } } + if (message.type !== DroidMessageType.WorkingStateChanged) { + this.trackEmittedMessage(message); + } + return { message, additional }; } + + private createResultMessage(): DroidResultMessage { + if ( + this.options.hasOutputFormat && + this.structuredOutput === null && + this.structuredOutputError === null + ) { + this.structuredOutput = + parseJsonObject(this.finalAssistantText || this.fullText) ?? null; + } + + const tokenUsage = this.lastTokenUsage + ? stripTokenUsageType(this.lastTokenUsage) + : null; + const result = this.finalAssistantText || this.fullText; + const base = { + type: DroidMessageType.Result, + sessionId: this.options.sessionId ?? '', + durationMs: Date.now() - (this.options.startedAt ?? Date.now()), + numTurns: this.numTurns, + result, + tokenUsage, + messages: [...this.emittedMessages], + text: result, + turnCount: this.numTurns, + }; + + if (this.structuredOutputError) { + return { + ...base, + subtype: 'error_structured_output', + isError: true, + success: false, + errors: [this.structuredOutputError.message], + structuredOutput: this.structuredOutput, + structuredOutputError: this.structuredOutputError, + error: null, + }; + } + + if (this.errors.length > 0) { + return { + ...base, + subtype: 'error_during_execution', + isError: true, + success: false, + errors: this.errors.map((error) => error.message), + structuredOutput: this.structuredOutput, + structuredOutputError: null, + error: this.errors[0] ?? 
null, + }; + } + + return { + ...base, + subtype: 'success', + isError: false, + success: true, + structuredOutput: this.structuredOutput, + structuredOutputError: null, + error: null, + }; + } + + private trackEmittedMessage(message: InternalDroidMessage): void { + if (isDefaultStreamMessage(message)) { + this.emittedMessages.push(message); + } + } + + private resetTurnState(): void { + this.lastTokenUsage = null; + this.fullText = ''; + this.finalAssistantText = ''; + this.structuredOutput = null; + this.structuredOutputError = null; + this.errors = []; + this.emittedMessages = []; + } +} + +export function isPartialMessage(message: InternalDroidMessage): boolean { + return ( + message.type === DroidMessageType.AssistantTextDelta || + message.type === DroidMessageType.AssistantTextComplete || + message.type === DroidMessageType.ThinkingTextDelta || + message.type === DroidMessageType.ThinkingTextComplete || + message.type === DroidMessageType.ToolCallDelta || + message.type === DroidMessageType.ToolProgress + ); +} + +export function isInternalMessage(message: InternalDroidMessage): boolean { + return ( + message.type === 'structured_output' || + message.type === 'tool_use' || + message.type === 'create_message' + ); +} + +export function isDefaultStreamMessage( + message: InternalDroidMessage +): message is DroidStreamMessage { + return ( + message.type === DroidMessageType.Assistant || + message.type === DroidMessageType.User || + message.type === DroidMessageType.ToolCall || + message.type === DroidMessageType.ToolResult || + message.type === DroidMessageType.Error || + message.type === DroidMessageType.Result + ); +} + +function stripTokenUsageType(message: TokenUsageUpdate): TokenUsage { + const { + inputTokens, + outputTokens, + cacheCreationTokens, + cacheReadTokens, + thinkingTokens, + } = message; + return { + inputTokens, + outputTokens, + cacheCreationTokens, + cacheReadTokens, + thinkingTokens, + }; +} + +function parseJsonObject(text: string): 
JsonObject | null { + try { + const parsed: unknown = JSON.parse(text); + const result = JsonObjectSchema.safeParse(parsed); + return result.success ? result.data : null; + } catch { + return null; + } } function normalizeToolResultContent( @@ -492,3 +801,10 @@ function normalizeToolResultContent( } return String(content); } + +function extractTextFromMessage(message: FactoryDroidMessage): string { + return message.content + .filter((block) => block.type === 'text') + .map((block) => block.text) + .join(''); +} diff --git a/tests/helpers.test.ts b/tests/helpers.test.ts index 28f0baa..bbd818c 100644 --- a/tests/helpers.test.ts +++ b/tests/helpers.test.ts @@ -149,6 +149,7 @@ describe('MessageBridge', () => { }); it('processes notifications and yields messages via messages() generator', async () => { + bridge = new MessageBridge(undefined, { includePartialMessages: true }); bridge.notificationHandler( makeSessionNotification( SessionNotificationType.DROID_WORKING_STATE_CHANGED, @@ -167,8 +168,8 @@ describe('MessageBridge', () => { expect(messages.some((m) => m.type === 'working_state_changed')).toBe(true); }); - it('terminates generator on turn_complete message', async () => { - // Transition to streaming state then back to idle to trigger turn_complete + it('terminates generator on result message', async () => { + // Transition to streaming state then back to idle to trigger result bridge.notificationHandler( makeSessionNotification( SessionNotificationType.DROID_WORKING_STATE_CHANGED, @@ -186,12 +187,12 @@ describe('MessageBridge', () => { const messages = []; for await (const msg of bridge.messages()) { messages.push(msg); - if (msg.type === 'turn_complete') { + if (msg.type === 'result') { break; } } - expect(messages.some((m) => m.type === 'turn_complete')).toBe(true); + expect(messages.some((m) => m.type === 'result')).toBe(true); }); it('signalDone() terminates the generator when queue is empty', async () => { diff --git a/tests/helpers.ts b/tests/helpers.ts 
index 9957381..3f04686 100644 --- a/tests/helpers.ts +++ b/tests/helpers.ts @@ -8,7 +8,7 @@ import { ToolConfirmationOutcome, } from '../src/schemas/index.js'; import type { DroidSession } from '../src/session.js'; -import type { DroidMessage } from '../src/stream.js'; +import type { DroidMessage, DroidResultMessage } from '../src/stream.js'; import type { DroidClientTransport, ErrorCallback, @@ -25,8 +25,10 @@ export async function collectStreamText( let text = ''; for await (const msg of session.stream(prompt)) { messages.push(msg); - if (msg.type === 'assistant_text_delta') { + if (msg.type === 'assistant') { text += msg.text; + } else if (msg.type === 'result' && text.length === 0) { + text = msg.result; } } @@ -36,12 +38,12 @@ export async function collectStreamText( }; } -export function findLastTurnComplete( +export function findLastResult( messages: DroidMessage[] -): Extract | undefined { +): DroidResultMessage | undefined { for (let index = messages.length - 1; index >= 0; index--) { const msg = messages[index]; - if (msg?.type === 'turn_complete') { + if (msg?.type === 'result') { return msg; } } @@ -296,6 +298,20 @@ export function sendDefaultStreamSequence( ); } + if (deltas.length > 0) { + transport.injectMessage( + makeSessionNotification(SessionNotificationType.CREATE_MESSAGE, { + message: { + id: messageId, + role: 'assistant', + createdAt: 1000, + updatedAt: 1000, + content: [{ type: 'text', text: deltas.join('') }], + }, + }) + ); + } + if (includeTokenUsage) { transport.injectMessage( makeSessionNotification( diff --git a/tests/integration.test.ts b/tests/integration.test.ts index c236bfa..767dd54 100644 --- a/tests/integration.test.ts +++ b/tests/integration.test.ts @@ -25,7 +25,7 @@ import type { DroidMessage } from '../src/stream.js'; import { InMemoryTransport, collectStreamText, - findLastTurnComplete, + findLastResult, makePermissionRequestParams, makeServerRequest, makeSessionNotification, @@ -114,7 +114,7 @@ function wireTransport( } 
describe('Full session stream lifecycle (VAL-CROSS-001)', () => { - it('session.stream() sends initializeSession + addUserMessage, receives streaming notifications, and yields TurnComplete', async () => { + it('session.stream() sends initializeSession + addUserMessage, receives streaming notifications, and yields Result', async () => { const transport = new InMemoryTransport(); await transport.connect(); @@ -223,7 +223,9 @@ describe('Full session stream lifecycle (VAL-CROSS-001)', () => { const session = await createSession({ cwd: '/tmp', transport }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('Fix the bug')) { + for await (const msg of session.stream('Fix the bug', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -237,19 +239,19 @@ describe('Full session stream lifecycle (VAL-CROSS-001)', () => { expect(types).toContain('working_state_changed'); expect(types).toContain('assistant_text_delta'); - expect(types).toContain('tool_use'); - expect(types).toContain('create_message'); + expect(types).toContain('tool_call'); + expect(types).toContain('assistant'); expect(types).toContain('tool_result'); expect(types).toContain('token_usage_update'); - expect(types[types.length - 1]).toBe('turn_complete'); + expect(types[types.length - 1]).toBe('result'); - const turnComplete = messages[messages.length - 1]; - expect(turnComplete.type).toBe('turn_complete'); - if (turnComplete.type === 'turn_complete') { - expect(turnComplete.tokenUsage).not.toBeNull(); - expect(turnComplete.tokenUsage!.inputTokens).toBe(200); - expect(turnComplete.tokenUsage!.outputTokens).toBe(100); + const resultMessage = messages[messages.length - 1]; + expect(resultMessage.type).toBe('result'); + if (resultMessage.type === 'result') { + expect(resultMessage.tokenUsage).not.toBeNull(); + expect(resultMessage.tokenUsage!.inputTokens).toBe(200); + expect(resultMessage.tokenUsage!.outputTokens).toBe(100); } const textDeltas = messages.filter( @@ -263,12 
+265,12 @@ describe('Full session stream lifecycle (VAL-CROSS-001)', () => { expect(textDeltas[1].text).toBe('Done!'); } - const toolUse = messages.find((m) => m.type === 'tool_use'); + const toolUse = messages.find((m) => m.type === 'tool_call'); expect(toolUse).toBeDefined(); - if (toolUse?.type === 'tool_use') { - expect(toolUse.toolName).toBe('read_file'); - expect(toolUse.toolUseId).toBe('tu-1'); - expect(toolUse.toolInput).toEqual({ path: '/tmp/test.ts' }); + if (toolUse?.type === 'tool_call') { + expect(toolUse.toolUse.name).toBe('read_file'); + expect(toolUse.toolUse.id).toBe('tu-1'); + expect(toolUse.toolUse.input).toEqual({ path: '/tmp/test.ts' }); } const toolResult = messages.find((m) => m.type === 'tool_result'); @@ -286,7 +288,7 @@ describe('Full session stream lifecycle (VAL-CROSS-001)', () => { }); describe('Full session lifecycle (VAL-CROSS-002)', () => { - it("createSession() → session.stream('first') → session.stream('second') → session.close()", async () => { + it("createSession() → session.stream('first', { includePartialMessages: true }) → session.stream('second', { includePartialMessages: true }) → session.close()", async () => { const transport = new InMemoryTransport(); await transport.connect(); @@ -348,7 +350,9 @@ describe('Full session lifecycle (VAL-CROSS-002)', () => { expect(session.sessionId).toBe('sess-multi-turn'); const streamMessages: DroidMessage[] = []; - for await (const msg of session.stream('first message')) { + for await (const msg of session.stream('first message', { + includePartialMessages: true, + })) { streamMessages.push(msg); } @@ -360,12 +364,12 @@ describe('Full session lifecycle (VAL-CROSS-002)', () => { if (firstTextDelta?.type === 'assistant_text_delta') { expect(firstTextDelta.text).toBe('Response to turn 1'); } - expect(streamMessages[streamMessages.length - 1].type).toBe( - 'turn_complete' - ); + expect(streamMessages[streamMessages.length - 1].type).toBe('result'); const secondMessages: DroidMessage[] = []; 
- for await (const msg of session.stream('second message')) { + for await (const msg of session.stream('second message', { + includePartialMessages: true, + })) { secondMessages.push(msg); } @@ -376,12 +380,12 @@ describe('Full session lifecycle (VAL-CROSS-002)', () => { if (secondTextDelta?.type === 'assistant_text_delta') { expect(secondTextDelta.text).toBe('Response to turn 2'); } - const turnComplete = findLastTurnComplete(secondMessages); - expect(turnComplete?.type).toBe('turn_complete'); - if (turnComplete?.type === 'turn_complete') { - expect(turnComplete.tokenUsage).not.toBeNull(); - expect(turnComplete.tokenUsage!.inputTokens).toBe(200); - expect(turnComplete.tokenUsage!.outputTokens).toBe(100); + const resultMessage = findLastResult(secondMessages); + expect(resultMessage?.type).toBe('result'); + if (resultMessage?.type === 'result') { + expect(resultMessage.tokenUsage).not.toBeNull(); + expect(resultMessage.tokenUsage!.inputTokens).toBe(200); + expect(resultMessage.tokenUsage!.outputTokens).toBe(100); } expect(addUserMessageCount).toBe(2); @@ -458,22 +462,28 @@ describe('Full session lifecycle (VAL-CROSS-002)', () => { const session = await createSession({ cwd: '/tmp', transport }); const turn1Msgs: DroidMessage[] = []; - for await (const msg of session.stream('turn 1')) { + for await (const msg of session.stream('turn 1', { + includePartialMessages: true, + })) { turn1Msgs.push(msg); } - expect(turn1Msgs[turn1Msgs.length - 1].type).toBe('turn_complete'); + expect(turn1Msgs[turn1Msgs.length - 1].type).toBe('result'); const turn2Msgs: DroidMessage[] = []; - for await (const msg of session.stream('turn 2')) { + for await (const msg of session.stream('turn 2', { + includePartialMessages: true, + })) { turn2Msgs.push(msg); } - expect(turn2Msgs[turn2Msgs.length - 1].type).toBe('turn_complete'); + expect(turn2Msgs[turn2Msgs.length - 1].type).toBe('result'); const turn3Msgs: DroidMessage[] = []; - for await (const msg of session.stream('turn 3')) { + for await 
(const msg of session.stream('turn 3', { + includePartialMessages: true, + })) { turn3Msgs.push(msg); } - expect(turn3Msgs[turn3Msgs.length - 1].type).toBe('turn_complete'); + expect(turn3Msgs[turn3Msgs.length - 1].type).toBe('result'); expect(turnCount).toBe(3); @@ -659,7 +669,9 @@ describe('Permission handler integration (VAL-CROSS-003)', () => { }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('Run the tests')) { + for await (const msg of session.stream('Run the tests', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -685,7 +697,7 @@ describe('Permission handler integration (VAL-CROSS-003)', () => { expect(types).toContain('assistant_text_delta'); expect(types).toContain('tool_result'); expect(types).toContain('permission_resolved'); - expect(types[types.length - 1]).toBe('turn_complete'); + expect(types[types.length - 1]).toBe('result'); const textDeltas = messages.filter( (m) => m.type === 'assistant_text_delta' @@ -807,7 +819,9 @@ describe('Permission handler integration (VAL-CROSS-003)', () => { }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('Do two things')) { + for await (const msg of session.stream('Do two things', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -824,7 +838,7 @@ describe('Permission handler integration (VAL-CROSS-003)', () => { expect(permResponses.length).toBe(2); const types = messages.map((m) => m.type); - expect(types[types.length - 1]).toBe('turn_complete'); + expect(types[types.length - 1]).toBe('result'); await session.close(); }); @@ -898,7 +912,9 @@ describe('Permission handler integration (VAL-CROSS-003)', () => { }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('dangerous command')) { + for await (const msg of session.stream('dangerous command', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -914,7 +930,7 @@ describe('Permission handler integration (VAL-CROSS-003)', () => 
{ (permResponse['result'] as Record)['selectedOption'] ).toBe(ToolConfirmationOutcome.Cancel); - expect(messages[messages.length - 1].type).toBe('turn_complete'); + expect(messages[messages.length - 1].type).toBe('result'); await session.close(); expect(transport.isConnected).toBe(false); @@ -1087,7 +1103,9 @@ describe('Ask-user handler integration (VAL-CROSS-004)', () => { }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('Set up the project')) { + for await (const msg of session.stream('Set up the project', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -1115,7 +1133,7 @@ describe('Ask-user handler integration (VAL-CROSS-004)', () => { const types = messages.map((m) => m.type); expect(types).toContain('assistant_text_delta'); - expect(types[types.length - 1]).toBe('turn_complete'); + expect(types[types.length - 1]).toBe('result'); const textDeltas = messages.filter( (m) => m.type === 'assistant_text_delta' @@ -1203,7 +1221,7 @@ describe('Ask-user handler integration (VAL-CROSS-004)', () => { }); describe('Interrupt during active streaming (VAL-CROSS-005)', () => { - it('interrupt during ExecutingTool state emits TurnComplete', async () => { + it('interrupt during ExecutingTool state emits Result', async () => { const transport = new InMemoryTransport(); await transport.connect(); @@ -1260,7 +1278,9 @@ describe('Interrupt during active streaming (VAL-CROSS-005)', () => { const messages: DroidMessage[] = []; let didInterrupt = false; - for await (const msg of session.stream('test')) { + for await (const msg of session.stream('test', { + includePartialMessages: true, + })) { messages.push(msg); if ( @@ -1277,7 +1297,7 @@ describe('Interrupt during active streaming (VAL-CROSS-005)', () => { expect(didInterrupt).toBe(true); const types = messages.map((m) => m.type); - expect(types[types.length - 1]).toBe('turn_complete'); + expect(types[types.length - 1]).toBe('result'); await session.close(); }); @@ -1363,13 
+1383,15 @@ describe('Interrupt during active streaming (VAL-CROSS-005)', () => { const session = await createSession({ cwd: '/tmp', transport }); const msgs1: DroidMessage[] = []; - for await (const msg of session.stream('first')) { + for await (const msg of session.stream('first', { + includePartialMessages: true, + })) { msgs1.push(msg); if (msg.type === 'assistant_text_delta') { await session.interrupt(); } } - expect(msgs1[msgs1.length - 1].type).toBe('turn_complete'); + expect(msgs1[msgs1.length - 1].type).toBe('result'); const result = await collectStreamText(session, 'second'); expect(result.text).toBe('Full second response'); @@ -1380,7 +1402,7 @@ describe('Interrupt during active streaming (VAL-CROSS-005)', () => { await session.close(); }); - it('session.stream() active → interrupt() called → remaining messages yielded → TurnComplete', async () => { + it('session.stream() active → interrupt() called → remaining messages yielded → Result', async () => { const transport = new InMemoryTransport(); await transport.connect(); @@ -1468,7 +1490,9 @@ describe('Interrupt during active streaming (VAL-CROSS-005)', () => { const messages: DroidMessage[] = []; let didInterrupt = false; - for await (const msg of session.stream('Write a long essay')) { + for await (const msg of session.stream('Write a long essay', { + includePartialMessages: true, + })) { messages.push(msg); if (msg.type === 'assistant_text_delta' && !didInterrupt) { @@ -1487,7 +1511,7 @@ describe('Interrupt during active streaming (VAL-CROSS-005)', () => { const types = messages.map((m) => m.type); expect(types.filter((t) => t === 'assistant_text_delta').length).toBe(3); - expect(types[types.length - 1]).toBe('turn_complete'); + expect(types[types.length - 1]).toBe('result'); const textDeltas = messages.filter( (m) => m.type === 'assistant_text_delta' @@ -1496,10 +1520,10 @@ describe('Interrupt during active streaming (VAL-CROSS-005)', () => { expect(textDeltas[2].text).toBe('Final chunk after 
interrupt.'); } - const turnComplete = messages[messages.length - 1]; - if (turnComplete.type === 'turn_complete') { - expect(turnComplete.tokenUsage).not.toBeNull(); - expect(turnComplete.tokenUsage!.inputTokens).toBe(50); + const resultMessage = messages[messages.length - 1]; + if (resultMessage.type === 'result') { + expect(resultMessage.tokenUsage).not.toBeNull(); + expect(resultMessage.tokenUsage!.inputTokens).toBe(50); } await session.close(); @@ -1565,7 +1589,9 @@ describe('Transport errors during supported session APIs (VAL-CROSS-006)', () => const session = await createSession({ transport }); try { - for await (const _msg of session.stream('Do something')) { + for await (const _msg of session.stream('Do something', { + includePartialMessages: true, + })) { void _msg; } } catch (err) { @@ -1663,7 +1689,9 @@ describe('Transport errors during supported session APIs (VAL-CROSS-006)', () => let caughtError: Error | null = null; try { - for await (const _msg of session.stream('trigger crash')) { + for await (const _msg of session.stream('trigger crash', { + includePartialMessages: true, + })) { void _msg; } } catch (err) { @@ -1751,7 +1779,9 @@ describe('Settings update notification flow (VAL-CROSS-007)', () => { const session = await createSession({ cwd: '/tmp', transport }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('do work')) { + for await (const msg of session.stream('do work', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -1871,7 +1901,9 @@ describe('Settings update notification flow (VAL-CROSS-007)', () => { }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('Edit the file')) { + for await (const msg of session.stream('Edit the file', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -1890,7 +1922,7 @@ describe('Settings update notification flow (VAL-CROSS-007)', () => { ], }); - expect(messages[messages.length - 1].type).toBe('turn_complete'); + 
expect(messages[messages.length - 1].type).toBe('result'); await session.close(); }); @@ -1958,7 +1990,9 @@ describe('Settings update notification flow (VAL-CROSS-007)', () => { }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('Set up DB')) { + for await (const msg of session.stream('Set up DB', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -1974,7 +2008,7 @@ describe('Settings update notification flow (VAL-CROSS-007)', () => { expect(result['cancelled']).toBe(true); expect(result['answers']).toEqual([]); - expect(messages[messages.length - 1].type).toBe('turn_complete'); + expect(messages[messages.length - 1].type).toBe('result'); await session.close(); }); @@ -2027,7 +2061,9 @@ describe('Settings update notification flow (VAL-CROSS-007)', () => { const session = await createSession({ transport }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('Do something')) { + for await (const msg of session.stream('Do something', { + includePartialMessages: true, + })) { messages.push(msg); } @@ -2042,7 +2078,7 @@ describe('Settings update notification flow (VAL-CROSS-007)', () => { }); } - expect(types[types.length - 1]).toBe('turn_complete'); + expect(types[types.length - 1]).toBe('result'); await session.close(); }); }); diff --git a/tests/public-api.test.ts b/tests/public-api.test.ts index c9034e2..66720d7 100644 --- a/tests/public-api.test.ts +++ b/tests/public-api.test.ts @@ -36,10 +36,11 @@ describe('public API barrel', () => { }); it('exports stable message type constants', () => { + expect(DroidMessageType.Assistant).toBe('assistant'); + expect(DroidMessageType.ToolCall).toBe('tool_call'); expect(DroidMessageType.AssistantTextDelta).toBe('assistant_text_delta'); - expect(DroidMessageType.ToolUse).toBe('tool_use'); + expect(DroidMessageType.ToolCallDelta).toBe('tool_call_delta'); expect(DroidMessageType.TokenUsageUpdate).toBe('token_usage_update'); - 
expect(DroidMessageType.StructuredOutput).toBe('structured_output'); - expect(DroidMessageType.TurnComplete).toBe('turn_complete'); + expect(DroidMessageType.Result).toBe('result'); }); }); diff --git a/tests/run.test.ts b/tests/run.test.ts index c3cf8fe..f2a236c 100644 --- a/tests/run.test.ts +++ b/tests/run.test.ts @@ -273,15 +273,12 @@ describe('run()', () => { expect(result.text).toEqual(JSON.stringify({ name: 'Ada' })); expect(result.structuredOutput).toEqual({ name: 'Ada' }); expect(result.messages).toContainEqual({ - type: 'create_message', - messageId: 'msg-structured', - role: 'assistant', - content: [ - { - type: 'text', - text: JSON.stringify({ name: 'Ada' }), - }, - ], + type: 'assistant', + text: JSON.stringify({ name: 'Ada' }), + message: expect.objectContaining({ + id: 'msg-structured', + role: 'assistant', + }), }); }); diff --git a/tests/schemas.test.ts b/tests/schemas.test.ts index 5a26fd0..dd4e3e7 100644 --- a/tests/schemas.test.ts +++ b/tests/schemas.test.ts @@ -175,11 +175,14 @@ describe('enums', () => { expect(DroidClientMethod.ASK_USER).toBe('droid.ask_user'); }); - it('SessionNotificationType has 21 types', () => { + it('SessionNotificationType has 24 types', () => { const values = Object.values(SessionNotificationType); const expectedValues = [ 'assistant_text_delta', + 'assistant_text_complete', 'thinking_text_delta', + 'thinking_text_complete', + 'tool_call', 'tool_result', 'tool_progress_update', 'create_message', diff --git a/tests/session.test.ts b/tests/session.test.ts index 4b840f8..d8a04d3 100644 --- a/tests/session.test.ts +++ b/tests/session.test.ts @@ -26,7 +26,7 @@ import type { DroidMessage } from '../src/stream.js'; import { InMemoryTransport, collectStreamText, - findLastTurnComplete, + findLastResult, makeErrorResponse, makeSessionNotification, makeSuccessResponse, @@ -325,7 +325,7 @@ describe('resumeSession()', () => { describe('DroidSession', () => { describe('stream() API (VAL-API-004)', () => { - it('streams DroidMessage 
until TurnComplete', async () => { + it('streams DroidMessage until Result', async () => { const transport = new InMemoryTransport(); await transport.connect(); @@ -338,14 +338,14 @@ describe('DroidSession', () => { messages.push(msg); } - expect(messages.length).toBeGreaterThanOrEqual(3); + expect(messages.length).toBeGreaterThanOrEqual(2); - const textDeltas = messages.filter( - (m) => m.type === 'assistant_text_delta' + const results = messages.filter( + (m) => m.type === 'result' && m.result.length > 0 ); - expect(textDeltas.length).toBeGreaterThanOrEqual(1); + expect(results.length).toBeGreaterThanOrEqual(1); - expect(messages[messages.length - 1].type).toBe('turn_complete'); + expect(messages[messages.length - 1].type).toBe('result'); await session.close(); }); @@ -393,14 +393,8 @@ describe('DroidSession', () => { messages.push(msg); } - expect(messages).toContainEqual({ - type: 'structured_output', - messageId: 'msg-structured', - structuredOutput: { name: 'Ada' }, - structuredOutputError: null, - }); expect(messages[messages.length - 1]).toMatchObject({ - type: 'turn_complete', + type: 'result', structuredOutput: { name: 'Ada' }, structuredOutputError: null, }); @@ -454,17 +448,8 @@ describe('DroidSession', () => { messages.push(msg); } - expect(messages).toContainEqual({ - type: 'structured_output', - messageId: 'msg-structured', - structuredOutput: null, - structuredOutputError: { - code: 'schema_validation_failed', - message: '/name must be string', - }, - }); expect(messages[messages.length - 1]).toMatchObject({ - type: 'turn_complete', + type: 'result', structuredOutput: null, structuredOutputError: { code: 'schema_validation_failed', @@ -487,13 +472,13 @@ describe('DroidSession', () => { for await (const msg of session.stream('First message')) { msgs1.push(msg); } - expect(msgs1[msgs1.length - 1].type).toBe('turn_complete'); + expect(msgs1[msgs1.length - 1].type).toBe('result'); const msgs2: DroidMessage[] = []; for await (const msg of 
session.stream('Second message')) { msgs2.push(msg); } - expect(msgs2[msgs2.length - 1].type).toBe('turn_complete'); + expect(msgs2[msgs2.length - 1].type).toBe('result'); const addMsgCalls = transport.sentMessages.filter( (m) => @@ -1096,14 +1081,16 @@ describe('DroidSession', () => { const session = await createSession({ transport }); const messages: DroidMessage[] = []; - for await (const msg of session.stream('test')) { + for await (const msg of session.stream('test', { + includePartialMessages: true, + })) { messages.push(msg); if (msg.type === 'assistant_text_delta') { await session.close(); } } - expect(messages[messages.length - 1].type).toBe('turn_complete'); + expect(messages[messages.length - 1].type).toBe('result'); expect(transport.isConnected).toBe(false); await expectStreamToThrow(session, 'test'); @@ -1169,7 +1156,7 @@ describe('DroidSession', () => { const result = await collectStreamText(session, 'second turn'); expect(result.text).toBe('Hello world'); - expect(result.messages.length).toBeGreaterThanOrEqual(3); + expect(result.messages.length).toBeGreaterThanOrEqual(1); const addMsgCalls = transport.sentMessages.filter( (m) => @@ -1263,9 +1250,9 @@ describe('DroidSession', () => { for await (const msg of session.stream('first')) { result1.push(msg); } - const turn1 = findLastTurnComplete(result1); - expect(turn1?.type).toBe('turn_complete'); - if (turn1?.type === 'turn_complete') { + const turn1 = findLastResult(result1); + expect(turn1?.type).toBe('result'); + if (turn1?.type === 'result') { expect(turn1.tokenUsage).not.toBeNull(); expect(turn1.tokenUsage!.inputTokens).toBe(100); expect(turn1.tokenUsage!.outputTokens).toBe(50); @@ -1275,9 +1262,9 @@ describe('DroidSession', () => { for await (const msg of session.stream('second')) { result2.push(msg); } - const turn2 = findLastTurnComplete(result2); - expect(turn2?.type).toBe('turn_complete'); - if (turn2?.type === 'turn_complete') { + const turn2 = findLastResult(result2); + 
expect(turn2?.type).toBe('result'); + if (turn2?.type === 'result') { expect(turn2.tokenUsage).not.toBeNull(); expect(turn2.tokenUsage!.inputTokens).toBe(200); expect(turn2.tokenUsage!.outputTokens).toBe(75); diff --git a/tests/stream.test.ts b/tests/stream.test.ts index 0eabbdd..3712adb 100644 --- a/tests/stream.test.ts +++ b/tests/stream.test.ts @@ -37,10 +37,10 @@ import type { MissionWorkerCompleted, McpAuthRequired, McpAuthCompleted, - StructuredOutput, ErrorEvent, - TurnComplete, DroidMessage, + DroidResultMessage, + StructuredOutput, } from '../src/stream.js'; function makeNotification(type: string, payload: Record) { @@ -48,14 +48,18 @@ function makeNotification(type: string, payload: Record) { } const expectedDroidMessageTypes = [ + 'assistant', + 'user', + 'tool_call', 'assistant_text_delta', + 'assistant_text_complete', 'thinking_text_delta', - 'tool_use', + 'thinking_text_complete', + 'tool_call_delta', 'tool_result', 'tool_progress', 'working_state_changed', 'token_usage_update', - 'create_message', 'permission_resolved', 'settings_updated', 'session_title_updated', @@ -68,9 +72,8 @@ const expectedDroidMessageTypes = [ 'mission_worker_completed', 'mcp_auth_required', 'mcp_auth_completed', - 'structured_output', 'error', - 'turn_complete', + 'result', ] as const satisfies readonly DroidMessage['type'][]; const allMessageTypesCovered: Exclude< @@ -338,20 +341,7 @@ describe('DroidMessage types', () => { expect(msg.errorType).toBe(DroidErrorType.SESSION_ERROR); }); - it('TurnComplete has correct structure', () => { - const msg: TurnComplete = { - type: 'turn_complete', - tokenUsage: null, - structuredOutput: null, - structuredOutputError: null, - }; - expect(msg.type).toBe('turn_complete'); - expect(msg.tokenUsage).toBeNull(); - expect(msg.structuredOutput).toBeNull(); - expect(msg.structuredOutputError).toBeNull(); - }); - - it('TurnComplete with token usage', () => { + it('DroidResultMessage has correct structure', () => { const tokenUsage: 
TokenUsageUpdate = { type: 'token_usage_update', inputTokens: 100, @@ -360,31 +350,89 @@ describe('DroidMessage types', () => { cacheCreationTokens: 5, thinkingTokens: 20, }; - const msg: TurnComplete = { - type: 'turn_complete', + const msg: DroidResultMessage = { + type: 'result', + subtype: 'success', + sessionId: 's1', + durationMs: 1, + isError: false, + numTurns: 1, + result: 'done', tokenUsage, - structuredOutput: null, - structuredOutputError: null, + messages: [], + text: 'done', + turnCount: 1, + success: true, + error: null, }; + expect(msg.type).toBe('result'); expect(msg.tokenUsage).not.toBeNull(); expect(msg.tokenUsage!.inputTokens).toBe(100); }); it('DroidMessage union type allows all 23 types', () => { const messages: DroidMessage[] = [ + { + type: 'assistant', + message: { + id: 'a1', + role: 'assistant', + content: [{ type: 'text', text: 'hi' }], + createdAt: 1, + updatedAt: 1, + }, + text: 'hi', + }, + { + type: 'user', + message: { + id: 'u1', + role: 'user', + content: [{ type: 'text', text: 'hi' }], + createdAt: 1, + updatedAt: 1, + }, + }, + { + type: 'tool_call', + toolUse: { + type: 'tool_use', + id: 'tu-1', + name: 'Read', + input: {}, + }, + }, { type: 'assistant_text_delta', messageId: 'm1', blockIndex: 0, text: 'hi', }, + { + type: 'assistant_text_complete', + messageId: 'm1', + blockIndex: 0, + }, { type: 'thinking_text_delta', messageId: 'm1', blockIndex: 0, text: 'hmm', }, - { type: 'tool_use', toolName: 'x', toolInput: {}, toolUseId: 'tu1' }, + { + type: 'thinking_text_complete', + messageId: 'm1', + blockIndex: 0, + }, + { + type: 'tool_call_delta', + toolUse: { + type: 'tool_use', + id: 'tu-1', + name: 'Read', + input: {}, + }, + }, { type: 'tool_result', toolUseId: 'tu1', @@ -408,12 +456,6 @@ describe('DroidMessage types', () => { cacheCreationTokens: 0, thinkingTokens: 0, }, - { - type: 'create_message', - messageId: 'm1', - role: 'assistant', - content: [], - }, { type: 'permission_resolved', requestId: 'r1', @@ -446,12 +488,6 
@@ describe('DroidMessage types', () => { outcome: McpAuthOutcome.Success, message: 'm', }, - { - type: 'structured_output', - messageId: 'm1', - structuredOutput: { name: 'Ada' }, - structuredOutputError: null, - }, { type: 'error', message: 'err', @@ -459,10 +495,19 @@ describe('DroidMessage types', () => { timestamp: 't', }, { - type: 'turn_complete', + type: 'result', + subtype: 'success', + sessionId: 's1', + durationMs: 1, + isError: false, + numTurns: 1, + result: 'hi', tokenUsage: null, - structuredOutput: null, - structuredOutputError: null, + messages: [], + text: 'hi', + turnCount: 1, + success: true, + error: null, }, ]; expect(messages).toHaveLength(expectedDroidMessageTypes.length); @@ -718,7 +763,7 @@ describe('convertNotificationToStreamMessage', () => { }); describe('create_message', () => { - it('converts message with tool_use blocks to ToolUse + CreateMessage', () => { + it('converts message with tool_use blocks to tool_call + assistant', () => { const notification = makeNotification( SessionNotificationType.CREATE_MESSAGE, { @@ -750,22 +795,28 @@ describe('convertNotificationToStreamMessage', () => { const messages = result as DroidMessage[]; expect(messages).toHaveLength(3); - const tu1 = messages[0] as ToolUse; - expect(tu1.type).toBe('tool_use'); - expect(tu1.toolName).toBe('read_file'); - expect(tu1.toolUseId).toBe('tu-1'); - expect(tu1.toolInput).toEqual({ path: '/tmp/test' }); - - const tu2 = messages[1] as ToolUse; - expect(tu2.type).toBe('tool_use'); - expect(tu2.toolName).toBe('write_file'); - expect(tu2.toolUseId).toBe('tu-2'); - - const cm = messages[2] as CreateMessage; - expect(cm.type).toBe('create_message'); - expect(cm.messageId).toBe('msg-1'); - expect(cm.role).toBe('assistant'); - expect(cm.parentId).toBe('parent-1'); + expect(messages[0]).toMatchObject({ + type: 'tool_call', + toolUse: { + id: 'tu-1', + name: 'read_file', + input: { path: '/tmp/test' }, + }, + }); + expect(messages[1]).toMatchObject({ + type: 'tool_call', + 
toolUse: { + id: 'tu-2', + name: 'write_file', + }, + }); + expect(messages[2]).toMatchObject({ + type: 'assistant', + message: { + id: 'msg-1', + role: 'assistant', + }, + }); }); it('converts message without tool_use blocks to just CreateMessage', () => { @@ -785,7 +836,7 @@ describe('convertNotificationToStreamMessage', () => { expect(Array.isArray(result)).toBe(true); const messages = result as DroidMessage[]; expect(messages).toHaveLength(1); - expect(messages[0].type).toBe('create_message'); + expect(messages[0].type).toBe('assistant'); }); it('handles empty content array', () => { @@ -805,7 +856,7 @@ describe('convertNotificationToStreamMessage', () => { expect(Array.isArray(result)).toBe(true); const messages = result as DroidMessage[]; expect(messages).toHaveLength(1); - expect(messages[0].type).toBe('create_message'); + expect(messages[0].type).toBe('assistant'); }); }); @@ -1143,11 +1194,28 @@ describe('convertNotificationToStreamMessage', () => { blockIndex: 0, textDelta: 't', }, + [SessionNotificationType.ASSISTANT_TEXT_COMPLETE]: { + messageId: 'm', + blockIndex: 0, + }, [SessionNotificationType.THINKING_TEXT_DELTA]: { messageId: 'm', blockIndex: 0, textDelta: 't', }, + [SessionNotificationType.THINKING_TEXT_COMPLETE]: { + messageId: 'm', + blockIndex: 0, + durationMs: 1, + }, + [SessionNotificationType.TOOL_CALL]: { + toolUse: { + type: 'tool_use', + id: 'tu', + name: 'Read', + input: {}, + }, + }, [SessionNotificationType.TOOL_RESULT]: { messageId: 'm', toolUseId: 'tu', @@ -1252,8 +1320,8 @@ describe('StreamStateTracker', () => { tracker = new StreamStateTracker(); }); - describe('TurnComplete emission', () => { - it('emits TurnComplete on non-idle → idle transition', () => { + describe('Result emission', () => { + it('emits Result on non-idle → idle transition', () => { const r1 = tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, @@ -1265,11 +1333,11 @@ describe('StreamStateTracker', () => { 
state: DroidWorkingState.Idle, }); expect(r2.additional).toHaveLength(1); - expect(r2.additional[0].type).toBe('turn_complete'); - expect((r2.additional[0] as TurnComplete).tokenUsage).toBeNull(); + expect(r2.additional[0].type).toBe('result'); + expect((r2.additional[0] as DroidResultMessage).tokenUsage).toBeNull(); }); - it('does NOT emit TurnComplete for initial idle', () => { + it('does NOT emit Result for initial idle', () => { const result = tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.Idle, @@ -1277,7 +1345,7 @@ describe('StreamStateTracker', () => { expect(result.additional).toEqual([]); }); - it('does NOT emit TurnComplete for non-idle → non-idle transitions', () => { + it('does NOT emit Result for non-idle → non-idle transitions', () => { tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, @@ -1289,7 +1357,7 @@ describe('StreamStateTracker', () => { expect(result.additional).toEqual([]); }); - it('emits TurnComplete after multiple non-idle states', () => { + it('emits Result after multiple non-idle states', () => { tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, @@ -1303,10 +1371,10 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); expect(result.additional).toHaveLength(1); - expect(result.additional[0].type).toBe('turn_complete'); + expect(result.additional[0].type).toBe('result'); }); - it('can emit TurnComplete again after reset', () => { + it('can emit Result again after reset', () => { tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, @@ -1333,12 +1401,12 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); expect(r2.additional).toHaveLength(1); - expect(r2.additional[0].type).toBe('turn_complete'); + expect(r2.additional[0].type).toBe('result'); }); }); - describe('StructuredOutput propagation 
to TurnComplete', () => { - it('attaches structured output to TurnComplete', () => { + describe('StructuredOutput and Result', () => { + it('attaches structured output to Result', () => { tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, @@ -1355,12 +1423,16 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); - const tc = result.additional[0] as TurnComplete; - expect(tc.structuredOutput).toEqual({ name: 'Ada' }); - expect(tc.structuredOutputError).toBeNull(); + const tc = result.additional[0] as DroidResultMessage; + expect(tc).toMatchObject({ + type: 'result', + tokenUsage: null, + structuredOutput: { name: 'Ada' }, + structuredOutputError: null, + }); }); - it('attaches structured output errors to TurnComplete', () => { + it('attaches structured output errors to Result', () => { tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, @@ -1380,15 +1452,19 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); - const tc = result.additional[0] as TurnComplete; - expect(tc.structuredOutput).toBeNull(); - expect(tc.structuredOutputError).toEqual({ - code: 'schema_validation_failed', - message: '/name must be string', + const tc = result.additional[0] as DroidResultMessage; + expect(tc).toMatchObject({ + type: 'result', + tokenUsage: null, + structuredOutput: null, + structuredOutputError: { + code: 'schema_validation_failed', + message: '/name must be string', + }, }); }); - it('does not leak structured output between turns', () => { + it('does not carry structured output across turns', () => { tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, @@ -1413,14 +1489,18 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); - const tc = result.additional[0] as TurnComplete; - expect(tc.structuredOutput).toBeNull(); - 
expect(tc.structuredOutputError).toBeNull(); + const tc = result.additional[0] as DroidResultMessage; + expect(tc).toMatchObject({ + type: 'result', + tokenUsage: null, + structuredOutput: null, + structuredOutputError: null, + }); }); }); - describe('TokenUsage propagation to TurnComplete', () => { - it('carries last-seen TokenUsageUpdate in TurnComplete', () => { + describe('TokenUsage propagation to Result', () => { + it('carries last-seen TokenUsageUpdate in Result', () => { const tokenUsage: TokenUsageUpdate = { type: 'token_usage_update', inputTokens: 100, @@ -1441,8 +1521,8 @@ describe('StreamStateTracker', () => { }); expect(result.additional).toHaveLength(1); - const tc = result.additional[0] as TurnComplete; - expect(tc.type).toBe('turn_complete'); + const tc = result.additional[0] as DroidResultMessage; + expect(tc.type).toBe('result'); expect(tc.tokenUsage).not.toBeNull(); expect(tc.tokenUsage!.inputTokens).toBe(100); expect(tc.tokenUsage!.outputTokens).toBe(50); @@ -1478,7 +1558,7 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); - const tc = result.additional[0] as TurnComplete; + const tc = result.additional[0] as DroidResultMessage; expect(tc.tokenUsage!.inputTokens).toBe(200); expect(tc.tokenUsage!.outputTokens).toBe(100); }); @@ -1493,7 +1573,7 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); - const tc = result.additional[0] as TurnComplete; + const tc = result.additional[0] as DroidResultMessage; expect(tc.tokenUsage).toBeNull(); }); @@ -1526,7 +1606,7 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); - const tc = result.additional[0] as TurnComplete; + const tc = result.additional[0] as DroidResultMessage; expect(tc.tokenUsage).toBeNull(); }); }); @@ -1546,7 +1626,7 @@ describe('StreamStateTracker', () => { content: 'file contents', isError: false, }); - expect(message.type).toBe('tool_result'); + expect(message?.type).toBe('tool_result'); expect((message as 
ToolResult).toolName).toBe('read_file'); }); @@ -1647,7 +1727,7 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); expect(r1.additional).toHaveLength(1); - expect(r1.additional[0].type).toBe('turn_complete'); + expect(r1.additional[0].type).toBe('result'); }); it('simulates multi-turn session with reset between turns', () => { @@ -1669,7 +1749,8 @@ describe('StreamStateTracker', () => { }); expect(turn1Result.additional).toHaveLength(1); expect( - (turn1Result.additional[0] as TurnComplete).tokenUsage!.inputTokens + (turn1Result.additional[0] as DroidResultMessage).tokenUsage! + .inputTokens ).toBe(50); tracker = new StreamStateTracker(); @@ -1692,7 +1773,8 @@ describe('StreamStateTracker', () => { }); expect(turn2Result.additional).toHaveLength(1); expect( - (turn2Result.additional[0] as TurnComplete).tokenUsage!.inputTokens + (turn2Result.additional[0] as DroidResultMessage).tokenUsage! + .inputTokens ).toBe(200); }); }); @@ -1714,7 +1796,7 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); expect(result.additional).toHaveLength(1); - expect(result.additional[0].type).toBe('turn_complete'); + expect(result.additional[0].type).toBe('result'); }); it('handles WaitingForToolConfirmation as non-idle', () => { @@ -1727,7 +1809,7 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); expect(result.additional).toHaveLength(1); - expect(result.additional[0].type).toBe('turn_complete'); + expect(result.additional[0].type).toBe('result'); }); it('handles CompactingConversation as non-idle', () => { @@ -1740,10 +1822,10 @@ describe('StreamStateTracker', () => { state: DroidWorkingState.Idle, }); expect(result.additional).toHaveLength(1); - expect(result.additional[0].type).toBe('turn_complete'); + expect(result.additional[0].type).toBe('result'); }); - it('multiple idle transitions after non-idle only emit first TurnComplete (no duplicate)', () => { + it('multiple idle transitions after non-idle only 
emit first Result (no duplicate)', () => { tracker.processMessage({ type: 'working_state_changed', state: DroidWorkingState.StreamingAssistantMessage, From f3a5a7de000aad51a02262f14c600521255a47bc Mon Sep 17 00:00:00 2001 From: User Date: Tue, 12 May 2026 17:22:56 -0700 Subject: [PATCH 3/4] test: move stress examples out of structured output PR Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- examples/stress/_harness.ts | 457 ---------------- examples/stress/all.ts | 48 -- examples/stress/cancel-interrupt.ts | 92 ---- examples/stress/default-streaming.ts | 79 --- .../stress/default-vs-partial-consistency.ts | 87 --- examples/stress/error-paths.ts | 90 --- examples/stress/multi-turn.ts | 137 ----- examples/stress/partial-streaming.ts | 47 -- examples/stress/structured-output.ts | 215 -------- examples/stress/tool-use.ts | 101 ---- examples/structured-output-stress-test.ts | 515 ------------------ 11 files changed, 1868 deletions(-) delete mode 100644 examples/stress/_harness.ts delete mode 100644 examples/stress/all.ts delete mode 100644 examples/stress/cancel-interrupt.ts delete mode 100644 examples/stress/default-streaming.ts delete mode 100644 examples/stress/default-vs-partial-consistency.ts delete mode 100644 examples/stress/error-paths.ts delete mode 100644 examples/stress/multi-turn.ts delete mode 100644 examples/stress/partial-streaming.ts delete mode 100644 examples/stress/structured-output.ts delete mode 100644 examples/stress/tool-use.ts delete mode 100644 examples/structured-output-stress-test.ts diff --git a/examples/stress/_harness.ts b/examples/stress/_harness.ts deleted file mode 100644 index 775bc38..0000000 --- a/examples/stress/_harness.ts +++ /dev/null @@ -1,457 +0,0 @@ -import assert from 'node:assert/strict'; -import { mkdir, rm, writeFile } from 'node:fs/promises'; -import { join } from 'node:path'; -import { pathToFileURL } from 'node:url'; - -import { - DroidMessageType, - createSession, - run, - 
type CreateSessionOptions, - type DroidResult, - type DroidSession, - type DroidStreamEvent, - type MessageOptions, - type RunOptions, -} from '@factory/droid-sdk'; - -export interface CollectedStream { - name: string; - events: DroidStreamEvent[]; - counts: Record; - result: DroidResult; -} - -export const DEFAULT_PUBLIC_TYPES = new Set([ - DroidMessageType.Assistant, - DroidMessageType.User, - DroidMessageType.ToolCall, - DroidMessageType.ToolResult, - DroidMessageType.Error, - DroidMessageType.Result, -]); - -export const PARTIAL_ONLY_TYPES = new Set([ - DroidMessageType.AssistantTextDelta, - DroidMessageType.AssistantTextComplete, - DroidMessageType.ThinkingTextDelta, - DroidMessageType.ThinkingTextComplete, - DroidMessageType.ToolCallDelta, - DroidMessageType.ToolProgress, -]); - -export const INTERNAL_TYPES = new Set([ - 'create_message', - 'structured_output', - 'tool_use', -]); - -export function stressExecPath(): string { - return process.env['DROID_EXEC_PATH'] ?? 'droid-dev'; -} - -export function stressModelOptions(): Pick { - const modelId = process.env['DROID_STRESS_MODEL']; - return modelId ? { modelId } : {}; -} - -export function stressRepeat(): number { - const parsed = Number.parseInt(process.env['DROID_STRESS_REPEAT'] ?? '1', 10); - return Number.isFinite(parsed) && parsed > 0 ? 
parsed : 1; -} - -export function createStressSession( - options: CreateSessionOptions = {} -): Promise { - return createSession({ - execPath: stressExecPath(), - cwd: process.cwd(), - ...stressModelOptions(), - ...options, - }); -} - -export function createStressRunOptions(options: RunOptions = {}): RunOptions { - return { - execPath: stressExecPath(), - cwd: process.cwd(), - ...stressModelOptions(), - ...options, - }; -} - -export async function runStress( - prompt: string, - options: RunOptions = {} -): Promise { - const result = await run(prompt, createStressRunOptions(options)); - validateResult(result, 'run()'); - await writeJsonlArtifact('run', [result]); - return result; -} - -export async function collectStream( - name: string, - session: DroidSession, - prompt: string, - options: MessageOptions = {} -): Promise { - return collect(name, session, prompt, { - ...options, - includePartialMessages: false, - }); -} - -export async function collectPartialStream( - name: string, - session: DroidSession, - prompt: string, - options: MessageOptions = {} -): Promise { - return collect(name, session, prompt, { - ...options, - includePartialMessages: true, - }); -} - -export function countByType( - events: DroidStreamEvent[] -): Record { - const counts: Record = {}; - for (const event of events) { - counts[event.type] = (counts[event.type] ?? 
0) + 1; - } - return counts; -} - -export function resultEvents(events: DroidStreamEvent[]): DroidResult[] { - return events.filter( - (event): event is DroidResult => event.type === DroidMessageType.Result - ); -} - -export function lastResult(events: DroidStreamEvent[]): DroidResult { - const results = resultEvents(events); - assert.equal( - results.length, - 1, - `expected exactly one result, saw ${results.length}\n${diagnose(events)}` - ); - assert.equal( - events.at(-1)?.type, - DroidMessageType.Result, - `result must be final event\n${diagnose(events)}` - ); - return results[0]!; -} - -export function validateResult(result: DroidResult, label: string): void { - assert.equal(result.type, DroidMessageType.Result, `${label}: not a result`); - assert.equal( - result.result, - result.text, - `${label}: result.result and result.text diverged` - ); - assert.ok(Array.isArray(result.messages), `${label}: messages missing`); - assertNoInternalMessages(result.messages, `${label}: result.messages`); - - if (!result.isError) { - assert.equal(result.error, null, `${label}: success result has error`); - assert.equal( - result.structuredOutputError ?? 
null, - null, - `${label}: success result has structuredOutputError` - ); - } -} - -export function assertDefaultStreamShape( - collected: CollectedStream, - label = collected.name -): void { - for (const event of collected.events) { - assert.ok( - DEFAULT_PUBLIC_TYPES.has(event.type), - `${label}: default stream leaked ${event.type}\n${diagnose( - collected.events - )}` - ); - } - assertNoInternalMessages(collected.events, label); - validateResult(collected.result, label); -} - -export function assertPartialStreamShape( - collected: CollectedStream, - label = collected.name -): void { - assertNoInternalMessages(collected.events, label); - validateResult(collected.result, label); -} - -export function assertNoInternalMessages( - events: DroidStreamEvent[], - label: string -): void { - const leaked = events.filter((event) => INTERNAL_TYPES.has(event.type)); - assert.equal( - leaked.length, - 0, - `${label}: internal message types leaked: ${leaked - .map((event) => event.type) - .join(', ')}` - ); -} - -export function assertAssistantOutput( - collected: CollectedStream, - label = collected.name -): void { - const assistantText = assistantTexts(collected.events).join(''); - assert.ok( - assistantText.length > 0 || collected.result.result.length > 0, - `${label}: expected non-empty assistant output\n${diagnose( - collected.events - )}` - ); -} - -export function assertPartialTextConsistency( - collected: CollectedStream, - label = collected.name -): void { - const deltas = assistantTextDeltas(collected.events); - const assistantText = assistantTexts(collected.events).join(''); - const resultText = collected.result.result; - - if (deltas.length > 0 && assistantText.length > 0) { - assertTextCompatible( - deltas, - assistantText, - `${label}: assistant deltas contradict full assistant message` - ); - } - - if (assistantText.length > 0 && resultText.length > 0) { - assertTextCompatible( - assistantText, - resultText, - `${label}: full assistant text contradicts result 
text` - ); - } - - if (deltas.length > 0 && resultText.length > 0) { - assertTextCompatible( - deltas, - resultText, - `${label}: assistant deltas contradict result text` - ); - } -} - -export function assertToolPairing( - events: DroidStreamEvent[], - label: string -): void { - const calls = new Set( - events - .filter((event) => event.type === DroidMessageType.ToolCall) - .map((event) => event.toolUse.id) - ); - const results = events.filter( - (event) => event.type === DroidMessageType.ToolResult - ); - - for (const result of results) { - assert.ok( - calls.has(result.toolUseId), - `${label}: tool_result ${result.toolUseId} has no prior tool_call` - ); - } -} - -export async function withTempDir( - prefix: string, - fn: (dir: string) => Promise -): Promise { - const dir = `/tmp/droid-sdk-stress-${prefix}-${process.pid}-${Date.now()}`; - await mkdir(dir, { recursive: true }); - try { - return await fn(dir); - } finally { - if (process.env['DROID_STRESS_KEEP_TEMP'] !== '1') { - await rm(dir, { recursive: true, force: true }); - } - } -} - -export async function runStressCase( - name: string, - fn: () => Promise -): Promise { - const repeat = stressRepeat(); - for (let index = 0; index < repeat; index++) { - const suffix = repeat > 1 ? 
` (${index + 1}/${repeat})` : ''; - console.log(`→ ${name}${suffix}`); - await fn(); - console.log(`✓ ${name}${suffix}`); - } -} - -export function assistantTextDeltas(events: DroidStreamEvent[]): string { - return events - .filter((event) => event.type === DroidMessageType.AssistantTextDelta) - .map((event) => event.text) - .join(''); -} - -export function assistantTexts(events: DroidStreamEvent[]): string[] { - return events - .filter((event) => event.type === DroidMessageType.Assistant) - .map((event) => event.text) - .filter(Boolean); -} - -export function diagnose(events: DroidStreamEvent[]): string { - return JSON.stringify( - events.map((event, index) => summarizeEvent(event, index)), - null, - 2 - ); -} - -export function isDirectRun(metaUrl: string): boolean { - const entry = process.argv[1]; - return entry ? metaUrl === pathToFileURL(entry).href : false; -} - -export async function writeJsonlArtifact( - name: string, - events: DroidStreamEvent[] -): Promise { - const root = process.env['DROID_STRESS_ARTIFACTS'] ?? 
'.stress-artifacts'; - await mkdir(root, { recursive: true }); - const file = join( - root, - `${new Date().toISOString().replace(/[:.]/g, '-')}-${sanitize(name)}.jsonl` - ); - const lines = events - .map((event, index) => JSON.stringify(summarizeEvent(event, index))) - .join('\n'); - await writeFile(file, `${lines}\n`); -} - -async function collect( - name: string, - session: DroidSession, - prompt: string, - options: MessageOptions -): Promise { - const events: DroidStreamEvent[] = []; - if (options.includePartialMessages === true) { - for await (const event of session.stream(prompt, { - ...options, - includePartialMessages: true, - })) { - events.push(event); - } - } else { - for await (const event of session.stream(prompt, { - ...options, - includePartialMessages: false, - })) { - events.push(event); - } - } - const result = lastResult(events); - const collected = { - name, - events, - counts: countByType(events), - result, - }; - validateResult(result, name); - await writeJsonlArtifact(name, events); - return collected; -} - -function assertTextCompatible( - left: string, - right: string, - message: string -): void { - const a = normalizeText(left); - const b = normalizeText(right); - assert.ok( - a === b || a.includes(b) || b.includes(a), - `${message}\nleft: ${left}\nright: ${right}` - ); -} - -function normalizeText(text: string): string { - return text.replace(/\s+/g, ' ').trim(); -} - -function sanitize(value: string): string { - return value.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-|-$/g, ''); -} - -function summarizeEvent( - event: DroidStreamEvent, - index: number -): Record { - const base: Record = { - index, - type: event.type, - }; - - switch (event.type) { - case DroidMessageType.Assistant: - return { - ...base, - textLength: event.text.length, - textPreview: event.text.slice(0, 160), - }; - case DroidMessageType.AssistantTextDelta: - case DroidMessageType.ThinkingTextDelta: - return { - ...base, - messageId: event.messageId, - blockIndex: 
event.blockIndex, - textLength: event.text.length, - textPreview: event.text.slice(0, 160), - }; - case DroidMessageType.ToolCall: - case DroidMessageType.ToolCallDelta: - return { - ...base, - toolUseId: event.toolUse.id, - toolName: event.toolUse.name, - }; - case DroidMessageType.ToolResult: - return { - ...base, - toolUseId: event.toolUseId, - toolName: event.toolName, - isError: event.isError, - }; - case DroidMessageType.Error: - return { - ...base, - message: event.message, - errorType: event.errorType, - }; - case DroidMessageType.Result: - return { - ...base, - subtype: event.subtype, - isError: event.isError, - resultLength: event.result.length, - messageCount: event.messages.length, - structuredOutput: event.structuredOutput ?? null, - structuredOutputError: event.structuredOutputError ?? null, - }; - default: - return base; - } -} diff --git a/examples/stress/all.ts b/examples/stress/all.ts deleted file mode 100644 index b578d6b..0000000 --- a/examples/stress/all.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { isDirectRun } from './_harness.js'; -import { main as cancelInterrupt } from './cancel-interrupt.js'; -import { main as defaultStreaming } from './default-streaming.js'; -import { main as defaultVsPartialConsistency } from './default-vs-partial-consistency.js'; -import { main as errorPaths } from './error-paths.js'; -import { main as multiTurn } from './multi-turn.js'; -import { main as partialStreaming } from './partial-streaming.js'; -import { main as structuredOutput } from './structured-output.js'; -import { main as toolUse } from './tool-use.js'; - -const scripts = [ - ['default-streaming', defaultStreaming], - ['partial-streaming', partialStreaming], - ['default-vs-partial-consistency', defaultVsPartialConsistency], - ['structured-output', structuredOutput], - ['tool-use', toolUse], - ['multi-turn', multiTurn], - ['cancel-interrupt', cancelInterrupt], - ['error-paths', errorPaths], -] as const; - -export async function main(): Promise { - 
console.log( - [ - 'Droid SDK stress suite', - `DROID_EXEC_PATH=${process.env['DROID_EXEC_PATH'] ?? 'droid-dev'}`, - `DROID_STRESS_MODEL=${process.env['DROID_STRESS_MODEL'] ?? '(default)'}`, - `DROID_STRESS_REPEAT=${process.env['DROID_STRESS_REPEAT'] ?? '1'}`, - `DROID_STRESS_ARTIFACTS=${ - process.env['DROID_STRESS_ARTIFACTS'] ?? '.stress-artifacts' - }`, - ].join('\n') - ); - - for (const [name, script] of scripts) { - console.log(`\n## ${name}`); - await script(); - } - - console.log('\nAll stress scripts completed.'); -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/cancel-interrupt.ts b/examples/stress/cancel-interrupt.ts deleted file mode 100644 index a2b4ae7..0000000 --- a/examples/stress/cancel-interrupt.ts +++ /dev/null @@ -1,92 +0,0 @@ -import assert from 'node:assert/strict'; - -import { DroidMessageType } from '@factory/droid-sdk'; -import type { DroidStreamEvent } from '@factory/droid-sdk'; - -import { - assertAssistantOutput, - assertDefaultStreamShape, - assertPartialStreamShape, - collectStream, - createStressSession, - isDirectRun, - lastResult, - runStressCase, - writeJsonlArtifact, -} from './_harness.js'; - -export async function main(): Promise { - await runStressCase('abort controller lifecycle', async () => { - const session = await createStressSession(); - const controller = new AbortController(); - const timeout = setTimeout( - () => controller.abort(new Error('stress abort requested')), - 750 - ); - - try { - await assert.rejects(async () => { - for await (const event of session.stream( - 'Write a long, detailed essay about compiler construction.', - { abortSignal: controller.signal, includePartialMessages: true } - )) { - void event; - } - }, /stress abort requested|Operation aborted/); - - const recovered = await collectStream( - 'abort-controller-recovered', - session, - 'Reply with exactly: abort recovery complete' - ); - 
assertDefaultStreamShape(recovered); - assertAssistantOutput(recovered); - } finally { - clearTimeout(timeout); - await session.close(); - } - }); - - await runStressCase('session interrupt completion', async () => { - const session = await createStressSession(); - const events: DroidStreamEvent[] = []; - let interrupted = false; - - try { - for await (const event of session.stream( - 'Write a long numbered list about operating systems.', - { includePartialMessages: true } - )) { - events.push(event); - if ( - !interrupted && - event.type !== DroidMessageType.Result && - events.length >= 2 - ) { - interrupted = true; - await session.interrupt(); - } - } - - await writeJsonlArtifact('session-interrupt', events); - const result = lastResult(events); - assert.ok(interrupted, 'expected to send an interrupt'); - assert.equal(result.type, DroidMessageType.Result); - assertPartialStreamShape({ - name: 'session-interrupt', - events, - counts: {}, - result, - }); - } finally { - await session.close(); - } - }); -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/default-streaming.ts b/examples/stress/default-streaming.ts deleted file mode 100644 index 50dbd53..0000000 --- a/examples/stress/default-streaming.ts +++ /dev/null @@ -1,79 +0,0 @@ -import assert from 'node:assert/strict'; -import { writeFile } from 'node:fs/promises'; -import { join } from 'node:path'; - -import { AutonomyLevel, DroidMessageType } from '@factory/droid-sdk'; - -import { - assertAssistantOutput, - assertDefaultStreamShape, - assertToolPairing, - collectStream, - createStressSession, - isDirectRun, - runStressCase, - withTempDir, -} from './_harness.js'; - -export async function main(): Promise { - await runStressCase('default streaming text', async () => { - const session = await createStressSession(); - try { - const collected = await collectStream( - 'default-streaming-text', - session, - [ 
- 'Reply with one short sentence.', - 'Include the exact phrase "default streaming stress".', - ].join(' ') - ); - - assertDefaultStreamShape(collected); - assertAssistantOutput(collected); - } finally { - await session.close(); - } - }); - - await runStressCase('default streaming tool use', async () => { - await withTempDir('default-streaming', async (dir) => { - const file = join(dir, 'input.txt'); - await writeFile(file, 'default stream tool fixture\n'); - - const session = await createStressSession({ - autonomyLevel: AutonomyLevel.Medium, - }); - try { - const collected = await collectStream( - 'default-streaming-tool-use', - session, - [ - `Read the file at ${file}.`, - 'Then reply with exactly: default stream read complete', - ].join('\n') - ); - - assertDefaultStreamShape(collected); - assertAssistantOutput(collected); - assert.ok( - collected.counts[DroidMessageType.ToolCall] ?? 0, - 'expected default stream to emit a tool_call' - ); - assert.ok( - collected.counts[DroidMessageType.ToolResult] ?? 
0, - 'expected default stream to emit a tool_result' - ); - assertToolPairing(collected.events, collected.name); - } finally { - await session.close(); - } - }); - }); -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/default-vs-partial-consistency.ts b/examples/stress/default-vs-partial-consistency.ts deleted file mode 100644 index e94fb65..0000000 --- a/examples/stress/default-vs-partial-consistency.ts +++ /dev/null @@ -1,87 +0,0 @@ -import assert from 'node:assert/strict'; - -import { DroidMessageType } from '@factory/droid-sdk'; - -import { - PARTIAL_ONLY_TYPES, - assertAssistantOutput, - assertDefaultStreamShape, - assertNoInternalMessages, - assertPartialStreamShape, - assertPartialTextConsistency, - collectPartialStream, - collectStream, - createStressSession, - isDirectRun, - runStressCase, -} from './_harness.js'; - -export async function main(): Promise { - await runStressCase('default vs partial consistency', async () => { - const prompt = [ - 'Answer with a concise paragraph about stream aggregation.', - 'Include the phrase "stream consistency stress".', - ].join(' '); - - const defaultSession = await createStressSession(); - const partialSession = await createStressSession(); - try { - const defaultCollected = await collectStream( - 'default-vs-partial-default', - defaultSession, - prompt - ); - const partialCollected = await collectPartialStream( - 'default-vs-partial-partial', - partialSession, - prompt - ); - - assertDefaultStreamShape(defaultCollected); - assertPartialStreamShape(partialCollected); - assertAssistantOutput(defaultCollected); - assertAssistantOutput(partialCollected); - assertPartialTextConsistency(partialCollected); - assertNoInternalMessages( - defaultCollected.result.messages, - 'default result' - ); - assertNoInternalMessages( - partialCollected.result.messages, - 'partial result' - ); - - 
assert.equal(defaultCollected.result.isError, false); - assert.equal(partialCollected.result.isError, false); - assert.ok( - partialCollected.events.some((event) => - PARTIAL_ONLY_TYPES.has(event.type) - ), - 'expected partial stream to include at least one partial-only event' - ); - assert.equal( - defaultCollected.counts[DroidMessageType.Result], - 1, - 'default stream must emit one result' - ); - assert.equal( - partialCollected.counts[DroidMessageType.Result], - 1, - 'partial stream must emit one result' - ); - assert.ok( - partialCollected.counts[DroidMessageType.Assistant] ?? 0, - 'partial stream should still include full assistant messages' - ); - } finally { - await Promise.all([defaultSession.close(), partialSession.close()]); - } - }); -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/error-paths.ts b/examples/stress/error-paths.ts deleted file mode 100644 index 2ada6c7..0000000 --- a/examples/stress/error-paths.ts +++ /dev/null @@ -1,90 +0,0 @@ -import assert from 'node:assert/strict'; - -import { - ConnectionError, - DroidMessageType, - OutputFormatType, - createSession, -} from '@factory/droid-sdk'; -import type { DroidClientTransport } from '@factory/droid-sdk'; - -import { - assertDefaultStreamShape, - collectStream, - createStressSession, - isDirectRun, - runStressCase, -} from './_harness.js'; - -export async function main(): Promise { - await runStressCase('turn-level structured output error', async () => { - const session = await createStressSession(); - try { - const collected = await collectStream( - 'error-paths-structured-output', - session, - 'Return any object. 
This schema intentionally cannot be satisfied.', - { - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - impossible: { type: 'string', enum: [] }, - }, - required: ['impossible'], - additionalProperties: false, - }, - }, - } - ); - - assertDefaultStreamShape(collected); - assert.equal(collected.result.type, DroidMessageType.Result); - assert.equal( - collected.result.isError, - true, - 'expected turn-level error to be captured on result' - ); - assert.ok( - collected.result.error || - collected.result.structuredOutputError || - collected.result.subtype === 'error_structured_output', - 'expected result to include error details or structured output subtype' - ); - } finally { - await session.close(); - } - }); - - await runStressCase('startup failure throws typed error', async () => { - await assert.rejects( - () => - createSession({ - transport: new FailingStartupTransport(), - }), - (error: unknown) => error instanceof ConnectionError - ); - }); -} - -class FailingStartupTransport implements DroidClientTransport { - readonly isConnected = false; - - send(): void { - throw new ConnectionError('Synthetic startup transport failure'); - } - - onMessage(): void {} - - onError(): void {} - - async close(): Promise {} -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/multi-turn.ts b/examples/stress/multi-turn.ts deleted file mode 100644 index a9e0a62..0000000 --- a/examples/stress/multi-turn.ts +++ /dev/null @@ -1,137 +0,0 @@ -import assert from 'node:assert/strict'; - -import { DroidMessageType, OutputFormatType } from '@factory/droid-sdk'; - -import { - assertAssistantOutput, - assertDefaultStreamShape, - assertPartialStreamShape, - assertPartialTextConsistency, - collectPartialStream, - collectStream, - createStressSession, - isDirectRun, - runStressCase, -} from './_harness.js'; - -export async function 
main(): Promise { - await runStressCase('multi-turn aggregation', async () => { - const session = await createStressSession(); - let previousNumTurns = 0; - let previousTurnCount = 0; - - try { - const first = await collectStream( - 'multi-turn-1', - session, - 'Remember this phrase for the next turn: multi turn stress anchor.' - ); - assertDefaultStreamShape(first); - assertAssistantOutput(first); - ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( - first.result, - previousNumTurns, - previousTurnCount - )); - - const structured = await collectStream( - 'multi-turn-2-structured', - session, - 'Return a structured object with anchor "multi turn stress anchor" and turn 2.', - { - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - anchor: { - type: 'string', - enum: ['multi turn stress anchor'], - }, - turn: { type: 'number', enum: [2] }, - }, - required: ['anchor', 'turn'], - additionalProperties: false, - }, - }, - } - ); - assertDefaultStreamShape(structured); - assert.equal(structured.result.isError, false); - assert.ok(structured.result.structuredOutput); - ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( - structured.result, - previousNumTurns, - previousTurnCount - )); - - const third = await collectPartialStream( - 'multi-turn-3-partial', - session, - 'In one sentence, state the remembered anchor phrase.' - ); - assertPartialStreamShape(third); - assertAssistantOutput(third); - assertPartialTextConsistency(third); - assert.equal( - third.result.structuredOutput ?? null, - null, - 'structured output leaked into later turn' - ); - assert.equal( - third.result.structuredOutputError ?? 
null, - null, - 'structured output error leaked into later turn' - ); - ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( - third.result, - previousNumTurns, - previousTurnCount - )); - - for (const index of [4, 5]) { - const collected = await collectStream( - `multi-turn-${index}`, - session, - `Reply with exactly: multi turn stress turn ${index}` - ); - assertDefaultStreamShape(collected); - assertAssistantOutput(collected); - assert.equal(collected.result.type, DroidMessageType.Result); - ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( - collected.result, - previousNumTurns, - previousTurnCount - )); - } - } finally { - await session.close(); - } - }); -} - -function assertMonotonicTurns( - result: { numTurns: number; turnCount: number }, - previousNumTurns: number, - previousTurnCount: number -): { previousNumTurns: number; previousTurnCount: number } { - assert.ok( - result.numTurns >= previousNumTurns, - `numTurns regressed from ${previousNumTurns} to ${result.numTurns}` - ); - assert.ok( - result.turnCount >= previousTurnCount, - `turnCount regressed from ${previousTurnCount} to ${result.turnCount}` - ); - return { - previousNumTurns: result.numTurns, - previousTurnCount: result.turnCount, - }; -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/partial-streaming.ts b/examples/stress/partial-streaming.ts deleted file mode 100644 index 7bab3e8..0000000 --- a/examples/stress/partial-streaming.ts +++ /dev/null @@ -1,47 +0,0 @@ -import assert from 'node:assert/strict'; - -import { DroidMessageType } from '@factory/droid-sdk'; - -import { - assertAssistantOutput, - assertPartialStreamShape, - assertPartialTextConsistency, - collectPartialStream, - createStressSession, - isDirectRun, - runStressCase, -} from './_harness.js'; - -export async function main(): Promise { - await runStressCase('partial streaming text', async () => 
{ - const session = await createStressSession(); - try { - const collected = await collectPartialStream( - 'partial-streaming-text', - session, - [ - 'Reply in exactly two short sentences.', - 'The first sentence must include "partial streaming stress".', - 'The second sentence must include "delta reconstruction".', - ].join(' ') - ); - - assertPartialStreamShape(collected); - assertAssistantOutput(collected); - assert.ok( - (collected.counts[DroidMessageType.AssistantTextDelta] ?? 0) > 0, - 'expected partial stream to include assistant_text_delta events' - ); - assertPartialTextConsistency(collected); - } finally { - await session.close(); - } - }); -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/structured-output.ts b/examples/stress/structured-output.ts deleted file mode 100644 index aa91f5b..0000000 --- a/examples/stress/structured-output.ts +++ /dev/null @@ -1,215 +0,0 @@ -import assert from 'node:assert/strict'; - -import { DroidMessageType, OutputFormatType } from '@factory/droid-sdk'; -import type { DroidResult, MessageOptions } from '@factory/droid-sdk'; -import { z } from 'zod'; - -import { - assertAssistantOutput, - assertDefaultStreamShape, - collectStream, - createStressSession, - isDirectRun, - runStress, - runStressCase, -} from './_harness.js'; - -type OutputFormat = NonNullable; -type JsonOutput = NonNullable; - -interface StructuredCase { - name: string; - prompt: string; - outputFormat: OutputFormat; - validate: (value: JsonOutput) => void; -} - -const cases: StructuredCase[] = [ - { - name: 'flat-object', - prompt: - 'Return a JSON object with name "Ada", language "TypeScript", and score 7.', - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - name: { type: 'string', enum: ['Ada'] }, - language: { type: 'string', enum: ['TypeScript'] }, - score: { type: 'number', minimum: 7, maximum: 7 
}, - }, - required: ['name', 'language', 'score'], - additionalProperties: false, - }, - }, - validate: (value) => - z - .object({ - name: z.literal('Ada'), - language: z.literal('TypeScript'), - score: z.literal(7), - }) - .parse(value), - }, - { - name: 'nested-array-enum', - prompt: - 'Return a JSON object with summary.status "ok" and two checks named stream and result, both passed true.', - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - summary: { - type: 'object', - properties: { - status: { type: 'string', enum: ['ok'] }, - }, - required: ['status'], - additionalProperties: false, - }, - checks: { - type: 'array', - minItems: 2, - maxItems: 2, - items: { - type: 'object', - properties: { - name: { type: 'string', enum: ['stream', 'result'] }, - passed: { type: 'boolean', enum: [true] }, - }, - required: ['name', 'passed'], - additionalProperties: false, - }, - }, - }, - required: ['summary', 'checks'], - additionalProperties: false, - }, - }, - validate: (value) => - z - .object({ - summary: z.object({ status: z.literal('ok') }), - checks: z - .array( - z.object({ - name: z.enum(['stream', 'result']), - passed: z.literal(true), - }) - ) - .length(2), - }) - .parse(value), - }, - { - name: 'optional-pattern-bounds', - prompt: - 'Return a JSON object with code "stress-123", count 3, tags ["sdk"], and omit notes.', - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - code: { type: 'string', pattern: '^stress-[0-9]{3}$' }, - count: { type: 'number', minimum: 1, maximum: 3 }, - tags: { - type: 'array', - minItems: 1, - items: { type: 'string', enum: ['sdk'] }, - }, - notes: { type: 'string' }, - }, - required: ['code', 'count', 'tags'], - additionalProperties: false, - }, - }, - validate: (value) => - z - .object({ - code: z.string().regex(/^stress-[0-9]{3}$/), - count: z.number().min(1).max(3), - tags: z.array(z.literal('sdk')).min(1), - notes: 
z.string().optional(), - }) - .parse(value), - }, -]; - -export async function main(): Promise { - await runStressCase('structured output run()', async () => { - for (const stressCase of cases) { - const result = await runStress(stressCase.prompt, { - outputFormat: stressCase.outputFormat, - }); - assertStructuredSuccess(result, stressCase); - } - }); - - await runStressCase('structured output streaming', async () => { - const session = await createStressSession(); - try { - for (const stressCase of cases) { - const collected = await collectStream( - `structured-output-${stressCase.name}`, - session, - stressCase.prompt, - { outputFormat: stressCase.outputFormat } - ); - assertDefaultStreamShape(collected); - assertAssistantOutput(collected); - assertStructuredSuccess(collected.result, stressCase); - } - } finally { - await session.close(); - } - }); - - await runStressCase('structured output invalid schema', async () => { - const result = await runStress( - 'Return any object. This schema intentionally cannot be satisfied.', - { - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - impossible: { type: 'string', enum: [] }, - }, - required: ['impossible'], - additionalProperties: false, - }, - }, - } - ); - - assert.equal( - result.isError, - true, - 'expected invalid structured output to mark result as an error' - ); - assert.equal(result.structuredOutput ?? null, null); - assert.ok( - result.structuredOutputError, - 'expected invalid structured output error details' - ); - }); -} - -function assertStructuredSuccess( - result: DroidResult, - stressCase: StructuredCase -): void { - assert.equal(result.type, DroidMessageType.Result); - assert.equal(result.isError, false, `${stressCase.name}: unexpected error`); - assert.equal(result.structuredOutputError ?? 
null, null); - assert.ok(result.structuredOutput, `${stressCase.name}: missing output`); - stressCase.validate(result.structuredOutput as JsonOutput); -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/stress/tool-use.ts b/examples/stress/tool-use.ts deleted file mode 100644 index fc4de0e..0000000 --- a/examples/stress/tool-use.ts +++ /dev/null @@ -1,101 +0,0 @@ -import assert from 'node:assert/strict'; -import { writeFile } from 'node:fs/promises'; -import { join } from 'node:path'; - -import { AutonomyLevel, DroidMessageType } from '@factory/droid-sdk'; - -import { - assertAssistantOutput, - assertDefaultStreamShape, - assertPartialStreamShape, - assertToolPairing, - collectPartialStream, - collectStream, - createStressSession, - isDirectRun, - runStressCase, - withTempDir, -} from './_harness.js'; - -export async function main(): Promise { - await runStressCase('tool use preservation', async () => { - await withTempDir('tool-use', async (dir) => { - const defaultFile = join(dir, 'default.txt'); - const partialFile = join(dir, 'partial.txt'); - await writeFile(defaultFile, 'default tool use fixture\n'); - await writeFile(partialFile, 'partial tool use fixture\n'); - - const defaultSession = await createStressSession({ - autonomyLevel: AutonomyLevel.Medium, - }); - const partialSession = await createStressSession({ - autonomyLevel: AutonomyLevel.Medium, - }); - - try { - const defaultCollected = await collectStream( - 'tool-use-default', - defaultSession, - toolPrompt(defaultFile, 'default') - ); - const partialCollected = await collectPartialStream( - 'tool-use-partial', - partialSession, - toolPrompt(partialFile, 'partial') - ); - - assertDefaultStreamShape(defaultCollected); - assertPartialStreamShape(partialCollected); - assertAssistantOutput(defaultCollected); - assertAssistantOutput(partialCollected); - assertToolPairing(defaultCollected.events, 
defaultCollected.name); - assertToolPairing(partialCollected.events, partialCollected.name); - - assert.ok( - defaultCollected.counts[DroidMessageType.ToolCall] ?? 0, - 'default stream must expose completed tool calls' - ); - assert.ok( - defaultCollected.counts[DroidMessageType.ToolResult] ?? 0, - 'default stream must expose tool results' - ); - assert.ok( - partialCollected.counts[DroidMessageType.ToolCall] ?? 0, - 'partial stream must include completed tool calls' - ); - assert.ok( - partialCollected.counts[DroidMessageType.ToolCallDelta] ?? 0, - 'partial stream must include tool_call_delta' - ); - assert.ok( - partialCollected.result.messages.some( - (message) => message.type === DroidMessageType.ToolCall - ), - 'result.messages must preserve tool_call' - ); - assert.ok( - partialCollected.result.messages.some( - (message) => message.type === DroidMessageType.ToolResult - ), - 'result.messages must preserve tool_result' - ); - } finally { - await Promise.all([defaultSession.close(), partialSession.close()]); - } - }); - }); -} - -function toolPrompt(file: string, mode: string): string { - return [ - `Use a file-reading tool to read ${file}.`, - `Then respond with exactly: ${mode} tool use stress complete`, - ].join('\n'); -} - -if (isDirectRun(import.meta.url)) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); -} diff --git a/examples/structured-output-stress-test.ts b/examples/structured-output-stress-test.ts deleted file mode 100644 index 30b9956..0000000 --- a/examples/structured-output-stress-test.ts +++ /dev/null @@ -1,515 +0,0 @@ -/** - * Structured output stress test. - * - * Runs several structured-output schemas against one or more Droid models, - * verifies both `run(...)` results and streaming result metadata, and - * stress-tests tool use before structured output. 
- * - * Usage: - * npx tsx examples/structured-output-stress-test.ts - * DROID_EXEC_PATH=droid-dev npx tsx examples/structured-output-stress-test.ts - * DROID_STRUCTURED_OUTPUT_MODELS="claude-sonnet-4-5,gpt-5.2" npx tsx examples/structured-output-stress-test.ts - */ - -import assert from 'node:assert/strict'; - -import { - AutonomyLevel, - DroidMessageType, - OutputFormatType, - createSession, - run, -} from '@factory/droid-sdk'; -import type { - DroidMessage, - DroidResult, - MessageOptions, -} from '@factory/droid-sdk'; -import { z } from 'zod'; - -type OutputFormat = NonNullable; -type JsonObject = NonNullable; - -interface StressCase { - name: string; - prompt: string; - outputFormat: OutputFormat; - parse: (value: JsonObject) => unknown; -} - -const PersonSchema = z.object({ - name: z.literal('Ada Lovelace'), - language: z.literal('TypeScript'), - score: z.literal(99), -}); - -const PlanSchema = z.object({ - title: z.literal('Structured Output SDK Test'), - priority: z.enum(['low', 'medium', 'high']), - tasks: z.array( - z.object({ - id: z.string(), - done: z.boolean(), - }) - ), -}); - -const MetricsSchema = z.object({ - summary: z.object({ - passed: z.literal(3), - failed: z.literal(0), - }), - checks: z.array( - z.object({ - name: z.enum(['schema', 'stream', 'fallback']), - ok: z.literal(true), - }) - ), -}); - -const PackageSchema = z.object({ - packageName: z.literal('@factory/droid-sdk'), - tmpDir: z.string().startsWith('/tmp/droid-sdk-structured-output-stress-'), - createdFile: z.string().endsWith('/notes.txt'), - finalContent: z.string().includes('edited by structured output stress'), - filesRead: z.array(z.enum(['package.json', 'notes.txt'])), - toolsUsed: z.array(z.enum(['read', 'write', 'edit'])), -}); - -const stressCases: StressCase[] = [ - { - name: 'flat-literals', - prompt: [ - 'Return a structured object for Ada Lovelace.', - 'Use exactly name "Ada Lovelace", language "TypeScript", and score 99.', - ].join(' '), - outputFormat: { - type: 
OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - name: { type: 'string' }, - language: { type: 'string', enum: ['TypeScript'] }, - score: { type: 'number', enum: [99] }, - }, - required: ['name', 'language', 'score'], - additionalProperties: false, - }, - }, - parse: (value) => PersonSchema.parse(value), - }, - { - name: 'nested-array-enum', - prompt: [ - 'Return a project plan object.', - 'Use title "Structured Output SDK Test", priority "high",', - 'and exactly two tasks with ids "schema" and "stream".', - 'Set both task done values to true.', - ].join(' '), - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - title: { type: 'string', enum: ['Structured Output SDK Test'] }, - priority: { type: 'string', enum: ['low', 'medium', 'high'] }, - tasks: { - type: 'array', - minItems: 2, - maxItems: 2, - items: { - type: 'object', - properties: { - id: { type: 'string', enum: ['schema', 'stream'] }, - done: { type: 'boolean', enum: [true] }, - }, - required: ['id', 'done'], - additionalProperties: false, - }, - }, - }, - required: ['title', 'priority', 'tasks'], - additionalProperties: false, - }, - }, - parse: (value) => PlanSchema.parse(value), - }, - { - name: 'nested-metrics', - prompt: [ - 'Return validation metrics.', - 'The summary must have passed 3 and failed 0.', - 'The checks array must contain schema, stream, and fallback, each with ok true.', - ].join(' '), - outputFormat: { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - summary: { - type: 'object', - properties: { - passed: { type: 'number', enum: [3] }, - failed: { type: 'number', enum: [0] }, - }, - required: ['passed', 'failed'], - additionalProperties: false, - }, - checks: { - type: 'array', - minItems: 3, - maxItems: 3, - items: { - type: 'object', - properties: { - name: { - type: 'string', - enum: ['schema', 'stream', 'fallback'], - }, - ok: { type: 'boolean', enum: [true] }, - }, - 
required: ['name', 'ok'], - additionalProperties: false, - }, - }, - }, - required: ['summary', 'checks'], - additionalProperties: false, - }, - }, - parse: (value) => MetricsSchema.parse(value), - }, -]; - -function parseModels(): Array { - const raw = process.env['DROID_STRUCTURED_OUTPUT_MODELS']; - if (!raw) return [undefined]; - return raw - .split(',') - .map((model) => model.trim()) - .filter(Boolean); -} - -function labelModel(modelId: string | undefined): string { - return modelId ?? 'default session model'; -} - -function assertStructuredResult( - result: DroidResult, - stressCase: StressCase -): void { - const diagnostic = JSON.stringify( - { - text: result.text, - error: result.error, - structuredOutputError: result.structuredOutputError, - messages: result.messages - .filter( - (message) => - message.type === DroidMessageType.Assistant || - message.type === DroidMessageType.Error || - message.type === DroidMessageType.Result - ) - .map((message) => { - if (message.type !== DroidMessageType.Assistant) return message; - return { - type: message.type, - role: message.message.role, - content: message.message.content, - }; - }), - }, - null, - 2 - ); - - assert.equal( - result.structuredOutputError, - null, - `${stressCase.name}: expected no structured output error\n${diagnostic}` - ); - assert.ok( - result.structuredOutput, - `${stressCase.name}: expected structuredOutput\n${diagnostic}` - ); - stressCase.parse(result.structuredOutput); -} - -function findMessage( - messages: DroidMessage[], - type: T -): Extract | undefined { - return messages.find( - (message): message is Extract => - message.type === type - ); -} - -function findNormalToolUse( - messages: DroidMessage[] -): Extract | undefined { - return messages.find( - (message): message is Extract => - message.type === DroidMessageType.ToolCall && - message.toolUse.name !== 'StructuredOutput' - ); -} - -function findToolUses( - messages: DroidMessage[], - matches: (toolName: string) => boolean -): 
Array> { - return messages.filter( - (message): message is Extract => - message.type === DroidMessageType.ToolCall && - matches(message.toolUse.name) - ); -} - -async function runCase( - modelId: string | undefined, - stressCase: StressCase -): Promise { - const result = await run(stressCase.prompt, { - execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', - cwd: process.cwd(), - ...(modelId !== undefined && { modelId }), - outputFormat: stressCase.outputFormat, - }); - - assertStructuredResult(result, stressCase); - - console.log( - ` ✓ ${stressCase.name}: ${JSON.stringify(result.structuredOutput)}` - ); -} - -async function runStreamingCase(modelId: string | undefined): Promise { - const stressCase = stressCases[0]; - const session = await createSession({ - execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', - cwd: process.cwd(), - ...(modelId !== undefined && { modelId }), - }); - - try { - const messages: DroidMessage[] = []; - for await (const message of session.stream(stressCase.prompt, { - outputFormat: stressCase.outputFormat, - })) { - messages.push(message); - } - - const result = findMessage(messages, DroidMessageType.Result); - - assert.ok(result, 'streaming: expected result message'); - assert.equal(result.structuredOutputError, null); - assert.ok(result.structuredOutput); - stressCase.parse(result.structuredOutput as JsonObject); - - console.log(' ✓ streaming emits structured output on result'); - } finally { - await session.close(); - } -} - -async function runToolUseCase(modelId: string | undefined): Promise { - const tmpDir = `/tmp/droid-sdk-structured-output-stress-${process.pid}-${Date.now()}`; - const tmpFile = `${tmpDir}/notes.txt`; - const outputFormat: OutputFormat = { - type: OutputFormatType.JsonSchema, - schema: { - type: 'object', - properties: { - packageName: { type: 'string', enum: ['@factory/droid-sdk'] }, - tmpDir: { type: 'string', enum: [tmpDir] }, - createdFile: { type: 'string', enum: [tmpFile] }, - finalContent: { - type: 
'string', - enum: [ - [ - 'created by structured output stress', - 'edited by structured output stress', - ].join('\n'), - ], - }, - filesRead: { - type: 'array', - minItems: 2, - items: { - type: 'string', - enum: ['package.json', 'notes.txt'], - }, - }, - toolsUsed: { - type: 'array', - minItems: 3, - items: { - type: 'string', - enum: ['read', 'write', 'edit'], - }, - }, - }, - required: [ - 'packageName', - 'tmpDir', - 'createdFile', - 'finalContent', - 'filesRead', - 'toolsUsed', - ], - additionalProperties: false, - }, - }; - const session = await createSession({ - execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', - autonomyLevel: AutonomyLevel.Medium, - cwd: process.cwd(), - ...(modelId !== undefined && { modelId }), - }); - - try { - const messages: DroidMessage[] = []; - for await (const message of session.stream( - [ - 'You must use multiple tools before producing structured output.', - 'Use the Read tool to read package.json in the current working directory.', - `Create the directory ${tmpDir}.`, - `Write ${tmpFile} with exactly this first line: created by structured output stress`, - `Then use an edit tool to modify ${tmpFile} so its full content is exactly:`, - 'created by structured output stress', - 'edited by structured output stress', - `Use the Read tool to read ${tmpFile} after editing it.`, - 'Only after all tool calls are done, return the structured object.', - 'Set packageName from package.json name.', - `Set tmpDir to ${tmpDir} and createdFile to ${tmpFile}.`, - 'Set filesRead to include package.json and notes.txt.', - 'Set toolsUsed to include read, write, and edit.', - ].join('\n'), - { outputFormat } - )) { - messages.push(message); - } - - const normalToolUse = findNormalToolUse(messages); - const readToolUses = findToolUses( - messages, - (toolName) => toolName.toLowerCase() === 'read' - ); - const writeToolUses = findToolUses(messages, (toolName) => { - const normalized = toolName.toLowerCase(); - return ( - 
normalized.includes('write') || - normalized.includes('create') || - normalized.includes('execute') - ); - }); - const editToolUses = findToolUses(messages, (toolName) => { - const normalized = toolName.toLowerCase(); - return normalized.includes('edit') || normalized.includes('patch'); - }); - const result = findMessage(messages, DroidMessageType.Result); - const diagnostic = JSON.stringify( - messages - .filter( - (message) => - message.type === DroidMessageType.ToolCall || - message.type === DroidMessageType.ToolResult || - message.type === DroidMessageType.Result || - message.type === DroidMessageType.Error - ) - .map((message) => - message.type === DroidMessageType.ToolCall - ? { - type: message.type, - toolName: message.toolUse.name, - toolInput: message.toolUse.input, - } - : message - ), - null, - 2 - ); - - assert.ok( - normalToolUse, - `tool-use: expected a normal tool call before structured output\n${diagnostic}` - ); - assert.ok( - readToolUses.length >= 2, - `tool-use: expected at least 2 Read tool calls, saw ${readToolUses.length}\n${diagnostic}` - ); - assert.ok( - writeToolUses.length >= 1, - `tool-use: expected at least 1 write/create tool call\n${diagnostic}` - ); - assert.ok( - editToolUses.length >= 1, - `tool-use: expected at least 1 edit tool call\n${diagnostic}` - ); - assert.ok(result, 'tool-use: expected result message'); - assert.equal( - result.structuredOutputError, - null, - `tool-use: expected no structured output error\n${diagnostic}` - ); - assert.ok(result.structuredOutput); - PackageSchema.parse(result.structuredOutput); - - console.log( - ` ✓ read/write/edit before structured output: ${readToolUses.length}/${writeToolUses.length}/${editToolUses.length} tool calls` - ); - } finally { - await session.close(); - } -} - -interface Failure { - model: string; - step: string; - error: unknown; -} - -const failures: Failure[] = []; - -async function runStep( - modelId: string | undefined, - step: string, - callback: () => Promise -): 
Promise { - try { - await callback(); - } catch (error) { - failures.push({ model: labelModel(modelId), step, error }); - console.error( - ` ✗ ${step}: ${error instanceof Error ? error.message : String(error)}` - ); - } -} - -for (const modelId of parseModels()) { - console.log(`\n=== Testing ${labelModel(modelId)} ===`); - for (const stressCase of stressCases) { - await runStep(modelId, stressCase.name, () => runCase(modelId, stressCase)); - } - await runStep(modelId, 'streaming', () => runStreamingCase(modelId)); - await runStep(modelId, 'read/write/edit tool-use', () => - runToolUseCase(modelId) - ); -} - -if (failures.length > 0) { - console.error('\nStructured output stress test failures:'); - for (const failure of failures) { - console.error( - `- ${failure.model} / ${failure.step}: ${ - failure.error instanceof Error - ? (failure.error.stack ?? failure.error.message) - : String(failure.error) - }` - ); - } - process.exitCode = 1; -} else { - console.log('\nStructured output stress test passed'); -} From 63bb9402b6c55f0932b9d9cac71ed19e00e797f5 Mon Sep 17 00:00:00 2001 From: User Date: Tue, 12 May 2026 17:23:35 -0700 Subject: [PATCH 4/4] test: add SDK streaming stress examples Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- examples/stress/_harness.ts | 457 ++++++++++++++++ examples/stress/all.ts | 48 ++ examples/stress/cancel-interrupt.ts | 92 ++++ examples/stress/default-streaming.ts | 79 +++ .../stress/default-vs-partial-consistency.ts | 87 +++ examples/stress/error-paths.ts | 90 +++ examples/stress/multi-turn.ts | 137 +++++ examples/stress/partial-streaming.ts | 47 ++ examples/stress/structured-output.ts | 215 ++++++++ examples/stress/tool-use.ts | 101 ++++ examples/structured-output-stress-test.ts | 515 ++++++++++++++++++ 11 files changed, 1868 insertions(+) create mode 100644 examples/stress/_harness.ts create mode 100644 examples/stress/all.ts create mode 100644 examples/stress/cancel-interrupt.ts create 
mode 100644 examples/stress/default-streaming.ts create mode 100644 examples/stress/default-vs-partial-consistency.ts create mode 100644 examples/stress/error-paths.ts create mode 100644 examples/stress/multi-turn.ts create mode 100644 examples/stress/partial-streaming.ts create mode 100644 examples/stress/structured-output.ts create mode 100644 examples/stress/tool-use.ts create mode 100644 examples/structured-output-stress-test.ts diff --git a/examples/stress/_harness.ts b/examples/stress/_harness.ts new file mode 100644 index 0000000..775bc38 --- /dev/null +++ b/examples/stress/_harness.ts @@ -0,0 +1,457 @@ +import assert from 'node:assert/strict'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { pathToFileURL } from 'node:url'; + +import { + DroidMessageType, + createSession, + run, + type CreateSessionOptions, + type DroidResult, + type DroidSession, + type DroidStreamEvent, + type MessageOptions, + type RunOptions, +} from '@factory/droid-sdk'; + +export interface CollectedStream { + name: string; + events: DroidStreamEvent[]; + counts: Record; + result: DroidResult; +} + +export const DEFAULT_PUBLIC_TYPES = new Set([ + DroidMessageType.Assistant, + DroidMessageType.User, + DroidMessageType.ToolCall, + DroidMessageType.ToolResult, + DroidMessageType.Error, + DroidMessageType.Result, +]); + +export const PARTIAL_ONLY_TYPES = new Set([ + DroidMessageType.AssistantTextDelta, + DroidMessageType.AssistantTextComplete, + DroidMessageType.ThinkingTextDelta, + DroidMessageType.ThinkingTextComplete, + DroidMessageType.ToolCallDelta, + DroidMessageType.ToolProgress, +]); + +export const INTERNAL_TYPES = new Set([ + 'create_message', + 'structured_output', + 'tool_use', +]); + +export function stressExecPath(): string { + return process.env['DROID_EXEC_PATH'] ?? 'droid-dev'; +} + +export function stressModelOptions(): Pick { + const modelId = process.env['DROID_STRESS_MODEL']; + return modelId ? 
{ modelId } : {}; +} + +export function stressRepeat(): number { + const parsed = Number.parseInt(process.env['DROID_STRESS_REPEAT'] ?? '1', 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : 1; +} + +export function createStressSession( + options: CreateSessionOptions = {} +): Promise { + return createSession({ + execPath: stressExecPath(), + cwd: process.cwd(), + ...stressModelOptions(), + ...options, + }); +} + +export function createStressRunOptions(options: RunOptions = {}): RunOptions { + return { + execPath: stressExecPath(), + cwd: process.cwd(), + ...stressModelOptions(), + ...options, + }; +} + +export async function runStress( + prompt: string, + options: RunOptions = {} +): Promise { + const result = await run(prompt, createStressRunOptions(options)); + validateResult(result, 'run()'); + await writeJsonlArtifact('run', [result]); + return result; +} + +export async function collectStream( + name: string, + session: DroidSession, + prompt: string, + options: MessageOptions = {} +): Promise { + return collect(name, session, prompt, { + ...options, + includePartialMessages: false, + }); +} + +export async function collectPartialStream( + name: string, + session: DroidSession, + prompt: string, + options: MessageOptions = {} +): Promise { + return collect(name, session, prompt, { + ...options, + includePartialMessages: true, + }); +} + +export function countByType( + events: DroidStreamEvent[] +): Record { + const counts: Record = {}; + for (const event of events) { + counts[event.type] = (counts[event.type] ?? 
0) + 1; + } + return counts; +} + +export function resultEvents(events: DroidStreamEvent[]): DroidResult[] { + return events.filter( + (event): event is DroidResult => event.type === DroidMessageType.Result + ); +} + +export function lastResult(events: DroidStreamEvent[]): DroidResult { + const results = resultEvents(events); + assert.equal( + results.length, + 1, + `expected exactly one result, saw ${results.length}\n${diagnose(events)}` + ); + assert.equal( + events.at(-1)?.type, + DroidMessageType.Result, + `result must be final event\n${diagnose(events)}` + ); + return results[0]!; +} + +export function validateResult(result: DroidResult, label: string): void { + assert.equal(result.type, DroidMessageType.Result, `${label}: not a result`); + assert.equal( + result.result, + result.text, + `${label}: result.result and result.text diverged` + ); + assert.ok(Array.isArray(result.messages), `${label}: messages missing`); + assertNoInternalMessages(result.messages, `${label}: result.messages`); + + if (!result.isError) { + assert.equal(result.error, null, `${label}: success result has error`); + assert.equal( + result.structuredOutputError ?? 
null, + null, + `${label}: success result has structuredOutputError` + ); + } +} + +export function assertDefaultStreamShape( + collected: CollectedStream, + label = collected.name +): void { + for (const event of collected.events) { + assert.ok( + DEFAULT_PUBLIC_TYPES.has(event.type), + `${label}: default stream leaked ${event.type}\n${diagnose( + collected.events + )}` + ); + } + assertNoInternalMessages(collected.events, label); + validateResult(collected.result, label); +} + +export function assertPartialStreamShape( + collected: CollectedStream, + label = collected.name +): void { + assertNoInternalMessages(collected.events, label); + validateResult(collected.result, label); +} + +export function assertNoInternalMessages( + events: DroidStreamEvent[], + label: string +): void { + const leaked = events.filter((event) => INTERNAL_TYPES.has(event.type)); + assert.equal( + leaked.length, + 0, + `${label}: internal message types leaked: ${leaked + .map((event) => event.type) + .join(', ')}` + ); +} + +export function assertAssistantOutput( + collected: CollectedStream, + label = collected.name +): void { + const assistantText = assistantTexts(collected.events).join(''); + assert.ok( + assistantText.length > 0 || collected.result.result.length > 0, + `${label}: expected non-empty assistant output\n${diagnose( + collected.events + )}` + ); +} + +export function assertPartialTextConsistency( + collected: CollectedStream, + label = collected.name +): void { + const deltas = assistantTextDeltas(collected.events); + const assistantText = assistantTexts(collected.events).join(''); + const resultText = collected.result.result; + + if (deltas.length > 0 && assistantText.length > 0) { + assertTextCompatible( + deltas, + assistantText, + `${label}: assistant deltas contradict full assistant message` + ); + } + + if (assistantText.length > 0 && resultText.length > 0) { + assertTextCompatible( + assistantText, + resultText, + `${label}: full assistant text contradicts result 
text` + ); + } + + if (deltas.length > 0 && resultText.length > 0) { + assertTextCompatible( + deltas, + resultText, + `${label}: assistant deltas contradict result text` + ); + } +} + +export function assertToolPairing( + events: DroidStreamEvent[], + label: string +): void { + const calls = new Set( + events + .filter((event) => event.type === DroidMessageType.ToolCall) + .map((event) => event.toolUse.id) + ); + const results = events.filter( + (event) => event.type === DroidMessageType.ToolResult + ); + + for (const result of results) { + assert.ok( + calls.has(result.toolUseId), + `${label}: tool_result ${result.toolUseId} has no prior tool_call` + ); + } +} + +export async function withTempDir( + prefix: string, + fn: (dir: string) => Promise +): Promise { + const dir = `/tmp/droid-sdk-stress-${prefix}-${process.pid}-${Date.now()}`; + await mkdir(dir, { recursive: true }); + try { + return await fn(dir); + } finally { + if (process.env['DROID_STRESS_KEEP_TEMP'] !== '1') { + await rm(dir, { recursive: true, force: true }); + } + } +} + +export async function runStressCase( + name: string, + fn: () => Promise +): Promise { + const repeat = stressRepeat(); + for (let index = 0; index < repeat; index++) { + const suffix = repeat > 1 ? 
` (${index + 1}/${repeat})` : ''; + console.log(`→ ${name}${suffix}`); + await fn(); + console.log(`✓ ${name}${suffix}`); + } +} + +export function assistantTextDeltas(events: DroidStreamEvent[]): string { + return events + .filter((event) => event.type === DroidMessageType.AssistantTextDelta) + .map((event) => event.text) + .join(''); +} + +export function assistantTexts(events: DroidStreamEvent[]): string[] { + return events + .filter((event) => event.type === DroidMessageType.Assistant) + .map((event) => event.text) + .filter(Boolean); +} + +export function diagnose(events: DroidStreamEvent[]): string { + return JSON.stringify( + events.map((event, index) => summarizeEvent(event, index)), + null, + 2 + ); +} + +export function isDirectRun(metaUrl: string): boolean { + const entry = process.argv[1]; + return entry ? metaUrl === pathToFileURL(entry).href : false; +} + +export async function writeJsonlArtifact( + name: string, + events: DroidStreamEvent[] +): Promise { + const root = process.env['DROID_STRESS_ARTIFACTS'] ?? 
'.stress-artifacts'; + await mkdir(root, { recursive: true }); + const file = join( + root, + `${new Date().toISOString().replace(/[:.]/g, '-')}-${sanitize(name)}.jsonl` + ); + const lines = events + .map((event, index) => JSON.stringify(summarizeEvent(event, index))) + .join('\n'); + await writeFile(file, `${lines}\n`); +} + +async function collect( + name: string, + session: DroidSession, + prompt: string, + options: MessageOptions +): Promise { + const events: DroidStreamEvent[] = []; + if (options.includePartialMessages === true) { + for await (const event of session.stream(prompt, { + ...options, + includePartialMessages: true, + })) { + events.push(event); + } + } else { + for await (const event of session.stream(prompt, { + ...options, + includePartialMessages: false, + })) { + events.push(event); + } + } + const result = lastResult(events); + const collected = { + name, + events, + counts: countByType(events), + result, + }; + validateResult(result, name); + await writeJsonlArtifact(name, events); + return collected; +} + +function assertTextCompatible( + left: string, + right: string, + message: string +): void { + const a = normalizeText(left); + const b = normalizeText(right); + assert.ok( + a === b || a.includes(b) || b.includes(a), + `${message}\nleft: ${left}\nright: ${right}` + ); +} + +function normalizeText(text: string): string { + return text.replace(/\s+/g, ' ').trim(); +} + +function sanitize(value: string): string { + return value.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-|-$/g, ''); +} + +function summarizeEvent( + event: DroidStreamEvent, + index: number +): Record { + const base: Record = { + index, + type: event.type, + }; + + switch (event.type) { + case DroidMessageType.Assistant: + return { + ...base, + textLength: event.text.length, + textPreview: event.text.slice(0, 160), + }; + case DroidMessageType.AssistantTextDelta: + case DroidMessageType.ThinkingTextDelta: + return { + ...base, + messageId: event.messageId, + blockIndex: 
event.blockIndex, + textLength: event.text.length, + textPreview: event.text.slice(0, 160), + }; + case DroidMessageType.ToolCall: + case DroidMessageType.ToolCallDelta: + return { + ...base, + toolUseId: event.toolUse.id, + toolName: event.toolUse.name, + }; + case DroidMessageType.ToolResult: + return { + ...base, + toolUseId: event.toolUseId, + toolName: event.toolName, + isError: event.isError, + }; + case DroidMessageType.Error: + return { + ...base, + message: event.message, + errorType: event.errorType, + }; + case DroidMessageType.Result: + return { + ...base, + subtype: event.subtype, + isError: event.isError, + resultLength: event.result.length, + messageCount: event.messages.length, + structuredOutput: event.structuredOutput ?? null, + structuredOutputError: event.structuredOutputError ?? null, + }; + default: + return base; + } +} diff --git a/examples/stress/all.ts b/examples/stress/all.ts new file mode 100644 index 0000000..b578d6b --- /dev/null +++ b/examples/stress/all.ts @@ -0,0 +1,48 @@ +import { isDirectRun } from './_harness.js'; +import { main as cancelInterrupt } from './cancel-interrupt.js'; +import { main as defaultStreaming } from './default-streaming.js'; +import { main as defaultVsPartialConsistency } from './default-vs-partial-consistency.js'; +import { main as errorPaths } from './error-paths.js'; +import { main as multiTurn } from './multi-turn.js'; +import { main as partialStreaming } from './partial-streaming.js'; +import { main as structuredOutput } from './structured-output.js'; +import { main as toolUse } from './tool-use.js'; + +const scripts = [ + ['default-streaming', defaultStreaming], + ['partial-streaming', partialStreaming], + ['default-vs-partial-consistency', defaultVsPartialConsistency], + ['structured-output', structuredOutput], + ['tool-use', toolUse], + ['multi-turn', multiTurn], + ['cancel-interrupt', cancelInterrupt], + ['error-paths', errorPaths], +] as const; + +export async function main(): Promise { + 
console.log( + [ + 'Droid SDK stress suite', + `DROID_EXEC_PATH=${process.env['DROID_EXEC_PATH'] ?? 'droid-dev'}`, + `DROID_STRESS_MODEL=${process.env['DROID_STRESS_MODEL'] ?? '(default)'}`, + `DROID_STRESS_REPEAT=${process.env['DROID_STRESS_REPEAT'] ?? '1'}`, + `DROID_STRESS_ARTIFACTS=${ + process.env['DROID_STRESS_ARTIFACTS'] ?? '.stress-artifacts' + }`, + ].join('\n') + ); + + for (const [name, script] of scripts) { + console.log(`\n## ${name}`); + await script(); + } + + console.log('\nAll stress scripts completed.'); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/cancel-interrupt.ts b/examples/stress/cancel-interrupt.ts new file mode 100644 index 0000000..a2b4ae7 --- /dev/null +++ b/examples/stress/cancel-interrupt.ts @@ -0,0 +1,92 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType } from '@factory/droid-sdk'; +import type { DroidStreamEvent } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertPartialStreamShape, + collectStream, + createStressSession, + isDirectRun, + lastResult, + runStressCase, + writeJsonlArtifact, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('abort controller lifecycle', async () => { + const session = await createStressSession(); + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(new Error('stress abort requested')), + 750 + ); + + try { + await assert.rejects(async () => { + for await (const event of session.stream( + 'Write a long, detailed essay about compiler construction.', + { abortSignal: controller.signal, includePartialMessages: true } + )) { + void event; + } + }, /stress abort requested|Operation aborted/); + + const recovered = await collectStream( + 'abort-controller-recovered', + session, + 'Reply with exactly: abort recovery complete' + ); + 
assertDefaultStreamShape(recovered); + assertAssistantOutput(recovered); + } finally { + clearTimeout(timeout); + await session.close(); + } + }); + + await runStressCase('session interrupt completion', async () => { + const session = await createStressSession(); + const events: DroidStreamEvent[] = []; + let interrupted = false; + + try { + for await (const event of session.stream( + 'Write a long numbered list about operating systems.', + { includePartialMessages: true } + )) { + events.push(event); + if ( + !interrupted && + event.type !== DroidMessageType.Result && + events.length >= 2 + ) { + interrupted = true; + await session.interrupt(); + } + } + + await writeJsonlArtifact('session-interrupt', events); + const result = lastResult(events); + assert.ok(interrupted, 'expected to send an interrupt'); + assert.equal(result.type, DroidMessageType.Result); + assertPartialStreamShape({ + name: 'session-interrupt', + events, + counts: {}, + result, + }); + } finally { + await session.close(); + } + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/default-streaming.ts b/examples/stress/default-streaming.ts new file mode 100644 index 0000000..50dbd53 --- /dev/null +++ b/examples/stress/default-streaming.ts @@ -0,0 +1,79 @@ +import assert from 'node:assert/strict'; +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +import { AutonomyLevel, DroidMessageType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertToolPairing, + collectStream, + createStressSession, + isDirectRun, + runStressCase, + withTempDir, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('default streaming text', async () => { + const session = await createStressSession(); + try { + const collected = await collectStream( + 'default-streaming-text', + session, + [ + 
'Reply with one short sentence.', + 'Include the exact phrase "default streaming stress".', + ].join(' ') + ); + + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + } finally { + await session.close(); + } + }); + + await runStressCase('default streaming tool use', async () => { + await withTempDir('default-streaming', async (dir) => { + const file = join(dir, 'input.txt'); + await writeFile(file, 'default stream tool fixture\n'); + + const session = await createStressSession({ + autonomyLevel: AutonomyLevel.Medium, + }); + try { + const collected = await collectStream( + 'default-streaming-tool-use', + session, + [ + `Read the file at ${file}.`, + 'Then reply with exactly: default stream read complete', + ].join('\n') + ); + + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + assert.ok( + collected.counts[DroidMessageType.ToolCall] ?? 0, + 'expected default stream to emit a tool_call' + ); + assert.ok( + collected.counts[DroidMessageType.ToolResult] ?? 
0, + 'expected default stream to emit a tool_result' + ); + assertToolPairing(collected.events, collected.name); + } finally { + await session.close(); + } + }); + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/default-vs-partial-consistency.ts b/examples/stress/default-vs-partial-consistency.ts new file mode 100644 index 0000000..e94fb65 --- /dev/null +++ b/examples/stress/default-vs-partial-consistency.ts @@ -0,0 +1,87 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType } from '@factory/droid-sdk'; + +import { + PARTIAL_ONLY_TYPES, + assertAssistantOutput, + assertDefaultStreamShape, + assertNoInternalMessages, + assertPartialStreamShape, + assertPartialTextConsistency, + collectPartialStream, + collectStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('default vs partial consistency', async () => { + const prompt = [ + 'Answer with a concise paragraph about stream aggregation.', + 'Include the phrase "stream consistency stress".', + ].join(' '); + + const defaultSession = await createStressSession(); + const partialSession = await createStressSession(); + try { + const defaultCollected = await collectStream( + 'default-vs-partial-default', + defaultSession, + prompt + ); + const partialCollected = await collectPartialStream( + 'default-vs-partial-partial', + partialSession, + prompt + ); + + assertDefaultStreamShape(defaultCollected); + assertPartialStreamShape(partialCollected); + assertAssistantOutput(defaultCollected); + assertAssistantOutput(partialCollected); + assertPartialTextConsistency(partialCollected); + assertNoInternalMessages( + defaultCollected.result.messages, + 'default result' + ); + assertNoInternalMessages( + partialCollected.result.messages, + 'partial result' + ); + + 
assert.equal(defaultCollected.result.isError, false); + assert.equal(partialCollected.result.isError, false); + assert.ok( + partialCollected.events.some((event) => + PARTIAL_ONLY_TYPES.has(event.type) + ), + 'expected partial stream to include at least one partial-only event' + ); + assert.equal( + defaultCollected.counts[DroidMessageType.Result], + 1, + 'default stream must emit one result' + ); + assert.equal( + partialCollected.counts[DroidMessageType.Result], + 1, + 'partial stream must emit one result' + ); + assert.ok( + partialCollected.counts[DroidMessageType.Assistant] ?? 0, + 'partial stream should still include full assistant messages' + ); + } finally { + await Promise.all([defaultSession.close(), partialSession.close()]); + } + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/error-paths.ts b/examples/stress/error-paths.ts new file mode 100644 index 0000000..2ada6c7 --- /dev/null +++ b/examples/stress/error-paths.ts @@ -0,0 +1,90 @@ +import assert from 'node:assert/strict'; + +import { + ConnectionError, + DroidMessageType, + OutputFormatType, + createSession, +} from '@factory/droid-sdk'; +import type { DroidClientTransport } from '@factory/droid-sdk'; + +import { + assertDefaultStreamShape, + collectStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('turn-level structured output error', async () => { + const session = await createStressSession(); + try { + const collected = await collectStream( + 'error-paths-structured-output', + session, + 'Return any object. 
This schema intentionally cannot be satisfied.', + { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + impossible: { type: 'string', enum: [] }, + }, + required: ['impossible'], + additionalProperties: false, + }, + }, + } + ); + + assertDefaultStreamShape(collected); + assert.equal(collected.result.type, DroidMessageType.Result); + assert.equal( + collected.result.isError, + true, + 'expected turn-level error to be captured on result' + ); + assert.ok( + collected.result.error || + collected.result.structuredOutputError || + collected.result.subtype === 'error_structured_output', + 'expected result to include error details or structured output subtype' + ); + } finally { + await session.close(); + } + }); + + await runStressCase('startup failure throws typed error', async () => { + await assert.rejects( + () => + createSession({ + transport: new FailingStartupTransport(), + }), + (error: unknown) => error instanceof ConnectionError + ); + }); +} + +class FailingStartupTransport implements DroidClientTransport { + readonly isConnected = false; + + send(): void { + throw new ConnectionError('Synthetic startup transport failure'); + } + + onMessage(): void {} + + onError(): void {} + + async close(): Promise {} +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/multi-turn.ts b/examples/stress/multi-turn.ts new file mode 100644 index 0000000..a9e0a62 --- /dev/null +++ b/examples/stress/multi-turn.ts @@ -0,0 +1,137 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType, OutputFormatType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertPartialStreamShape, + assertPartialTextConsistency, + collectPartialStream, + collectStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): 
Promise { + await runStressCase('multi-turn aggregation', async () => { + const session = await createStressSession(); + let previousNumTurns = 0; + let previousTurnCount = 0; + + try { + const first = await collectStream( + 'multi-turn-1', + session, + 'Remember this phrase for the next turn: multi turn stress anchor.' + ); + assertDefaultStreamShape(first); + assertAssistantOutput(first); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + first.result, + previousNumTurns, + previousTurnCount + )); + + const structured = await collectStream( + 'multi-turn-2-structured', + session, + 'Return a structured object with anchor "multi turn stress anchor" and turn 2.', + { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + anchor: { + type: 'string', + enum: ['multi turn stress anchor'], + }, + turn: { type: 'number', enum: [2] }, + }, + required: ['anchor', 'turn'], + additionalProperties: false, + }, + }, + } + ); + assertDefaultStreamShape(structured); + assert.equal(structured.result.isError, false); + assert.ok(structured.result.structuredOutput); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + structured.result, + previousNumTurns, + previousTurnCount + )); + + const third = await collectPartialStream( + 'multi-turn-3-partial', + session, + 'In one sentence, state the remembered anchor phrase.' + ); + assertPartialStreamShape(third); + assertAssistantOutput(third); + assertPartialTextConsistency(third); + assert.equal( + third.result.structuredOutput ?? null, + null, + 'structured output leaked into later turn' + ); + assert.equal( + third.result.structuredOutputError ?? 
null, + null, + 'structured output error leaked into later turn' + ); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + third.result, + previousNumTurns, + previousTurnCount + )); + + for (const index of [4, 5]) { + const collected = await collectStream( + `multi-turn-${index}`, + session, + `Reply with exactly: multi turn stress turn ${index}` + ); + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + assert.equal(collected.result.type, DroidMessageType.Result); + ({ previousNumTurns, previousTurnCount } = assertMonotonicTurns( + collected.result, + previousNumTurns, + previousTurnCount + )); + } + } finally { + await session.close(); + } + }); +} + +function assertMonotonicTurns( + result: { numTurns: number; turnCount: number }, + previousNumTurns: number, + previousTurnCount: number +): { previousNumTurns: number; previousTurnCount: number } { + assert.ok( + result.numTurns >= previousNumTurns, + `numTurns regressed from ${previousNumTurns} to ${result.numTurns}` + ); + assert.ok( + result.turnCount >= previousTurnCount, + `turnCount regressed from ${previousTurnCount} to ${result.turnCount}` + ); + return { + previousNumTurns: result.numTurns, + previousTurnCount: result.turnCount, + }; +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/partial-streaming.ts b/examples/stress/partial-streaming.ts new file mode 100644 index 0000000..7bab3e8 --- /dev/null +++ b/examples/stress/partial-streaming.ts @@ -0,0 +1,47 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertPartialStreamShape, + assertPartialTextConsistency, + collectPartialStream, + createStressSession, + isDirectRun, + runStressCase, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('partial streaming text', async () => { + 
const session = await createStressSession(); + try { + const collected = await collectPartialStream( + 'partial-streaming-text', + session, + [ + 'Reply in exactly two short sentences.', + 'The first sentence must include "partial streaming stress".', + 'The second sentence must include "delta reconstruction".', + ].join(' ') + ); + + assertPartialStreamShape(collected); + assertAssistantOutput(collected); + assert.ok( + (collected.counts[DroidMessageType.AssistantTextDelta] ?? 0) > 0, + 'expected partial stream to include assistant_text_delta events' + ); + assertPartialTextConsistency(collected); + } finally { + await session.close(); + } + }); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/structured-output.ts b/examples/stress/structured-output.ts new file mode 100644 index 0000000..aa91f5b --- /dev/null +++ b/examples/stress/structured-output.ts @@ -0,0 +1,215 @@ +import assert from 'node:assert/strict'; + +import { DroidMessageType, OutputFormatType } from '@factory/droid-sdk'; +import type { DroidResult, MessageOptions } from '@factory/droid-sdk'; +import { z } from 'zod'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + collectStream, + createStressSession, + isDirectRun, + runStress, + runStressCase, +} from './_harness.js'; + +type OutputFormat = NonNullable; +type JsonOutput = NonNullable; + +interface StructuredCase { + name: string; + prompt: string; + outputFormat: OutputFormat; + validate: (value: JsonOutput) => void; +} + +const cases: StructuredCase[] = [ + { + name: 'flat-object', + prompt: + 'Return a JSON object with name "Ada", language "TypeScript", and score 7.', + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + name: { type: 'string', enum: ['Ada'] }, + language: { type: 'string', enum: ['TypeScript'] }, + score: { type: 'number', minimum: 7, maximum: 7 }, + }, 
+ required: ['name', 'language', 'score'], + additionalProperties: false, + }, + }, + validate: (value) => + z + .object({ + name: z.literal('Ada'), + language: z.literal('TypeScript'), + score: z.literal(7), + }) + .parse(value), + }, + { + name: 'nested-array-enum', + prompt: + 'Return a JSON object with summary.status "ok" and two checks named stream and result, both passed true.', + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + summary: { + type: 'object', + properties: { + status: { type: 'string', enum: ['ok'] }, + }, + required: ['status'], + additionalProperties: false, + }, + checks: { + type: 'array', + minItems: 2, + maxItems: 2, + items: { + type: 'object', + properties: { + name: { type: 'string', enum: ['stream', 'result'] }, + passed: { type: 'boolean', enum: [true] }, + }, + required: ['name', 'passed'], + additionalProperties: false, + }, + }, + }, + required: ['summary', 'checks'], + additionalProperties: false, + }, + }, + validate: (value) => + z + .object({ + summary: z.object({ status: z.literal('ok') }), + checks: z + .array( + z.object({ + name: z.enum(['stream', 'result']), + passed: z.literal(true), + }) + ) + .length(2), + }) + .parse(value), + }, + { + name: 'optional-pattern-bounds', + prompt: + 'Return a JSON object with code "stress-123", count 3, tags ["sdk"], and omit notes.', + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + code: { type: 'string', pattern: '^stress-[0-9]{3}$' }, + count: { type: 'number', minimum: 1, maximum: 3 }, + tags: { + type: 'array', + minItems: 1, + items: { type: 'string', enum: ['sdk'] }, + }, + notes: { type: 'string' }, + }, + required: ['code', 'count', 'tags'], + additionalProperties: false, + }, + }, + validate: (value) => + z + .object({ + code: z.string().regex(/^stress-[0-9]{3}$/), + count: z.number().min(1).max(3), + tags: z.array(z.literal('sdk')).min(1), + notes: z.string().optional(), + 
}) + .parse(value), + }, +]; + +export async function main(): Promise { + await runStressCase('structured output run()', async () => { + for (const stressCase of cases) { + const result = await runStress(stressCase.prompt, { + outputFormat: stressCase.outputFormat, + }); + assertStructuredSuccess(result, stressCase); + } + }); + + await runStressCase('structured output streaming', async () => { + const session = await createStressSession(); + try { + for (const stressCase of cases) { + const collected = await collectStream( + `structured-output-${stressCase.name}`, + session, + stressCase.prompt, + { outputFormat: stressCase.outputFormat } + ); + assertDefaultStreamShape(collected); + assertAssistantOutput(collected); + assertStructuredSuccess(collected.result, stressCase); + } + } finally { + await session.close(); + } + }); + + await runStressCase('structured output invalid schema', async () => { + const result = await runStress( + 'Return any object. This schema intentionally cannot be satisfied.', + { + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + impossible: { type: 'string', enum: [] }, + }, + required: ['impossible'], + additionalProperties: false, + }, + }, + } + ); + + assert.equal( + result.isError, + true, + 'expected invalid structured output to mark result as an error' + ); + assert.equal(result.structuredOutput ?? null, null); + assert.ok( + result.structuredOutputError, + 'expected invalid structured output error details' + ); + }); +} + +function assertStructuredSuccess( + result: DroidResult, + stressCase: StructuredCase +): void { + assert.equal(result.type, DroidMessageType.Result); + assert.equal(result.isError, false, `${stressCase.name}: unexpected error`); + assert.equal(result.structuredOutputError ?? 
null, null); + assert.ok(result.structuredOutput, `${stressCase.name}: missing output`); + stressCase.validate(result.structuredOutput as JsonOutput); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/stress/tool-use.ts b/examples/stress/tool-use.ts new file mode 100644 index 0000000..fc4de0e --- /dev/null +++ b/examples/stress/tool-use.ts @@ -0,0 +1,101 @@ +import assert from 'node:assert/strict'; +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +import { AutonomyLevel, DroidMessageType } from '@factory/droid-sdk'; + +import { + assertAssistantOutput, + assertDefaultStreamShape, + assertPartialStreamShape, + assertToolPairing, + collectPartialStream, + collectStream, + createStressSession, + isDirectRun, + runStressCase, + withTempDir, +} from './_harness.js'; + +export async function main(): Promise { + await runStressCase('tool use preservation', async () => { + await withTempDir('tool-use', async (dir) => { + const defaultFile = join(dir, 'default.txt'); + const partialFile = join(dir, 'partial.txt'); + await writeFile(defaultFile, 'default tool use fixture\n'); + await writeFile(partialFile, 'partial tool use fixture\n'); + + const defaultSession = await createStressSession({ + autonomyLevel: AutonomyLevel.Medium, + }); + const partialSession = await createStressSession({ + autonomyLevel: AutonomyLevel.Medium, + }); + + try { + const defaultCollected = await collectStream( + 'tool-use-default', + defaultSession, + toolPrompt(defaultFile, 'default') + ); + const partialCollected = await collectPartialStream( + 'tool-use-partial', + partialSession, + toolPrompt(partialFile, 'partial') + ); + + assertDefaultStreamShape(defaultCollected); + assertPartialStreamShape(partialCollected); + assertAssistantOutput(defaultCollected); + assertAssistantOutput(partialCollected); + assertToolPairing(defaultCollected.events, 
defaultCollected.name); + assertToolPairing(partialCollected.events, partialCollected.name); + + assert.ok( + defaultCollected.counts[DroidMessageType.ToolCall] ?? 0, + 'default stream must expose completed tool calls' + ); + assert.ok( + defaultCollected.counts[DroidMessageType.ToolResult] ?? 0, + 'default stream must expose tool results' + ); + assert.ok( + partialCollected.counts[DroidMessageType.ToolCall] ?? 0, + 'partial stream must include completed tool calls' + ); + assert.ok( + partialCollected.counts[DroidMessageType.ToolCallDelta] ?? 0, + 'partial stream must include tool_call_delta' + ); + assert.ok( + partialCollected.result.messages.some( + (message) => message.type === DroidMessageType.ToolCall + ), + 'result.messages must preserve tool_call' + ); + assert.ok( + partialCollected.result.messages.some( + (message) => message.type === DroidMessageType.ToolResult + ), + 'result.messages must preserve tool_result' + ); + } finally { + await Promise.all([defaultSession.close(), partialSession.close()]); + } + }); + }); +} + +function toolPrompt(file: string, mode: string): string { + return [ + `Use a file-reading tool to read ${file}.`, + `Then respond with exactly: ${mode} tool use stress complete`, + ].join('\n'); +} + +if (isDirectRun(import.meta.url)) { + main().catch((error: unknown) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/structured-output-stress-test.ts b/examples/structured-output-stress-test.ts new file mode 100644 index 0000000..30b9956 --- /dev/null +++ b/examples/structured-output-stress-test.ts @@ -0,0 +1,515 @@ +/** + * Structured output stress test. + * + * Runs several structured-output schemas against one or more Droid models, + * verifies both `run(...)` results and streaming result metadata, and + * stress-tests tool use before structured output. 
+ * + * Usage: + * npx tsx examples/structured-output-stress-test.ts + * DROID_EXEC_PATH=droid-dev npx tsx examples/structured-output-stress-test.ts + * DROID_STRUCTURED_OUTPUT_MODELS="claude-sonnet-4-5,gpt-5.2" npx tsx examples/structured-output-stress-test.ts + */ + +import assert from 'node:assert/strict'; + +import { + AutonomyLevel, + DroidMessageType, + OutputFormatType, + createSession, + run, +} from '@factory/droid-sdk'; +import type { + DroidMessage, + DroidResult, + MessageOptions, +} from '@factory/droid-sdk'; +import { z } from 'zod'; + +type OutputFormat = NonNullable; +type JsonObject = NonNullable; + +interface StressCase { + name: string; + prompt: string; + outputFormat: OutputFormat; + parse: (value: JsonObject) => unknown; +} + +const PersonSchema = z.object({ + name: z.literal('Ada Lovelace'), + language: z.literal('TypeScript'), + score: z.literal(99), +}); + +const PlanSchema = z.object({ + title: z.literal('Structured Output SDK Test'), + priority: z.enum(['low', 'medium', 'high']), + tasks: z.array( + z.object({ + id: z.string(), + done: z.boolean(), + }) + ), +}); + +const MetricsSchema = z.object({ + summary: z.object({ + passed: z.literal(3), + failed: z.literal(0), + }), + checks: z.array( + z.object({ + name: z.enum(['schema', 'stream', 'fallback']), + ok: z.literal(true), + }) + ), +}); + +const PackageSchema = z.object({ + packageName: z.literal('@factory/droid-sdk'), + tmpDir: z.string().startsWith('/tmp/droid-sdk-structured-output-stress-'), + createdFile: z.string().endsWith('/notes.txt'), + finalContent: z.string().includes('edited by structured output stress'), + filesRead: z.array(z.enum(['package.json', 'notes.txt'])), + toolsUsed: z.array(z.enum(['read', 'write', 'edit'])), +}); + +const stressCases: StressCase[] = [ + { + name: 'flat-literals', + prompt: [ + 'Return a structured object for Ada Lovelace.', + 'Use exactly name "Ada Lovelace", language "TypeScript", and score 99.', + ].join(' '), + outputFormat: { + type: 
OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + name: { type: 'string' }, + language: { type: 'string', enum: ['TypeScript'] }, + score: { type: 'number', enum: [99] }, + }, + required: ['name', 'language', 'score'], + additionalProperties: false, + }, + }, + parse: (value) => PersonSchema.parse(value), + }, + { + name: 'nested-array-enum', + prompt: [ + 'Return a project plan object.', + 'Use title "Structured Output SDK Test", priority "high",', + 'and exactly two tasks with ids "schema" and "stream".', + 'Set both task done values to true.', + ].join(' '), + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + title: { type: 'string', enum: ['Structured Output SDK Test'] }, + priority: { type: 'string', enum: ['low', 'medium', 'high'] }, + tasks: { + type: 'array', + minItems: 2, + maxItems: 2, + items: { + type: 'object', + properties: { + id: { type: 'string', enum: ['schema', 'stream'] }, + done: { type: 'boolean', enum: [true] }, + }, + required: ['id', 'done'], + additionalProperties: false, + }, + }, + }, + required: ['title', 'priority', 'tasks'], + additionalProperties: false, + }, + }, + parse: (value) => PlanSchema.parse(value), + }, + { + name: 'nested-metrics', + prompt: [ + 'Return validation metrics.', + 'The summary must have passed 3 and failed 0.', + 'The checks array must contain schema, stream, and fallback, each with ok true.', + ].join(' '), + outputFormat: { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + summary: { + type: 'object', + properties: { + passed: { type: 'number', enum: [3] }, + failed: { type: 'number', enum: [0] }, + }, + required: ['passed', 'failed'], + additionalProperties: false, + }, + checks: { + type: 'array', + minItems: 3, + maxItems: 3, + items: { + type: 'object', + properties: { + name: { + type: 'string', + enum: ['schema', 'stream', 'fallback'], + }, + ok: { type: 'boolean', enum: [true] }, + }, + 
required: ['name', 'ok'], + additionalProperties: false, + }, + }, + }, + required: ['summary', 'checks'], + additionalProperties: false, + }, + }, + parse: (value) => MetricsSchema.parse(value), + }, +]; + +function parseModels(): Array { + const raw = process.env['DROID_STRUCTURED_OUTPUT_MODELS']; + if (!raw) return [undefined]; + return raw + .split(',') + .map((model) => model.trim()) + .filter(Boolean); +} + +function labelModel(modelId: string | undefined): string { + return modelId ?? 'default session model'; +} + +function assertStructuredResult( + result: DroidResult, + stressCase: StressCase +): void { + const diagnostic = JSON.stringify( + { + text: result.text, + error: result.error, + structuredOutputError: result.structuredOutputError, + messages: result.messages + .filter( + (message) => + message.type === DroidMessageType.Assistant || + message.type === DroidMessageType.Error || + message.type === DroidMessageType.Result + ) + .map((message) => { + if (message.type !== DroidMessageType.Assistant) return message; + return { + type: message.type, + role: message.message.role, + content: message.message.content, + }; + }), + }, + null, + 2 + ); + + assert.equal( + result.structuredOutputError, + null, + `${stressCase.name}: expected no structured output error\n${diagnostic}` + ); + assert.ok( + result.structuredOutput, + `${stressCase.name}: expected structuredOutput\n${diagnostic}` + ); + stressCase.parse(result.structuredOutput); +} + +function findMessage( + messages: DroidMessage[], + type: T +): Extract | undefined { + return messages.find( + (message): message is Extract => + message.type === type + ); +} + +function findNormalToolUse( + messages: DroidMessage[] +): Extract | undefined { + return messages.find( + (message): message is Extract => + message.type === DroidMessageType.ToolCall && + message.toolUse.name !== 'StructuredOutput' + ); +} + +function findToolUses( + messages: DroidMessage[], + matches: (toolName: string) => boolean +): 
Array> { + return messages.filter( + (message): message is Extract => + message.type === DroidMessageType.ToolCall && + matches(message.toolUse.name) + ); +} + +async function runCase( + modelId: string | undefined, + stressCase: StressCase +): Promise { + const result = await run(stressCase.prompt, { + execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', + cwd: process.cwd(), + ...(modelId !== undefined && { modelId }), + outputFormat: stressCase.outputFormat, + }); + + assertStructuredResult(result, stressCase); + + console.log( + ` ✓ ${stressCase.name}: ${JSON.stringify(result.structuredOutput)}` + ); +} + +async function runStreamingCase(modelId: string | undefined): Promise { + const stressCase = stressCases[0]; + const session = await createSession({ + execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', + cwd: process.cwd(), + ...(modelId !== undefined && { modelId }), + }); + + try { + const messages: DroidMessage[] = []; + for await (const message of session.stream(stressCase.prompt, { + outputFormat: stressCase.outputFormat, + })) { + messages.push(message); + } + + const result = findMessage(messages, DroidMessageType.Result); + + assert.ok(result, 'streaming: expected result message'); + assert.equal(result.structuredOutputError, null); + assert.ok(result.structuredOutput); + stressCase.parse(result.structuredOutput as JsonObject); + + console.log(' ✓ streaming emits structured output on result'); + } finally { + await session.close(); + } +} + +async function runToolUseCase(modelId: string | undefined): Promise { + const tmpDir = `/tmp/droid-sdk-structured-output-stress-${process.pid}-${Date.now()}`; + const tmpFile = `${tmpDir}/notes.txt`; + const outputFormat: OutputFormat = { + type: OutputFormatType.JsonSchema, + schema: { + type: 'object', + properties: { + packageName: { type: 'string', enum: ['@factory/droid-sdk'] }, + tmpDir: { type: 'string', enum: [tmpDir] }, + createdFile: { type: 'string', enum: [tmpFile] }, + finalContent: { + type: 
'string', + enum: [ + [ + 'created by structured output stress', + 'edited by structured output stress', + ].join('\n'), + ], + }, + filesRead: { + type: 'array', + minItems: 2, + items: { + type: 'string', + enum: ['package.json', 'notes.txt'], + }, + }, + toolsUsed: { + type: 'array', + minItems: 3, + items: { + type: 'string', + enum: ['read', 'write', 'edit'], + }, + }, + }, + required: [ + 'packageName', + 'tmpDir', + 'createdFile', + 'finalContent', + 'filesRead', + 'toolsUsed', + ], + additionalProperties: false, + }, + }; + const session = await createSession({ + execPath: process.env['DROID_EXEC_PATH'] ?? 'droid', + autonomyLevel: AutonomyLevel.Medium, + cwd: process.cwd(), + ...(modelId !== undefined && { modelId }), + }); + + try { + const messages: DroidMessage[] = []; + for await (const message of session.stream( + [ + 'You must use multiple tools before producing structured output.', + 'Use the Read tool to read package.json in the current working directory.', + `Create the directory ${tmpDir}.`, + `Write ${tmpFile} with exactly this first line: created by structured output stress`, + `Then use an edit tool to modify ${tmpFile} so its full content is exactly:`, + 'created by structured output stress', + 'edited by structured output stress', + `Use the Read tool to read ${tmpFile} after editing it.`, + 'Only after all tool calls are done, return the structured object.', + 'Set packageName from package.json name.', + `Set tmpDir to ${tmpDir} and createdFile to ${tmpFile}.`, + 'Set filesRead to include package.json and notes.txt.', + 'Set toolsUsed to include read, write, and edit.', + ].join('\n'), + { outputFormat } + )) { + messages.push(message); + } + + const normalToolUse = findNormalToolUse(messages); + const readToolUses = findToolUses( + messages, + (toolName) => toolName.toLowerCase() === 'read' + ); + const writeToolUses = findToolUses(messages, (toolName) => { + const normalized = toolName.toLowerCase(); + return ( + 
normalized.includes('write') || + normalized.includes('create') || + normalized.includes('execute') + ); + }); + const editToolUses = findToolUses(messages, (toolName) => { + const normalized = toolName.toLowerCase(); + return normalized.includes('edit') || normalized.includes('patch'); + }); + const result = findMessage(messages, DroidMessageType.Result); + const diagnostic = JSON.stringify( + messages + .filter( + (message) => + message.type === DroidMessageType.ToolCall || + message.type === DroidMessageType.ToolResult || + message.type === DroidMessageType.Result || + message.type === DroidMessageType.Error + ) + .map((message) => + message.type === DroidMessageType.ToolCall + ? { + type: message.type, + toolName: message.toolUse.name, + toolInput: message.toolUse.input, + } + : message + ), + null, + 2 + ); + + assert.ok( + normalToolUse, + `tool-use: expected a normal tool call before structured output\n${diagnostic}` + ); + assert.ok( + readToolUses.length >= 2, + `tool-use: expected at least 2 Read tool calls, saw ${readToolUses.length}\n${diagnostic}` + ); + assert.ok( + writeToolUses.length >= 1, + `tool-use: expected at least 1 write/create tool call\n${diagnostic}` + ); + assert.ok( + editToolUses.length >= 1, + `tool-use: expected at least 1 edit tool call\n${diagnostic}` + ); + assert.ok(result, 'tool-use: expected result message'); + assert.equal( + result.structuredOutputError, + null, + `tool-use: expected no structured output error\n${diagnostic}` + ); + assert.ok(result.structuredOutput); + PackageSchema.parse(result.structuredOutput); + + console.log( + ` ✓ read/write/edit before structured output: ${readToolUses.length}/${writeToolUses.length}/${editToolUses.length} tool calls` + ); + } finally { + await session.close(); + } +} + +interface Failure { + model: string; + step: string; + error: unknown; +} + +const failures: Failure[] = []; + +async function runStep( + modelId: string | undefined, + step: string, + callback: () => Promise +): 
Promise { + try { + await callback(); + } catch (error) { + failures.push({ model: labelModel(modelId), step, error }); + console.error( + ` ✗ ${step}: ${error instanceof Error ? error.message : String(error)}` + ); + } +} + +for (const modelId of parseModels()) { + console.log(`\n=== Testing ${labelModel(modelId)} ===`); + for (const stressCase of stressCases) { + await runStep(modelId, stressCase.name, () => runCase(modelId, stressCase)); + } + await runStep(modelId, 'streaming', () => runStreamingCase(modelId)); + await runStep(modelId, 'read/write/edit tool-use', () => + runToolUseCase(modelId) + ); +} + +if (failures.length > 0) { + console.error('\nStructured output stress test failures:'); + for (const failure of failures) { + console.error( + `- ${failure.model} / ${failure.step}: ${ + failure.error instanceof Error + ? (failure.error.stack ?? failure.error.message) + : String(failure.error) + }` + ); + } + process.exitCode = 1; +} else { + console.log('\nStructured output stress test passed'); +}