From 408c248933c08d5c1868522d10d1715fa6091c9d Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Fri, 10 Apr 2026 14:58:33 +0200 Subject: [PATCH 1/2] fix: clean up benchmark MCP sessions --- scripts/benchmark-comparators.mjs | 203 ++++++++++++-------------- scripts/lib/managed-mcp-session.mjs | 93 ++++++++++++ tests/benchmark-comparators.test.ts | 92 ++++++++++++ tests/fixtures/mcp/echo-server.mjs | 36 +++++ tests/fixtures/mcp/hanging-server.mjs | 1 + 5 files changed, 317 insertions(+), 108 deletions(-) create mode 100644 scripts/lib/managed-mcp-session.mjs create mode 100644 tests/benchmark-comparators.test.ts create mode 100644 tests/fixtures/mcp/echo-server.mjs create mode 100644 tests/fixtures/mcp/hanging-server.mjs diff --git a/scripts/benchmark-comparators.mjs b/scripts/benchmark-comparators.mjs index 17bc264..aa6f047 100644 --- a/scripts/benchmark-comparators.mjs +++ b/scripts/benchmark-comparators.mjs @@ -17,6 +17,7 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs'; import { execSync, exec } from 'child_process'; import { parseArgs } from 'util'; import { promisify } from 'util'; +import { withManagedStdioClientSession } from './lib/managed-mcp-session.mjs'; const execAsync = promisify(exec); @@ -293,125 +294,111 @@ const COMPARATOR_ADAPTERS = [ // --------------------------------------------------------------------------- async function runComparatorViaMcp(adapter, rootPath, tasks) { - let Client, StdioClientTransport; - try { - ({ Client } = await import('@modelcontextprotocol/sdk/client/index.js')); - ({ StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js')); - } catch (err) { - throw new Error(`MCP SDK not available: ${err.message}`); - } - - const transport = new StdioClientTransport({ - command: adapter.serverCommand, - args: adapter.serverArgs, - env: { ...process.env, ...adapter.serverEnv } - }); + return withManagedStdioClientSession( + { + serverCommand: adapter.serverCommand, + serverArgs: adapter.serverArgs, + serverEnv: adapter.serverEnv, + connectTimeoutMs: adapter.connectTimeout ?? 15_000 + }, + async ({ client }) => { + if (adapter.initTimeout > 0) { + await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout)); + } - const client = new Client({ name: 'benchmark-runner', version: '1.0.0' }); + let availableTools = []; + try { + const toolsResult = await client.listTools(); + availableTools = toolsResult.tools ?? []; + } catch (err) { + throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`); + } - try { - await client.connect(transport); - } catch (err) { - throw new Error(`Failed to connect to ${adapter.name} MCP server: ${err.message}`); - } + const toolNames = availableTools.map((t) => t.name); + let searchToolName = adapter.searchTool; + if (!searchToolName) { + searchToolName = + toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ?? + null; + } - // Wait for server to be ready - if (adapter.initTimeout > 0) { - await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout)); - } + if (!searchToolName || !toolNames.includes(searchToolName)) { + throw new Error( + `No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}` + ); + } - // Discover tools - let availableTools = []; - try { - const toolsResult = await client.listTools(); - availableTools = toolsResult.tools ?? []; - } catch (err) { - await client.close(); - throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`); - } + let totalToolCalls = 0; + if (adapter.indexTool && toolNames.includes(adapter.indexTool)) { + console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`); + const indexStartMs = Date.now(); + try { + await Promise.race([ + client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }), + new Promise((_, reject) => + setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000) + ) + ]); + totalToolCalls++; + console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`); + } catch (err) { + throw new Error(`${adapter.name} indexing failed: ${err.message}`); + } + } - const toolNames = availableTools.map((t) => t.name); + const taskResults = []; + for (const task of tasks) { + const startMs = Date.now(); + let payload = ''; + let toolCallCount = totalToolCalls; + + try { + const result = await client.callTool({ + name: searchToolName, + arguments: adapter.searchArgs(task) + }); + toolCallCount++; + payload = adapter.extractPayload(result); + } catch (err) { + console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`); + payload = ''; + } - // Determine search tool (may be dynamic for CodeGraphContext) - let searchToolName = adapter.searchTool; - if (!searchToolName) { - // Try to find a search-like tool - searchToolName = - toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ?? - null; - } + const elapsedMs = Date.now() - startMs; + const payloadBytes = countUtf8Bytes(payload); + const estimatedTokens = estimateTokens(payloadBytes); + const { usefulnessScore, matchedSignals, missingSignals } = matchSignals( + payload, + task.expectedSignals, + task.forbiddenSignals + ); - if (!searchToolName || !toolNames.includes(searchToolName)) { - await client.close(); - throw new Error( - `No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}` - ); - } + taskResults.push({ + taskId: task.id, + job: task.job, + surface: task.surface, + usefulnessScore, + matchedSignals, + missingSignals, + payloadBytes, + estimatedTokens, + toolCallCount, + elapsedMs + }); + } - // Index the repo if needed - let totalToolCalls = 0; - if (adapter.indexTool && toolNames.includes(adapter.indexTool)) { - console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`); - const indexStartMs = Date.now(); - try { - await Promise.race([ - client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }), - new Promise((_, reject) => - setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000) - ) - ]); - totalToolCalls++; - console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`); - } catch (err) { - await client.close(); - throw new Error(`${adapter.name} indexing failed: ${err.message}`); + return taskResults; } - } - - // Run each task - const taskResults = []; - for (const task of tasks) { - const startMs = Date.now(); - let payload = ''; - let toolCallCount = totalToolCalls; - - try { - const result = await client.callTool({ - name: searchToolName, - arguments: adapter.searchArgs(task) - }); - toolCallCount++; - payload = adapter.extractPayload(result); - } catch (err) { - console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`); - payload = ''; + ).catch((err) => { + if (err.message.startsWith('Failed to connect to')) { + throw err; } - - const elapsedMs = Date.now() - startMs; - const payloadBytes = countUtf8Bytes(payload); - const estimatedTokens = estimateTokens(payloadBytes); - const { usefulnessScore, matchedSignals, missingSignals } = matchSignals( - payload, - task.expectedSignals, - task.forbiddenSignals + throw new Error( + err.message.includes('timed out') + ? `Failed to connect to ${adapter.name} MCP server: ${err.message}` + : err.message ); - - taskResults.push({ - taskId: task.id, - job: task.job, - surface: task.surface, - usefulnessScore, - matchedSignals, - missingSignals, - payloadBytes, - estimatedTokens, - toolCallCount, - elapsedMs - }); - } - - await client.close(); - return taskResults; + }); } // --------------------------------------------------------------------------- diff --git a/scripts/lib/managed-mcp-session.mjs b/scripts/lib/managed-mcp-session.mjs new file mode 100644 index 0000000..bdadc9e --- /dev/null +++ b/scripts/lib/managed-mcp-session.mjs @@ -0,0 +1,93 @@ +import process from 'node:process'; + +async function loadSdkClient() { + const [{ Client }, { StdioClientTransport }] = await Promise.all([ + import('@modelcontextprotocol/sdk/client/index.js'), + import('@modelcontextprotocol/sdk/client/stdio.js') + ]); + + return { Client, StdioClientTransport }; +} + +function createTimeoutError(timeoutMs) { + const seconds = Math.max(1, Math.round(timeoutMs / 1000)); + return new Error(`MCP client connect timed out after ${seconds}s`); +} + +function withTimeout(promise, timeoutMs) { + if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) { + return promise; + } + + let timer; + const timeoutPromise = new Promise((_, reject) => { + timer = setTimeout(() => reject(createTimeoutError(timeoutMs)), timeoutMs); + timer.unref?.(); + }); + + return Promise.race([promise, timeoutPromise]).finally(() => { + if (timer) { + clearTimeout(timer); + } + }); +} + +async function safeClose(client, transport, connected) { + const closeAttempts = []; + + if (connected) { + closeAttempts.push(client.close().catch(() => undefined)); + } + + closeAttempts.push(transport.close?.().catch(() => undefined)); + await Promise.all(closeAttempts); +} + +export async function withManagedStdioClientSession(options, callback) { + const { + serverCommand, + serverArgs = [], + serverEnv = {}, + cwd, + clientInfo = { name: 'benchmark-runner', version: '1.0.0' }, + connectTimeoutMs = 10_000, + onSpawn + } = options; + + const { Client, StdioClientTransport } = await loadSdkClient(); + const transport = new StdioClientTransport({ + command: serverCommand, + args: serverArgs, + env: { ...process.env, ...serverEnv }, + cwd + }); + const client = new Client(clientInfo); + + let connected = false; + let settling = false; + const connectPromise = client.connect(transport); + const spawnNotification = (async () => { + if (typeof onSpawn !== 'function') { + return; + } + + while (!settling) { + if (transport.pid !== null) { + onSpawn(transport.pid); + return; + } + await new Promise((resolve) => setTimeout(resolve, 10)); + } + })(); + + try { + await withTimeout(connectPromise, connectTimeoutMs); + connected = true; + return await callback({ client, transport }); + } finally { + settling = true; + await safeClose(client, transport, connected); + await spawnNotification.catch(() => undefined); + await connectPromise.catch(() => undefined); + } +} diff --git a/tests/benchmark-comparators.test.ts b/tests/benchmark-comparators.test.ts new file mode 100644 index 0000000..8863ef4 --- /dev/null +++ b/tests/benchmark-comparators.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it } from 'vitest'; +import path from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +async function importHelper() { + return import(pathToFileURL(path.resolve(__dirname, '..', 'scripts', 'lib', 'managed-mcp-session.mjs')).href); +} + +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (error: unknown) { + return (error as NodeJS.ErrnoException).code !== 'ESRCH'; + } +} + +async function waitForProcessExit(pid: number, timeoutMs = 5000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (!isProcessAlive(pid)) { + return; + } + await new Promise((resolve) => setTimeout(resolve, 50)); + } + throw new Error(`Process ${pid} still alive after ${timeoutMs}ms`); +} + +describe('managed MCP benchmark sessions', () => { + it('kills the child when connect times out', async () => { + const { withManagedStdioClientSession } = await importHelper(); + const hangingServer = path.resolve(__dirname, 'fixtures', 'mcp', 'hanging-server.mjs'); + + let pid: number | null = null; + + await expect( + withManagedStdioClientSession( + { + serverCommand: process.execPath, + serverArgs: [hangingServer], + connectTimeoutMs: 200, + onSpawn: (childPid: number) => { + pid = childPid; + } + }, + async () => undefined + ) + ).rejects.toThrow('MCP client connect timed out'); + + expect(pid).toBeTypeOf('number'); + await waitForProcessExit(pid as number); + }); + + it('kills the child when work fails after a successful connection', async () => { + const { withManagedStdioClientSession } = await importHelper(); + const echoServer = path.resolve(__dirname, 'fixtures', 'mcp', 'echo-server.mjs'); + + let pid: number | null = null; + + await expect( + withManagedStdioClientSession( + { + serverCommand: process.execPath, + serverArgs: [echoServer], + connectTimeoutMs: 5000, + onSpawn: (childPid: number) => { + pid = childPid; + } + }, + async ({ + client, + transport + }: { + client: { + listTools: () => Promise<{ tools: Array<{ name: string }> }>; + }; + transport: { pid: number | null }; + }) => { + pid = transport.pid ?? pid; + const tools = await client.listTools(); + expect(tools.tools.map((tool) => tool.name)).toContain('echo_search'); + throw new Error('forced failure'); + } + ) + ).rejects.toThrow('forced failure'); + + expect(pid).toBeTypeOf('number'); + await waitForProcessExit(pid as number); + }); +}); diff --git a/tests/fixtures/mcp/echo-server.mjs b/tests/fixtures/mcp/echo-server.mjs new file mode 100644 index 0000000..0c508dc --- /dev/null +++ b/tests/fixtures/mcp/echo-server.mjs @@ -0,0 +1,36 @@ +import { Server } from '@modelcontextprotocol/sdk/server/index.js'; +import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; +import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'; + +const server = new Server( + { name: 'echo-test-server', version: '1.0.0' }, + { capabilities: { tools: {} } } +); + +server.setRequestHandler(ListToolsRequestSchema, async () => ({ + tools: [ + { + name: 'echo_search', + description: 'Echoes the incoming query', + inputSchema: { + type: 'object', + properties: { + query: { type: 'string' } + }, + required: ['query'] + } + } + ] +})); + +server.setRequestHandler(CallToolRequestSchema, async (request) => ({ + content: [ + { + type: 'text', + text: request.params.arguments?.query ?? '' + } + ] +})); + +const transport = new StdioServerTransport(); +await server.connect(transport); diff --git a/tests/fixtures/mcp/hanging-server.mjs b/tests/fixtures/mcp/hanging-server.mjs new file mode 100644 index 0000000..32940f0 --- /dev/null +++ b/tests/fixtures/mcp/hanging-server.mjs @@ -0,0 +1 @@ +setInterval(() => {}, 1000); From fcce4b6e820e55c085ff933e436c3bc76881ab6b Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Fri, 10 Apr 2026 15:44:57 +0200 Subject: [PATCH 2/2] fix: harden managed MCP session cleanup --- scripts/benchmark-comparators.mjs | 3 --- scripts/lib/managed-mcp-session.mjs | 13 ++++++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/benchmark-comparators.mjs b/scripts/benchmark-comparators.mjs index aa6f047..207ace5 100644 --- a/scripts/benchmark-comparators.mjs +++ b/scripts/benchmark-comparators.mjs @@ -390,9 +390,6 @@ async function runComparatorViaMcp(adapter, rootPath, tasks) { return taskResults; } ).catch((err) => { - if (err.message.startsWith('Failed to connect to')) { - throw err; - } throw new Error( err.message.includes('timed out') ? `Failed to connect to ${adapter.name} MCP server: ${err.message}` diff --git a/scripts/lib/managed-mcp-session.mjs b/scripts/lib/managed-mcp-session.mjs index bdadc9e..5f54e55 100644 --- a/scripts/lib/managed-mcp-session.mjs +++ b/scripts/lib/managed-mcp-session.mjs @@ -32,6 +32,13 @@ function withTimeout(promise, timeoutMs) { }); } +function delay(timeoutMs) { + return new Promise((resolve) => { + const timer = setTimeout(resolve, timeoutMs); + timer.unref?.(); + }); +} + async function safeClose(client, transport, connected) { const closeAttempts = []; @@ -78,6 +85,10 @@ export async function withManagedStdioClientSession(options, callback) { } await new Promise((resolve) => setTimeout(resolve, 10)); } + + if (transport.pid !== null) { + onSpawn(transport.pid); + } })(); try { @@ -88,6 +99,6 @@ export async function withManagedStdioClientSession(options, callback) { settling = true; await safeClose(client, transport, connected); await spawnNotification.catch(() => undefined); - await connectPromise.catch(() => undefined); + await Promise.race([connectPromise, delay(5_000)]).catch(() => undefined); } }