Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 95 additions & 108 deletions scripts/benchmark-comparators.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
import { execSync, exec } from 'child_process';
import { parseArgs } from 'util';
import { promisify } from 'util';
import { withManagedStdioClientSession } from './lib/managed-mcp-session.mjs';

const execAsync = promisify(exec);

Expand Down Expand Up @@ -293,125 +294,111 @@ const COMPARATOR_ADAPTERS = [
// ---------------------------------------------------------------------------

async function runComparatorViaMcp(adapter, rootPath, tasks) {
let Client, StdioClientTransport;
try {
({ Client } = await import('@modelcontextprotocol/sdk/client/index.js'));
({ StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js'));
} catch (err) {
throw new Error(`MCP SDK not available: ${err.message}`);
}

const transport = new StdioClientTransport({
command: adapter.serverCommand,
args: adapter.serverArgs,
env: { ...process.env, ...adapter.serverEnv }
});
return withManagedStdioClientSession(
{
serverCommand: adapter.serverCommand,
serverArgs: adapter.serverArgs,
serverEnv: adapter.serverEnv,
connectTimeoutMs: adapter.connectTimeout ?? 15_000
},
async ({ client }) => {
if (adapter.initTimeout > 0) {
await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout));
}

const client = new Client({ name: 'benchmark-runner', version: '1.0.0' });
let availableTools = [];
try {
const toolsResult = await client.listTools();
availableTools = toolsResult.tools ?? [];
} catch (err) {
throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`);
}

try {
await client.connect(transport);
} catch (err) {
throw new Error(`Failed to connect to ${adapter.name} MCP server: ${err.message}`);
}
const toolNames = availableTools.map((t) => t.name);
let searchToolName = adapter.searchTool;
if (!searchToolName) {
searchToolName =
toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ??
null;
}

// Wait for server to be ready
if (adapter.initTimeout > 0) {
await new Promise((resolve) => setTimeout(resolve, adapter.initTimeout));
}
if (!searchToolName || !toolNames.includes(searchToolName)) {
throw new Error(
`No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}`
);
}

// Discover tools
let availableTools = [];
try {
const toolsResult = await client.listTools();
availableTools = toolsResult.tools ?? [];
} catch (err) {
await client.close();
throw new Error(`Failed to list tools from ${adapter.name}: ${err.message}`);
}
let totalToolCalls = 0;
if (adapter.indexTool && toolNames.includes(adapter.indexTool)) {
console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`);
const indexStartMs = Date.now();
try {
await Promise.race([
client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }),
new Promise((_, reject) =>
setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000)
)
]);
totalToolCalls++;
console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`);
} catch (err) {
throw new Error(`${adapter.name} indexing failed: ${err.message}`);
}
}

const toolNames = availableTools.map((t) => t.name);
const taskResults = [];
for (const task of tasks) {
const startMs = Date.now();
let payload = '';
let toolCallCount = totalToolCalls;

try {
const result = await client.callTool({
name: searchToolName,
arguments: adapter.searchArgs(task)
});
toolCallCount++;
payload = adapter.extractPayload(result);
} catch (err) {
console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`);
payload = '';
}

// Determine search tool (may be dynamic for CodeGraphContext)
let searchToolName = adapter.searchTool;
if (!searchToolName) {
// Try to find a search-like tool
searchToolName =
toolNames.find((n) => n.includes('search') || n.includes('query') || n.includes('find')) ??
null;
}
const elapsedMs = Date.now() - startMs;
const payloadBytes = countUtf8Bytes(payload);
const estimatedTokens = estimateTokens(payloadBytes);
const { usefulnessScore, matchedSignals, missingSignals } = matchSignals(
payload,
task.expectedSignals,
task.forbiddenSignals
);

if (!searchToolName || !toolNames.includes(searchToolName)) {
await client.close();
throw new Error(
`No suitable search tool found for ${adapter.name}. Available: ${toolNames.join(', ')}`
);
}
taskResults.push({
taskId: task.id,
job: task.job,
surface: task.surface,
usefulnessScore,
matchedSignals,
missingSignals,
payloadBytes,
estimatedTokens,
toolCallCount,
elapsedMs
});
}

// Index the repo if needed
let totalToolCalls = 0;
if (adapter.indexTool && toolNames.includes(adapter.indexTool)) {
console.log(` [${adapter.name}] Indexing ${path.basename(rootPath)}...`);
const indexStartMs = Date.now();
try {
await Promise.race([
client.callTool({ name: adapter.indexTool, arguments: adapter.indexArgs(rootPath) }),
new Promise((_, reject) =>
setTimeout(() => reject(new Error('Index timeout')), adapter.indexTimeout ?? 120000)
)
]);
totalToolCalls++;
console.log(` [${adapter.name}] Index complete in ${Date.now() - indexStartMs}ms`);
} catch (err) {
await client.close();
throw new Error(`${adapter.name} indexing failed: ${err.message}`);
return taskResults;
}
}

// Run each task
const taskResults = [];
for (const task of tasks) {
const startMs = Date.now();
let payload = '';
let toolCallCount = totalToolCalls;

try {
const result = await client.callTool({
name: searchToolName,
arguments: adapter.searchArgs(task)
});
toolCallCount++;
payload = adapter.extractPayload(result);
} catch (err) {
console.warn(` [${adapter.name}] Task ${task.id} failed: ${err.message}`);
payload = '';
).catch((err) => {
if (err.message.startsWith('Failed to connect to')) {
throw err;
}

const elapsedMs = Date.now() - startMs;
const payloadBytes = countUtf8Bytes(payload);
const estimatedTokens = estimateTokens(payloadBytes);
const { usefulnessScore, matchedSignals, missingSignals } = matchSignals(
payload,
task.expectedSignals,
task.forbiddenSignals
throw new Error(
err.message.includes('timed out')
? `Failed to connect to ${adapter.name} MCP server: ${err.message}`
: err.message
);

taskResults.push({
taskId: task.id,
job: task.job,
surface: task.surface,
usefulnessScore,
matchedSignals,
missingSignals,
payloadBytes,
estimatedTokens,
toolCallCount,
elapsedMs
});
}

await client.close();
return taskResults;
});
Comment on lines +392 to +398
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Dead-code branch in catch handler

err.message.startsWith('Failed to connect to') can never be true here. withManagedStdioClientSession either throws "MCP client connect timed out after Xs" (timeout path) or re-throws errors from the callback, none of which start with "Failed to connect to". The guard is harmless but misleading; removing it makes the intent clearer:

Suggested change
).catch((err) => {
if (err.message.startsWith('Failed to connect to')) {
throw err;
}
const elapsedMs = Date.now() - startMs;
const payloadBytes = countUtf8Bytes(payload);
const estimatedTokens = estimateTokens(payloadBytes);
const { usefulnessScore, matchedSignals, missingSignals } = matchSignals(
payload,
task.expectedSignals,
task.forbiddenSignals
throw new Error(
err.message.includes('timed out')
? `Failed to connect to ${adapter.name} MCP server: ${err.message}`
: err.message
);
taskResults.push({
taskId: task.id,
job: task.job,
surface: task.surface,
usefulnessScore,
matchedSignals,
missingSignals,
payloadBytes,
estimatedTokens,
toolCallCount,
elapsedMs
});
}
await client.close();
return taskResults;
});
}).catch((err) => {
throw new Error(
err.message.includes('timed out')
? `Failed to connect to ${adapter.name} MCP server: ${err.message}`
: err.message
);
});

}

// ---------------------------------------------------------------------------
Expand Down
93 changes: 93 additions & 0 deletions scripts/lib/managed-mcp-session.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import process from 'node:process';

async function loadSdkClient() {
const [{ Client }, { StdioClientTransport }] = await Promise.all([
import('@modelcontextprotocol/sdk/client/index.js'),
import('@modelcontextprotocol/sdk/client/stdio.js')
]);

return { Client, StdioClientTransport };
}

function createTimeoutError(timeoutMs) {
const seconds = Math.max(1, Math.round(timeoutMs / 1000));
return new Error(`MCP client connect timed out after ${seconds}s`);
}

function withTimeout(promise, timeoutMs) {
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
return promise;
}

let timer;
const timeoutPromise = new Promise((_, reject) => {
timer = setTimeout(() => reject(createTimeoutError(timeoutMs)), timeoutMs);
timer.unref?.();
});

return Promise.race([promise, timeoutPromise]).finally(() => {
if (timer) {
clearTimeout(timer);
}
});
}

async function safeClose(client, transport, connected) {
const closeAttempts = [];

if (connected) {
closeAttempts.push(client.close().catch(() => undefined));
}

closeAttempts.push(transport.close?.().catch(() => undefined));
await Promise.all(closeAttempts);
}

export async function withManagedStdioClientSession(options, callback) {
const {
serverCommand,
serverArgs = [],
serverEnv = {},
cwd,
clientInfo = { name: 'benchmark-runner', version: '1.0.0' },
connectTimeoutMs = 10_000,
onSpawn
} = options;

const { Client, StdioClientTransport } = await loadSdkClient();
const transport = new StdioClientTransport({
command: serverCommand,
args: serverArgs,
env: { ...process.env, ...serverEnv },
cwd
});
const client = new Client(clientInfo);

let connected = false;
let settling = false;
const connectPromise = client.connect(transport);
const spawnNotification = (async () => {
if (typeof onSpawn !== 'function') {
return;
}

while (!settling) {
if (transport.pid !== null) {
onSpawn(transport.pid);
return;
}
await new Promise((resolve) => setTimeout(resolve, 10));
}
})();
Comment on lines +73 to +92
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 onSpawn can be silently dropped due to settling race

The polling loop exits the moment settling = true is set in the finally block. If the child process spawns during the 10 ms sleep that happens to span the settling = true assignment, the loop re-evaluates while (!settling), sees true, and exits without ever calling onSpawn — even though transport.pid is now populated. In the connect-timeout test this leaves pid === null, causing expect(pid).toBeTypeOf('number') to fail.

In practice, with a 200 ms timeout and a local process that spawns in < 50 ms, the window is very narrow. But under CI load or on slow machines the test can become flaky. A robust fix is to perform one final pid check after the loop exits:

const spawnNotification = (async () => {
  if (typeof onSpawn !== 'function') return;

  while (!settling) {
    if (transport.pid !== null) {
      onSpawn(transport.pid);
      return;
    }
    await new Promise((resolve) => setTimeout(resolve, 10));
  }
  // One final check: process may have spawned while we were asleep
  if (transport.pid !== null) {
    onSpawn(transport.pid);
  }
})();


try {
await withTimeout(connectPromise, connectTimeoutMs);
connected = true;
return await callback({ client, transport });
} finally {
settling = true;
await safeClose(client, transport, connected);
await spawnNotification.catch(() => undefined);
await connectPromise.catch(() => undefined);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 connectPromise awaited without a timeout after cleanup

After safeClose terminates the child process, await connectPromise.catch(() => undefined) is called with no deadline. If the MCP SDK's connect promise doesn't reject when the transport is force-closed (e.g., due to an SDK buffering bug or a process that ignores SIGTERM), this line will hang indefinitely and withManagedStdioClientSession will never return — defeating the resource-cleanup guarantee. A defensive timeout here would bound the worst-case hang:

  } finally {
    settling = true;
    await safeClose(client, transport, connected);
    await spawnNotification.catch(() => undefined);
    await Promise.race([
      connectPromise,
      new Promise((resolve) => setTimeout(resolve, 5_000))
    ]).catch(() => undefined);
  }

}
}
Loading
Loading