Skip to content

Commit dcc9c02

Browse files
author
Andrei Bratu
committed
add support for agents + online files eval
1 parent eaae0af commit dcc9c02

2 files changed

Lines changed: 33 additions & 4 deletions

File tree

src/evals/run.ts

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import cliProgress from "cli-progress";
1111
import _, { capitalize } from "lodash";
1212

1313
import {
14+
AgentResponse,
1415
BooleanEvaluatorStatsResponse,
1516
CreateEvaluationRequestEvaluatorsItem,
1617
DatapointResponse,
@@ -216,6 +217,13 @@ function getFileCallable<I extends Record<string, unknown> & { messages?: any[]
216217
return function_;
217218
}
218219

220+
type EvaluatedFile =
221+
| PromptResponse
222+
| FlowResponse
223+
| ToolResponse
224+
| EvaluatorResponse
225+
| AgentResponse;
226+
219227
export async function runEval<
220228
I extends Record<string, unknown> & { messages?: any[] },
221229
O,
@@ -249,14 +257,35 @@ export async function runEval<
249257
}
250258
}
251259

252-
let hlFile: PromptResponse | FlowResponse | ToolResponse | EvaluatorResponse;
260+
// The definite assignment assertion (`!`) promises the compiler that hlFile will be initialized before use
261+
// The code below assigns hlFile on every path that goes on to use it, but the compiler cannot verify that
262+
let hlFile!: EvaluatedFile;
263+
let fileExists = false;
264+
try {
265+
const path_ = file_.path;
266+
if (path_) {
267+
// @ts-ignore retrieveByPath returns a polymorphic type that is equivalent with FileResponses above
268+
hlFile = await client.files.retrieveByPath({ path: path_ });
269+
if (hlFile.type !== type_) {
270+
throw new HumanloopRuntimeError(
271+
`Error in your \`file\` argument: The file type ${type_} does ` +
272+
`not match the type of the File at ${path_}: ${hlFile.type}.`,
273+
);
274+
}
275+
fileExists = true;
276+
}
277+
} catch (e: any) {
278+
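// Retrieval failed, try upserting the File instead. NOTE(review): this empty catch also swallows the type-mismatch HumanloopRuntimeError thrown above and any other retrieveByPath failure, not only "file does not exist" - confirm that is intended or rethrow.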
279+
}
280+
if (!fileExists) {
253281
try {
254282
hlFile = await upsertFile({ file: file_, type: type_, client: client });
255283
} catch (e: any) {
256284
console.error(
257285
`${RED}Error in your \`file\` argument:\n\n${e.constructor.name}: ${e.message}${RESET}`,
258286
);
259287
return [];
288+
}
260289
}
261290

262291
let hlDataset: DatasetResponse;
@@ -645,7 +674,7 @@ async function getNewRun({
645674
client: HumanloopClient;
646675
evaluationName: string | undefined;
647676
evaluators: Evaluator[];
648-
hlFile: PromptResponse | FlowResponse | ToolResponse | EvaluatorResponse;
677+
hlFile: EvaluatedFile;
649678
hlDataset: DatasetResponse;
650679
func: ((inputs: Record<string, unknown>) => Promise<unknown>) | undefined;
651680
}): Promise<{ evaluation: EvaluationResponse; run: EvaluationRunResponse }> {

src/evals/types.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ interface Identifiers {
5050
export interface File<I extends Record<string, unknown> & { messages?: any[] }, O>
5151
extends Identifiers {
5252
/** The type of File this callable relates to on Humanloop. */
53-
type?: "flow" | "prompt";
53+
type?: "flow" | "prompt" | "agent";
5454
/** The contents uniquely define the version of the File on Humanloop. */
5555
version?: Version;
5656
/**
@@ -117,7 +117,7 @@ export interface LocalEvaluator<ReturnType, ArgsType> extends Evaluator {
117117
* @param dataset - The dataset to map your function over to produce the outputs required by the Evaluation.
118118
* @param name - The name of the Evaluation to run. If it does not exist, a new Evaluation will be created under your File.
119119
* @param evaluators - Define how judgments are provided for this Evaluation.
120-
* @param workers - The number of threads to process datapoints using your function concurrently.
120+
* @param workers - The number of datapoints to process concurrently.
121121
* @returns Per Evaluator checks.
122122
*/
123123
export interface EvaluatorCheck {

0 commit comments

Comments
 (0)