Skip to content

Commit 39d51e4

Browse files
Chibionos, claude, and akshaylive
authored
fix: use agent model for eval simulations (#1555)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Akshaya Shanbhogue <akshaya.shanbhogue@uipath.com>
1 parent 48d9f4e commit 39d51e4

11 files changed

Lines changed: 110 additions & 43 deletions

File tree

packages/uipath/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath"
3-
version = "2.10.46"
3+
version = "2.10.47"
44
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

packages/uipath/src/uipath/_cli/cli_debug.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from uipath._cli._utils._studio_project import StudioClient
1010
from uipath.core.tracing import UiPathTraceManager
1111
from uipath.eval.mocks import UiPathMockRuntime
12+
from uipath.eval.mocks._mock_runtime import load_simulation_config
1213
from uipath.platform.common import ResourceOverwritesContext, UiPathConfig
1314
from uipath.runtime import (
1415
UiPathExecuteOptions,
@@ -163,8 +164,19 @@ async def execute_debug_runtime():
163164
trigger_poll_interval=trigger_poll_interval,
164165
)
165166

167+
# Build mocking context with agent model for simulations
168+
schema = await runtime.get_schema()
169+
agent_model = None
170+
if schema.metadata and "settings" in schema.metadata:
171+
agent_model = schema.metadata["settings"].get("model")
172+
173+
mocking_context = load_simulation_config(
174+
agent_model=agent_model
175+
)
176+
166177
mock_runtime = UiPathMockRuntime(
167178
delegate=debug_runtime,
179+
mocking_context=mocking_context,
168180
)
169181

170182
try:

packages/uipath/src/uipath/_cli/cli_eval.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from uipath.runtime import (
2626
UiPathRuntimeContext,
2727
UiPathRuntimeFactoryRegistry,
28-
UiPathRuntimeSchema,
2928
)
3029
from uipath.telemetry._track import flush_events
3130
from uipath.tracing import (
@@ -65,27 +64,6 @@ def setup_reporting_prereq(no_report: bool) -> bool:
6564
return True
6665

6766

68-
def _get_agent_model(schema: UiPathRuntimeSchema) -> str | None:
69-
"""Get agent model from the runtime schema metadata.
70-
71-
The model is read from schema.metadata["settings"]["model"] which is
72-
populated by the low-code agents runtime from agent.json.
73-
74-
Returns:
75-
The model name from agent settings, or None if not found.
76-
"""
77-
try:
78-
if schema.metadata and "settings" in schema.metadata:
79-
settings = schema.metadata["settings"]
80-
model = settings.get("model")
81-
if model:
82-
logger.debug(f"Got agent model from schema.metadata: {model}")
83-
return model
84-
return None
85-
except Exception:
86-
return None
87-
88-
8967
def _resolve_model_settings_override(
9068
model_settings_id: str, evaluation_set: EvaluationSet
9169
) -> dict[str, Any] | None:
@@ -431,7 +409,6 @@ async def execute_eval():
431409
eval_context.evaluators = await EvalHelpers.load_evaluators(
432410
resolved_eval_set_path,
433411
eval_context.evaluation_set,
434-
_get_agent_model(eval_context.runtime_schema),
435412
)
436413

437414
# Runtime is not required anymore.

packages/uipath/src/uipath/eval/helpers.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
from pydantic import ValidationError
99

10+
from uipath.runtime.schema import UiPathRuntimeSchema
11+
1012
from .evaluators.base_evaluator import GenericBaseEvaluator
1113
from .evaluators.evaluator_factory import EvaluatorFactory
1214
from .mocks._types import InputMockingStrategy, LLMMockingStrategy
@@ -277,3 +279,24 @@ async def load_evaluators(
277279
)
278280

279281
return evaluators
282+
283+
284+
def get_agent_model(schema: UiPathRuntimeSchema) -> str | None:
285+
"""Get agent model from the runtime schema metadata.
286+
287+
The model is read from schema.metadata["settings"]["model"] which is
288+
populated by the low-code agents runtime from agent.json.
289+
290+
Returns:
291+
The model name from agent settings, or None if not found.
292+
"""
293+
try:
294+
if schema.metadata and "settings" in schema.metadata:
295+
settings = schema.metadata["settings"]
296+
model = settings.get("model")
297+
if model:
298+
logger.debug(f"Got agent model from schema.metadata: {model}")
299+
return model
300+
return None
301+
except Exception:
302+
return None

packages/uipath/src/uipath/eval/mocks/_input_mocker.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""LLM Input Mocker implementation."""
22

33
import json
4+
import logging
45
from datetime import datetime
56
from typing import Any
67

@@ -9,6 +10,7 @@
910
from uipath.core.tracing import traced
1011
from uipath.platform import UiPath
1112
from uipath.platform.chat import UiPathLlmChatService
13+
from uipath.platform.chat._llm_gateway_service import ChatModels
1214

1315
from .._execution_context import eval_set_run_id_context
1416
from ._mock_context import cache_manager_context
@@ -17,6 +19,8 @@
1719
InputMockingStrategy,
1820
)
1921

22+
logger = logging.getLogger(__name__)
23+
2024

2125
def get_input_mocking_prompt(
2226
input_schema: str,
@@ -117,6 +121,11 @@ async def generate_llm_input(
117121
else {}
118122
)
119123

124+
simulation_model = completion_kwargs.get(
125+
"model", ChatModels.gpt_4_1_mini_2025_04_14
126+
)
127+
logger.info(f"Simulating input generation using model: {simulation_model}")
128+
120129
if cache_manager is not None:
121130
cache_key_data = {
122131
"response_format": response_format,

packages/uipath/src/uipath/eval/mocks/_llm_mocker.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from uipath.core.tracing import traced
1111
from uipath.platform import UiPath
1212
from uipath.platform.chat import UiPathLlmChatService
13-
from uipath.platform.chat._llm_gateway_service import _cleanup_schema
13+
from uipath.platform.chat._llm_gateway_service import ChatModels, _cleanup_schema
1414

1515
from .._execution_context import (
1616
eval_set_run_id_context,
@@ -182,6 +182,13 @@ async def response(
182182
else {}
183183
)
184184

185+
simulation_model = completion_kwargs.get(
186+
"model", ChatModels.gpt_4_1_mini_2025_04_14
187+
)
188+
logger.info(
189+
f"Simulating tool '{function_name}' using model: {simulation_model}"
190+
)
191+
185192
formatted_prompt = PROMPT.format(**prompt_generation_args)
186193

187194
cache_key_data = {

packages/uipath/src/uipath/eval/mocks/_mock_runtime.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@
2828
LLMMockingStrategy,
2929
MockingContext,
3030
MockingStrategyType,
31+
ModelSettings,
3132
ToolSimulation,
3233
)
3334

3435
logger = logging.getLogger(__name__)
3536

3637

37-
def load_simulation_config() -> MockingContext | None:
38+
def load_simulation_config(agent_model: str | None = None) -> MockingContext | None:
3839
"""Load simulation.json from current directory and convert to MockingContext.
3940
4041
Returns:
@@ -63,11 +64,21 @@ def load_simulation_config() -> MockingContext | None:
6364
if not tools_to_simulate:
6465
return None
6566

66-
# Create LLM mocking strategy
67+
# Honor model from simulation config if specified, otherwise use the agent model
68+
simulation_model = simulation_data.get("model")
69+
model = (
70+
ModelSettings(model=simulation_model)
71+
if simulation_model
72+
else ModelSettings(model=agent_model)
73+
if agent_model
74+
else None
75+
)
76+
6777
mocking_strategy = LLMMockingStrategy(
6878
type=MockingStrategyType.LLM,
6979
prompt=simulation_data.get("instructions", ""),
7080
tools_to_simulate=tools_to_simulate,
81+
model=model,
7182
)
7283

7384
# Create MockingContext for debugging

packages/uipath/src/uipath/eval/runtime/runtime.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,14 @@
4747
from .._execution_context import ExecutionSpanCollector
4848
from ..evaluators.base_evaluator import GenericBaseEvaluator
4949
from ..evaluators.output_evaluator import OutputEvaluationCriteria
50+
from ..helpers import get_agent_model
5051
from ..mocks._cache_manager import CacheManager
5152
from ..mocks._input_mocker import (
5253
generate_llm_input,
5354
)
5455
from ..mocks._mock_context import cache_manager_context
5556
from ..mocks._mock_runtime import UiPathMockRuntime
56-
from ..mocks._types import MockingContext
57+
from ..mocks._types import LLMMockingStrategy, MockingContext, ModelSettings
5758
from ..models import EvaluationResult
5859
from ..models.evaluation_set import (
5960
EvaluationItem,
@@ -526,12 +527,25 @@ async def _execute_eval(
526527
eval_item=eval_item,
527528
),
528529
)
530+
# Set agent model on the mocking strategy if not already set
531+
mocking_strategy = eval_item.mocking_strategy
532+
if (
533+
mocking_strategy
534+
and isinstance(mocking_strategy, LLMMockingStrategy)
535+
and not mocking_strategy.model
536+
):
537+
mocking_model = get_agent_model(self.context.runtime_schema)
538+
if mocking_model:
539+
mocking_strategy = mocking_strategy.model_copy(
540+
update={"model": ModelSettings(model=mocking_model)}
541+
)
542+
529543
agent_execution_output = await self.execute_runtime(
530544
eval_item,
531545
execution_id,
532546
input_overrides=self.context.input_overrides,
533547
mocking_context=MockingContext(
534-
strategy=eval_item.mocking_strategy,
548+
strategy=mocking_strategy,
535549
name=eval_item.name,
536550
inputs=eval_item.inputs,
537551
),
@@ -811,8 +825,18 @@ async def _generate_input_for_eval(
811825
or getattr(eval_item, "expected_output", None)
812826
or {}
813827
)
828+
# Set agent model on the input mocking strategy if not already set
829+
input_strategy = eval_item.input_mocking_strategy
830+
# If input strategy does not specify a model, extract it
831+
if input_strategy and not input_strategy.model:
832+
input_generation_model = get_agent_model(self.context.runtime_schema)
833+
if input_generation_model:
834+
input_strategy = input_strategy.model_copy(
835+
update={"model": ModelSettings(model=input_generation_model)}
836+
)
837+
814838
generated_input = await generate_llm_input(
815-
eval_item.input_mocking_strategy,
839+
input_strategy,
816840
(await self.get_schema()).input,
817841
expected_behavior=eval_item.expected_agent_behavior or "",
818842
expected_output=expected_output,

packages/uipath/tests/cli/eval/test_eval_runtime_metadata.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Tests for UiPathEvalRuntime metadata loading functionality.
22
33
This module tests:
4-
- _get_agent_model() - cached agent model retrieval
4+
- get_agent_model() - cached agent model retrieval
55
- get_schema() - cached schema retrieval
66
"""
77

@@ -10,11 +10,9 @@
1010

1111
import pytest
1212

13-
from uipath._cli.cli_eval import (
14-
_get_agent_model,
15-
)
1613
from uipath.core.events import EventBus
1714
from uipath.core.tracing import UiPathTraceManager
15+
from uipath.eval.helpers import get_agent_model
1816
from uipath.eval.runtime import UiPathEvalContext, UiPathEvalRuntime
1917
from uipath.runtime import (
2018
UiPathExecuteOptions,
@@ -119,34 +117,34 @@ async def dispose(self) -> None:
119117

120118

121119
class TestGetAgentModel:
122-
"""Tests for _get_agent_model function."""
120+
"""Tests for get_agent_model function."""
123121

124122
@pytest.mark.asyncio
125123
async def test_returns_agent_model(self):
126-
"""Test that _get_agent_model returns the correct model from schema."""
124+
"""Test that get_agent_model returns the correct model from schema."""
127125
schema = MockRuntimeSchema()
128126
schema.metadata = {"settings": {"model": "gpt-4o-2024-11-20"}}
129127

130-
model = _get_agent_model(schema)
128+
model = get_agent_model(schema)
131129
assert model == "gpt-4o-2024-11-20"
132130

133131
@pytest.mark.asyncio
134132
async def test_returns_none_when_no_model(self):
135-
"""Test that _get_agent_model returns None when runtime has no model."""
133+
"""Test that get_agent_model returns None when runtime has no model."""
136134
schema = MockRuntimeSchema()
137135

138-
model = _get_agent_model(schema)
136+
model = get_agent_model(schema)
139137
assert model is None
140138

141139
@pytest.mark.asyncio
142140
async def test_returns_model_consistently(self):
143-
"""Test that _get_agent_model returns consistent results."""
141+
"""Test that get_agent_model returns consistent results."""
144142
schema = MockRuntimeSchema()
145143
schema.metadata = {"settings": {"model": "consistent-model"}}
146144

147145
# Multiple calls should return the same value
148-
model1 = _get_agent_model(schema)
149-
model2 = _get_agent_model(schema)
146+
model1 = get_agent_model(schema)
147+
model2 = get_agent_model(schema)
150148

151149
assert model1 == model2 == "consistent-model"
152150

packages/uipath/tests/cli/test_debug_simulation.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ def test_debug_always_wraps_with_mock_runtime(
241241
) as mock_factory_get:
242242
mock_runtime = Mock()
243243
mock_runtime.dispose = AsyncMock()
244+
mock_runtime.get_schema = AsyncMock(
245+
return_value=Mock(metadata=None)
246+
)
244247

245248
mock_factory = Mock()
246249
mock_factory.new_runtime = AsyncMock(return_value=mock_runtime)
@@ -305,6 +308,9 @@ def test_debug_wraps_with_mock_runtime_on_error(
305308
) as mock_factory_get:
306309
mock_runtime = Mock()
307310
mock_runtime.dispose = AsyncMock()
311+
mock_runtime.get_schema = AsyncMock(
312+
return_value=Mock(metadata=None)
313+
)
308314

309315
mock_factory = Mock()
310316
mock_factory.new_runtime = AsyncMock(return_value=mock_runtime)

0 commit comments

Comments (0)