10 | 10 | managing prompt configurations. |
11 | 11 | """ |
12 | 12 |
| 13 | +from collections.abc import AsyncIterator |
13 | 14 | from dataclasses import dataclass |
14 | 15 | from pathlib import Path |
| 16 | +from typing import Any |
15 | 17 |
16 | 18 | import yaml |
| 19 | +from langchain_core.callbacks import ( |
| 20 | + AsyncCallbackManagerForLLMRun, |
| 21 | + CallbackManagerForLLMRun, |
| 22 | +) |
| 23 | +from langchain_core.messages import AIMessage, BaseMessage |
| 24 | +from langchain_core.outputs import ChatGenerationChunk, ChatResult |
17 | 25 | from langchain_litellm import ChatLiteLLM |
18 | 26 | from litellm import get_model_info |
19 | 27 | from sqlalchemy import select |
23 | 31 | AUTO_MODE_ID, |
24 | 32 | ChatLiteLLMRouter, |
25 | 33 | LLMRouterService, |
| 34 | + _sanitize_content, |
26 | 35 | get_auto_mode_llm, |
27 | 36 | is_auto_mode, |
28 | 37 | ) |
29 | 38 |
| 39 | + |
| 40 | +def _sanitize_messages(messages: list[BaseMessage]) -> list[BaseMessage]: |
| 41 | +    """Sanitize each message's content in place so it is safe for any provider. |
| 42 | +
| 43 | + Handles three cross-provider incompatibilities: |
| 44 | + - List content with provider-specific blocks (e.g. ``thinking``) |
| 45 | + - List content with bare strings or empty text blocks |
| 46 | + - AI messages with empty content + tool calls: some providers (Bedrock) |
| 47 | + convert ``""`` to ``[{"type":"text","text":""}]`` server-side then |
| 48 | + reject the blank text. The OpenAI spec says ``content`` should be |
| 49 | + ``null`` when an assistant message only carries tool calls. |
| 50 | + """ |
| 51 | + for msg in messages: |
| 52 | + if isinstance(msg.content, list): |
| 53 | + msg.content = _sanitize_content(msg.content) |
| 54 | + if ( |
| 55 | + isinstance(msg, AIMessage) |
| 56 | +            and not msg.content |
| 57 | + and getattr(msg, "tool_calls", None) |
| 58 | + ): |
| 59 | + msg.content = None # type: ignore[assignment] |
| 60 | + return messages |
| 61 | + |
| 62 | + |
| 63 | +class SanitizedChatLiteLLM(ChatLiteLLM): |
| 64 | +    """ChatLiteLLM subclass that sanitizes messages before every call: |
| 65 | +    it strips provider-specific blocks (e.g. ``thinking``), normalises bare |
| 66 | +    strings in content arrays, and nulls empty tool-call-only content.""" |
| 67 | + |
| 68 | + def _generate( |
| 69 | + self, |
| 70 | + messages: list[BaseMessage], |
| 71 | + stop: list[str] | None = None, |
| 72 | + run_manager: CallbackManagerForLLMRun | None = None, |
| 73 | + **kwargs: Any, |
| 74 | + ) -> ChatResult: |
| 75 | + return super()._generate( |
| 76 | + _sanitize_messages(messages), stop, run_manager, **kwargs |
| 77 | + ) |
| 78 | + |
| 79 | + async def _astream( |
| 80 | + self, |
| 81 | + messages: list[BaseMessage], |
| 82 | + stop: list[str] | None = None, |
| 83 | + run_manager: AsyncCallbackManagerForLLMRun | None = None, |
| 84 | + **kwargs: Any, |
| 85 | + ) -> AsyncIterator[ChatGenerationChunk]: |
| 86 | + async for chunk in super()._astream( |
| 87 | + _sanitize_messages(messages), stop, run_manager, **kwargs |
| 88 | + ): |
| 89 | + yield chunk |
| 90 | + |
| 91 | + |
30 | 92 | # Provider mapping for LiteLLM model string construction |
31 | 93 | PROVIDER_MAP = { |
32 | 94 | "OPENAI": "openai", |
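
As a quick illustration of the tool-call normalisation documented above, here is a minimal sketch; it assumes `_sanitize_messages` from this diff is in scope and exercises only the empty-content path (the list-content path needs the real `_sanitize_content`, which is not reproduced here). The `get_weather` tool name is hypothetical.

```python
from langchain_core.messages import AIMessage, HumanMessage

# An assistant turn that only carries tool calls: providers such as Bedrock
# reject the blank text that "" turns into server-side, so the sanitizer
# replaces it with None (null), matching the OpenAI spec.
history = [
    HumanMessage(content="What is the weather in Paris?"),
    AIMessage(
        content="",
        tool_calls=[{"name": "get_weather", "args": {"city": "Paris"}, "id": "call_1"}],
    ),
]

sanitized = _sanitize_messages(history)
assert sanitized[1].content is None   # "" -> null for tool-call-only turns
assert sanitized[0].content == "What is the weather in Paris?"  # human turn untouched
```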
@@ -252,6 +314,28 @@ def load_llm_config_from_yaml(llm_config_id: int = -1) -> dict | None: |
252 | 314 | return None |
253 | 315 |
254 | 316 |
| 317 | +def load_global_llm_config_by_id(llm_config_id: int) -> dict | None: |
| 318 | + """ |
| 319 | + Load a global LLM config by ID, checking in-memory configs first. |
| 320 | +
| 321 | + This handles both static YAML configs and dynamically injected configs |
| 322 | + (e.g. OpenRouter integration models that only exist in memory). |
| 323 | +
| 324 | + Args: |
| 325 | + llm_config_id: The negative ID of the global config to load |
| 326 | +
| 327 | + Returns: |
| 328 | + LLM config dict or None if not found |
| 329 | + """ |
| 330 | + from app.config import config as app_config |
| 331 | + |
| 332 | + for cfg in app_config.GLOBAL_LLM_CONFIGS: |
| 333 | + if cfg.get("id") == llm_config_id: |
| 334 | + return cfg |
| 335 | + # Fallback to YAML file read (covers edge cases like hot-reload) |
| 336 | + return load_llm_config_from_yaml(llm_config_id) |
| 337 | + |
| 338 | + |
255 | 339 | async def load_new_llm_config_from_db( |
256 | 340 | session: AsyncSession, |
257 | 341 | config_id: int, |
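
To make the lookup order concrete, a hypothetical flow for the new helper; the `-1000` ID and the config dict's keys are illustrative assumptions, not the app's actual schema:

```python
from app.config import config as app_config

# A dynamically injected config (e.g. an OpenRouter integration model)
# exists only in memory, so the in-memory scan must win before any YAML read.
app_config.GLOBAL_LLM_CONFIGS.append(
    {"id": -1000, "provider": "OPENROUTER", "model": "openrouter/some-model"}
)

cfg = load_global_llm_config_by_id(-1000)  # resolved from memory, no file I/O
assert cfg is not None and cfg["id"] == -1000
```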
@@ -359,7 +443,13 @@ async def load_agent_config( |
359 | 443 | return AgentConfig.from_auto_mode() |
360 | 444 |
361 | 445 | if config_id < 0: |
362 | | - # Load from YAML (global configs have negative IDs) |
| 446 | + # Check in-memory configs first (includes static YAML + dynamic OpenRouter) |
| 447 | + from app.config import config as app_config |
| 448 | +
| 449 | + for cfg in app_config.GLOBAL_LLM_CONFIGS: |
| 450 | + if cfg.get("id") == config_id: |
| 451 | + return AgentConfig.from_yaml_config(cfg) |
| 452 | + # Fallback to YAML file read for safety |
363 | 453 | yaml_config = load_llm_config_from_yaml(config_id) |
364 | 454 | if yaml_config: |
365 | 455 | return AgentConfig.from_yaml_config(yaml_config) |
@@ -402,7 +492,7 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None: |
402 | 492 | if llm_config.get("litellm_params"): |
403 | 493 | litellm_kwargs.update(llm_config["litellm_params"]) |
404 | 494 |
405 | | - llm = ChatLiteLLM(**litellm_kwargs) |
| 495 | + llm = SanitizedChatLiteLLM(**litellm_kwargs) |
406 | 496 | _attach_model_profile(llm, model_string) |
407 | 497 | return llm |
408 | 498 |
@@ -457,6 +547,6 @@ def create_chat_litellm_from_agent_config( |
457 | 547 | if agent_config.litellm_params: |
458 | 548 | litellm_kwargs.update(agent_config.litellm_params) |
459 | 549 |
460 | | - llm = ChatLiteLLM(**litellm_kwargs) |
| 550 | + llm = SanitizedChatLiteLLM(**litellm_kwargs) |
461 | 551 | _attach_model_profile(llm, model_string) |
462 | 552 | return llm |
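
With both factory helpers returning the sanitizing subclass, callers keep the exact `ChatLiteLLM` interface; a minimal streaming sketch, assuming valid provider credentials and a placeholder model id:

```python
import asyncio

from langchain_core.messages import HumanMessage

async def main() -> None:
    # Placeholder model string; anything LiteLLM can route works here.
    llm = SanitizedChatLiteLLM(model="openai/gpt-4o-mini")
    # _astream sanitizes the history on every call, so replayed assistant
    # turns with thinking blocks or empty tool-call content are safe to resend.
    async for chunk in llm.astream([HumanMessage(content="Say hello.")]):
        print(chunk.content, end="", flush=True)

asyncio.run(main())
```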