Skip to content

Commit 4a51ccd

Browse files
committed
cloud: added openrouter integration with global configs
1 parent ff4e0f9 commit 4a51ccd

26 files changed

Lines changed: 911 additions & 178 deletions

surfsense_backend/.env.example

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,17 +184,17 @@ VIDEO_PRESENTATION_DEFAULT_DURATION_IN_FRAMES=300
184184
# (Optional) Maximum pages limit per user for ETL services (default: `999999999` for unlimited in OSS version)
185185
PAGES_LIMIT=500
186186

187-
# Premium token quota per registered user (default: 5,000,000)
187+
# Premium token quota per registered user (default: 3,000,000)
188188
# Applies only to models with billing_tier=premium in global_llm_config.yaml
189-
PREMIUM_TOKEN_LIMIT=5000000
189+
PREMIUM_TOKEN_LIMIT=3000000
190190

191191
# No-login (anonymous) mode — allows public users to chat without an account
192192
# Set TRUE to enable /free pages and anonymous chat API
193193
NOLOGIN_MODE_ENABLED=FALSE
194194
# Total tokens allowed per anonymous session before requiring account creation
195-
ANON_TOKEN_LIMIT=1000000
195+
ANON_TOKEN_LIMIT=500000
196196
# Token count at which the UI shows a soft warning
197-
ANON_TOKEN_WARNING_THRESHOLD=800000
197+
ANON_TOKEN_WARNING_THRESHOLD=400000
198198
# Days before anonymous quota tracking expires in Redis
199199
ANON_TOKEN_QUOTA_TTL_DAYS=30
200200
# Max document upload size for anonymous users (MB)

surfsense_backend/app/agents/new_chat/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@
2222
from .context import SurfSenseContextSchema
2323

2424
# LLM config
25-
from .llm_config import create_chat_litellm_from_config, load_llm_config_from_yaml
25+
from .llm_config import (
26+
create_chat_litellm_from_config,
27+
load_global_llm_config_by_id,
28+
load_llm_config_from_yaml,
29+
)
2630

2731
# Middleware
2832
from .middleware import (
@@ -81,6 +85,7 @@
8185
"get_all_tool_names",
8286
"get_default_enabled_tools",
8387
"get_tool_by_name",
88+
"load_global_llm_config_by_id",
8489
"load_llm_config_from_yaml",
8590
"search_knowledge_base_async",
8691
]

surfsense_backend/app/agents/new_chat/llm_config.py

Lines changed: 93 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,18 @@
1010
managing prompt configurations.
1111
"""
1212

13+
from collections.abc import AsyncIterator
1314
from dataclasses import dataclass
1415
from pathlib import Path
16+
from typing import Any
1517

1618
import yaml
19+
from langchain_core.callbacks import (
20+
AsyncCallbackManagerForLLMRun,
21+
CallbackManagerForLLMRun,
22+
)
23+
from langchain_core.messages import AIMessage, BaseMessage
24+
from langchain_core.outputs import ChatGenerationChunk, ChatResult
1725
from langchain_litellm import ChatLiteLLM
1826
from litellm import get_model_info
1927
from sqlalchemy import select
@@ -23,10 +31,64 @@
2331
AUTO_MODE_ID,
2432
ChatLiteLLMRouter,
2533
LLMRouterService,
34+
_sanitize_content,
2635
get_auto_mode_llm,
2736
is_auto_mode,
2837
)
2938

39+
40+
def _sanitize_messages(messages: list[BaseMessage]) -> list[BaseMessage]:
41+
"""Sanitize content on every message so it is safe for any provider.
42+
43+
Handles three cross-provider incompatibilities:
44+
- List content with provider-specific blocks (e.g. ``thinking``)
45+
- List content with bare strings or empty text blocks
46+
- AI messages with empty content + tool calls: some providers (Bedrock)
47+
convert ``""`` to ``[{"type":"text","text":""}]`` server-side then
48+
reject the blank text. The OpenAI spec says ``content`` should be
49+
``null`` when an assistant message only carries tool calls.
50+
"""
51+
for msg in messages:
52+
if isinstance(msg.content, list):
53+
msg.content = _sanitize_content(msg.content)
54+
if (
55+
isinstance(msg, AIMessage)
56+
and (not msg.content or msg.content == "")
57+
and getattr(msg, "tool_calls", None)
58+
):
59+
msg.content = None # type: ignore[assignment]
60+
return messages
61+
62+
63+
class SanitizedChatLiteLLM(ChatLiteLLM):
64+
"""ChatLiteLLM subclass that strips provider-specific content blocks
65+
(e.g. ``thinking`` from reasoning models) and normalises bare strings
66+
in content arrays before forwarding to the underlying provider."""
67+
68+
def _generate(
69+
self,
70+
messages: list[BaseMessage],
71+
stop: list[str] | None = None,
72+
run_manager: CallbackManagerForLLMRun | None = None,
73+
**kwargs: Any,
74+
) -> ChatResult:
75+
return super()._generate(
76+
_sanitize_messages(messages), stop, run_manager, **kwargs
77+
)
78+
79+
async def _astream(
80+
self,
81+
messages: list[BaseMessage],
82+
stop: list[str] | None = None,
83+
run_manager: AsyncCallbackManagerForLLMRun | None = None,
84+
**kwargs: Any,
85+
) -> AsyncIterator[ChatGenerationChunk]:
86+
async for chunk in super()._astream(
87+
_sanitize_messages(messages), stop, run_manager, **kwargs
88+
):
89+
yield chunk
90+
91+
3092
# Provider mapping for LiteLLM model string construction
3193
PROVIDER_MAP = {
3294
"OPENAI": "openai",
@@ -252,6 +314,28 @@ def load_llm_config_from_yaml(llm_config_id: int = -1) -> dict | None:
252314
return None
253315

254316

317+
def load_global_llm_config_by_id(llm_config_id: int) -> dict | None:
318+
"""
319+
Load a global LLM config by ID, checking in-memory configs first.
320+
321+
This handles both static YAML configs and dynamically injected configs
322+
(e.g. OpenRouter integration models that only exist in memory).
323+
324+
Args:
325+
llm_config_id: The negative ID of the global config to load
326+
327+
Returns:
328+
LLM config dict or None if not found
329+
"""
330+
from app.config import config as app_config
331+
332+
for cfg in app_config.GLOBAL_LLM_CONFIGS:
333+
if cfg.get("id") == llm_config_id:
334+
return cfg
335+
# Fallback to YAML file read (covers edge cases like hot-reload)
336+
return load_llm_config_from_yaml(llm_config_id)
337+
338+
255339
async def load_new_llm_config_from_db(
256340
session: AsyncSession,
257341
config_id: int,
@@ -359,7 +443,13 @@ async def load_agent_config(
359443
return AgentConfig.from_auto_mode()
360444

361445
if config_id < 0:
362-
# Load from YAML (global configs have negative IDs)
446+
# Check in-memory configs first (includes static YAML + dynamic OpenRouter)
447+
from app.config import config as app_config
448+
449+
for cfg in app_config.GLOBAL_LLM_CONFIGS:
450+
if cfg.get("id") == config_id:
451+
return AgentConfig.from_yaml_config(cfg)
452+
# Fallback to YAML file read for safety
363453
yaml_config = load_llm_config_from_yaml(config_id)
364454
if yaml_config:
365455
return AgentConfig.from_yaml_config(yaml_config)
@@ -402,7 +492,7 @@ def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
402492
if llm_config.get("litellm_params"):
403493
litellm_kwargs.update(llm_config["litellm_params"])
404494

405-
llm = ChatLiteLLM(**litellm_kwargs)
495+
llm = SanitizedChatLiteLLM(**litellm_kwargs)
406496
_attach_model_profile(llm, model_string)
407497
return llm
408498

@@ -457,6 +547,6 @@ def create_chat_litellm_from_agent_config(
457547
if agent_config.litellm_params:
458548
litellm_kwargs.update(agent_config.litellm_params)
459549

460-
llm = ChatLiteLLM(**litellm_kwargs)
550+
llm = SanitizedChatLiteLLM(**litellm_kwargs)
461551
_attach_model_profile(llm, model_string)
462552
return llm

surfsense_backend/app/agents/podcaster/nodes.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from app.config import config as app_config
1414
from app.services.kokoro_tts_service import get_kokoro_tts_service
1515
from app.services.llm_service import get_agent_llm
16+
from app.utils.content_utils import extract_text_content, strip_markdown_fences
1617

1718
from .configuration import Configuration
1819
from .prompts import get_podcast_generation_prompt
@@ -53,43 +54,32 @@ async def create_podcast_transcript(
5354
# Generate the podcast transcript
5455
llm_response = await llm.ainvoke(messages)
5556

56-
# First try the direct approach
57+
# Reasoning models (e.g. Kimi K2.5) may return content as a list of
58+
# blocks including 'reasoning' entries. Normalise to a plain string.
59+
content = strip_markdown_fences(extract_text_content(llm_response.content))
60+
5761
try:
58-
podcast_transcript = PodcastTranscripts.model_validate(
59-
json.loads(llm_response.content)
60-
)
61-
except (json.JSONDecodeError, ValueError) as e:
62+
podcast_transcript = PodcastTranscripts.model_validate(json.loads(content))
63+
except (json.JSONDecodeError, TypeError, ValueError) as e:
6264
print(f"Direct JSON parsing failed, trying fallback approach: {e!s}")
6365

64-
# Fallback: Parse the JSON response manually
6566
try:
66-
# Extract JSON content from the response
67-
content = llm_response.content
68-
69-
# Find the JSON in the content (handle case where LLM might add additional text)
7067
json_start = content.find("{")
7168
json_end = content.rfind("}") + 1
7269
if json_start >= 0 and json_end > json_start:
7370
json_str = content[json_start:json_end]
74-
75-
# Parse the JSON string
7671
parsed_data = json.loads(json_str)
77-
78-
# Convert to Pydantic model
7972
podcast_transcript = PodcastTranscripts.model_validate(parsed_data)
80-
8173
print("Successfully parsed podcast transcript using fallback approach")
8274
else:
83-
# If JSON structure not found, raise a clear error
8475
error_message = f"Could not find valid JSON in LLM response. Raw response: {content}"
8576
print(error_message)
8677
raise ValueError(error_message)
8778

88-
except (json.JSONDecodeError, ValueError) as e2:
89-
# Log the error and re-raise it
79+
except (json.JSONDecodeError, TypeError, ValueError) as e2:
9080
error_message = f"Error parsing LLM response (fallback also failed): {e2!s}"
9181
print(f"Error parsing LLM response: {e2!s}")
92-
print(f"Raw response: {llm_response.content}")
82+
print(f"Raw response: {content}")
9383
raise
9484

9585
return {"podcast_transcript": podcast_transcript.podcast_transcripts}

surfsense_backend/app/agents/video_presentation/nodes.py

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from app.config import config as app_config
1717
from app.services.kokoro_tts_service import get_kokoro_tts_service
1818
from app.services.llm_service import get_agent_llm
19+
from app.utils.content_utils import extract_text_content, strip_markdown_fences
1920

2021
from .configuration import Configuration
2122
from .prompts import (
@@ -67,16 +68,14 @@ async def create_presentation_slides(
6768
]
6869

6970
llm_response = await llm.ainvoke(messages)
71+
content = strip_markdown_fences(extract_text_content(llm_response.content))
7072

7173
try:
72-
presentation = PresentationSlides.model_validate(
73-
json.loads(llm_response.content)
74-
)
75-
except (json.JSONDecodeError, ValueError) as e:
74+
presentation = PresentationSlides.model_validate(json.loads(content))
75+
except (json.JSONDecodeError, TypeError, ValueError) as e:
7676
print(f"Direct JSON parsing failed, trying fallback approach: {e!s}")
7777

7878
try:
79-
content = llm_response.content
8079
json_start = content.find("{")
8180
json_end = content.rfind("}") + 1
8281
if json_start >= 0 and json_end > json_start:
@@ -89,10 +88,10 @@ async def create_presentation_slides(
8988
print(error_message)
9089
raise ValueError(error_message)
9190

92-
except (json.JSONDecodeError, ValueError) as e2:
91+
except (json.JSONDecodeError, TypeError, ValueError) as e2:
9392
error_message = f"Error parsing LLM response (fallback also failed): {e2!s}"
9493
print(f"Error parsing LLM response: {e2!s}")
95-
print(f"Raw response: {llm_response.content}")
94+
print(f"Raw response: {content}")
9695
raise
9796

9897
return {"slides": presentation.slides}
@@ -308,12 +307,7 @@ async def _assign_themes_with_llm(
308307
]
309308
)
310309

311-
text = response.content.strip()
312-
if text.startswith("```"):
313-
lines = text.split("\n")
314-
text = "\n".join(
315-
line for line in lines if not line.strip().startswith("```")
316-
).strip()
310+
text = strip_markdown_fences(extract_text_content(response.content))
317311

318312
assignments = json.loads(text)
319313
valid_themes = set(THEME_PRESETS)
@@ -424,7 +418,9 @@ async def _generate_scene_for_slide(slide: SlideContent) -> SlideSceneCode:
424418
)
425419

426420
llm_response = await llm.ainvoke(messages)
427-
code, scene_title = _extract_code_and_title(llm_response.content)
421+
code, scene_title = _extract_code_and_title(
422+
extract_text_content(llm_response.content)
423+
)
428424

429425
code = await _refine_if_needed(llm, code, slide.slide_number)
430426

@@ -452,7 +448,7 @@ def _extract_code_and_title(content: str) -> tuple[str, str | None]:
452448
453449
Returns (code, title) where title may be None.
454450
"""
455-
text = content.strip()
451+
text = strip_markdown_fences(content)
456452

457453
if text.startswith("{"):
458454
try:
@@ -472,18 +468,7 @@ def _extract_code_and_title(content: str) -> tuple[str, str | None]:
472468
except (json.JSONDecodeError, ValueError):
473469
pass
474470

475-
code = text
476-
if code.startswith("```"):
477-
lines = code.split("\n")
478-
start = 1
479-
end = len(lines)
480-
for i in range(len(lines) - 1, 0, -1):
481-
if lines[i].strip().startswith("```"):
482-
end = i
483-
break
484-
code = "\n".join(lines[start:end]).strip()
485-
486-
return code, None
471+
return text, None
487472

488473

489474
async def _refine_if_needed(llm, code: str, slide_number: int) -> str:
@@ -512,7 +497,7 @@ async def _refine_if_needed(llm, code: str, slide_number: int) -> str:
512497
]
513498

514499
response = await llm.ainvoke(messages)
515-
code, _ = _extract_code_and_title(response.content)
500+
code, _ = _extract_code_and_title(extract_text_content(response.content))
516501

517502
error = _basic_syntax_check(code)
518503
if error is None:

0 commit comments

Comments (0)