Skip to content

Commit bd4c694

Browse files
feat: use passthrough API for files
1 parent 07feac2 commit bd4c694

2 files changed

Lines changed: 199 additions & 0 deletions

File tree

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import base64
2+
from enum import StrEnum
3+
from typing import Any
4+
5+
import httpx
6+
from uipath._utils._ssl_context import get_httpx_client_kwargs
7+
8+
# Image MIME types accepted for inline attachment; membership is checked
# case-insensitively by is_image() below.
IMAGE_MIME_TYPES: set[str] = {
    "image/png",
    "image/jpeg",
    "image/gif",
    "image/webp",
}
14+
15+
16+
class LlmProvider(StrEnum):
    """LLM provider families used to select a provider-specific payload format."""

    OPENAI = "openai"
    BEDROCK = "bedrock"
    VERTEX = "vertex"
    # Never returned by detect_provider() (it raises instead); presumably a
    # fallback sentinel for callers — confirm intended use.
    UNKNOWN = "unknown"
21+
22+
23+
def is_pdf(mime_type: str) -> bool:
    """Return True when *mime_type* denotes a PDF document (case-insensitive)."""
    normalized = mime_type.lower()
    return normalized == "application/pdf"
26+
27+
28+
def is_image(mime_type: str) -> bool:
    """Return True when *mime_type* is a supported image format.

    Supported formats are those in IMAGE_MIME_TYPES (PNG, JPEG, GIF, WebP);
    the comparison is case-insensitive.
    """
    normalized = mime_type.lower()
    return normalized in IMAGE_MIME_TYPES
31+
32+
33+
def detect_provider(model_name: str) -> LlmProvider:
    """Detect the LLM provider (Bedrock, OpenAI, or Vertex) from *model_name*.

    Matching is a case-insensitive substring search, tried in a fixed order.

    Raises:
        ValueError: when the name is empty or matches no known provider.
    """
    if not model_name:
        raise ValueError(f"Unsupported model: {model_name}")

    lowered = model_name.lower()

    # Keyword table preserves the original precedence: Bedrock, OpenAI, Vertex.
    keyword_table = (
        (("anthropic", "claude"), LlmProvider.BEDROCK),
        (("gpt",), LlmProvider.OPENAI),
        (("gemini",), LlmProvider.VERTEX),
    )
    for keywords, provider in keyword_table:
        if any(keyword in lowered for keyword in keywords):
            return provider

    raise ValueError(f"Unsupported model: {model_name}")
50+
51+
52+
async def _download_file(url: str) -> str:
    """Download *url* and return its body as a base64-encoded ASCII string.

    Raises:
        httpx.HTTPStatusError: when the response has an error status code.
    """
    async with httpx.AsyncClient(**get_httpx_client_kwargs()) as http_client:
        result = await http_client.get(url)
        result.raise_for_status()
        payload = result.content

    return base64.b64encode(payload).decode("utf-8")
60+
61+
62+
async def build_message_content_part_from_data(
63+
url: str,
64+
filename: str,
65+
mime_type: str,
66+
model: str,
67+
) -> dict[str, Any]:
68+
"""Download a file and build a provider-specific message content part.
69+
70+
The format varies based on the detected provider (Bedrock, OpenAI, or Vertex).
71+
"""
72+
provider = detect_provider(model)
73+
74+
if provider == LlmProvider.BEDROCK:
75+
raise ValueError("Anthropic models are not yet supported for file attachments")
76+
77+
if provider == LlmProvider.OPENAI:
78+
return await _build_openai_content_part_from_data(
79+
url, mime_type, filename, False
80+
)
81+
82+
if provider == LlmProvider.VERTEX:
83+
raise ValueError("Gemini models are not yet supported for file attachments")
84+
85+
raise ValueError(f"Unsupported provider: {provider}")
86+
87+
88+
async def _build_openai_content_part_from_data(
    url: str,
    mime_type: str,
    filename: str,
    download_image: bool,
) -> dict[str, Any]:
    """Build an OpenAI-style content part for an image or PDF file.

    When *download_image* is true the file is fetched and embedded as base64;
    otherwise the part references *url* directly.

    Raises:
        ValueError: when *mime_type* is neither a supported image nor a PDF.
    """
    if not download_image:
        # Pass-by-reference variants: the provider fetches the URL itself.
        if is_image(mime_type):
            return {
                "type": "input_image",
                "image_url": url,
            }
        if is_pdf(mime_type):
            return {
                "type": "input_file",
                "file_url": url,
            }
        raise ValueError(f"Unsupported mime_type: {mime_type}")

    # Embedded variants: download first (matching the original flow, the
    # fetch happens before the MIME check), then inline the base64 payload.
    encoded = await _download_file(url)
    if is_image(mime_type):
        return {
            "type": "input_image",
            "image_url": f"data:{mime_type};base64,{encoded}",
        }
    if is_pdf(mime_type):
        return {
            "type": "input_file",
            "filename": filename,
            "file_data": encoded,
        }
    raise ValueError(f"Unsupported mime_type: {mime_type}")
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""LLM invocation with file attachments support."""
2+
3+
from dataclasses import dataclass
4+
from typing import Any
5+
6+
from langchain_core.language_models import BaseChatModel
7+
from langchain_core.messages import AIMessage, AnyMessage, HumanMessage
8+
9+
from .file_type_handler import build_message_content_part_from_data
10+
11+
12+
@dataclass
class FileInfo:
    """File information for LLM file attachments."""

    # Location the file is downloaded from.
    url: str
    # Filename forwarded to the provider payload (e.g. OpenAI's "filename").
    name: str
    # MIME type of the file, e.g. "application/pdf" or "image/png".
    mime_type: str
19+
20+
21+
def _get_model_name(model: BaseChatModel) -> str:
    """Return the model's name, probing the common attribute spellings.

    Different langchain chat-model implementations expose the name under
    different attributes; the first non-empty string found wins.

    Raises:
        ValueError: when no known attribute holds a non-empty string.
    """
    candidate_attrs = ("model_name", "_model_name", "model", "model_id")
    for attr_name in candidate_attrs:
        candidate = getattr(model, attr_name, None)
        if isinstance(candidate, str) and candidate:
            return candidate
    raise ValueError(f"Model name not found in model {model}")
28+
29+
30+
async def create_part_for_file(
    file_info: FileInfo,
    model: BaseChatModel,
) -> dict[str, Any]:
    """Create a provider-specific message content part for a file attachment.

    Resolves the model's name, then delegates to
    build_message_content_part_from_data, which downloads/references the file
    at *file_info.url* in the format the detected provider expects.
    """
    resolved_name = _get_model_name(model)
    part = await build_message_content_part_from_data(
        url=file_info.url,
        filename=file_info.name,
        mime_type=file_info.mime_type,
        model=resolved_name,
    )
    return part
45+
46+
47+
def _ensure_ai_message(response: Any) -> AIMessage:
    """Validate that the LLM returned an AIMessage and narrow its type.

    Raises:
        TypeError: when the response is any other message type.
    """
    if not isinstance(response, AIMessage):
        raise TypeError(f"LLM returned {type(response).__name__} instead of AIMessage")
    return response


async def llm_call_with_files(
    messages: list[AnyMessage],
    files: list[FileInfo],
    model: BaseChatModel,
) -> AIMessage:
    """Invoke an LLM with file attachments.

    Downloads files, creates provider-specific content parts, and appends them
    as a HumanMessage. If no files are provided, equivalent to model.ainvoke().

    Args:
        messages: Conversation so far, passed through to the model.
        files: Files to attach; each becomes one content part.
        model: The chat model to invoke.

    Returns:
        The model's AIMessage response.

    Raises:
        TypeError: when the model returns something other than an AIMessage.
    """
    if not files:
        return _ensure_ai_message(await model.ainvoke(messages))

    # One provider-formatted content part per file, in the given order.
    # Parts are built sequentially; each may download its file.
    content_parts: list[str | dict[Any, Any]] = [
        await create_part_for_file(file_info, model) for file_info in files
    ]

    file_message = HumanMessage(content=content_parts)
    all_messages = [*messages, file_message]

    return _ensure_ai_message(await model.ainvoke(all_messages))

0 commit comments

Comments
 (0)