Skip to content

Commit 2f0d88b

Browse files
committed
fix: raise ValueError when MIME type cannot be determined for file_uri
When file_data.file_uri has no explicit MIME type and none can be inferred from the URI extension or display_name, _get_content() previously fell back silently to application/octet-stream via _DEFAULT_MIME_TYPE. This silently broke providers that validate MIME types (e.g. Vertex AI for GCS URIs).

Changes:
- Raise ValueError with actionable guidance when the MIME type cannot be determined and a file block would be constructed. Provider-specific text fallback paths (anthropic, vertex_ai non-gemini, etc.) still work without requiring a MIME type.
- Propagate model= in the recursive _content_to_message_param() call for mixed function_response + file content, fixing incorrect provider detection on Vertex AI / Gemini.

Fixes: #5184
1 parent 0fedb3b commit 2f0d88b

2 files changed

Lines changed: 130 additions & 46 deletions

File tree

src/google/adk/models/lite_llm.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ async def _content_to_message_param(
811811
follow_up = await _content_to_message_param(
812812
types.Content(role=content.role, parts=non_tool_parts),
813813
provider=provider,
814+
model=model,
814815
)
815816
follow_up_messages = (
816817
follow_up if isinstance(follow_up, list) else [follow_up]
@@ -1081,24 +1082,17 @@ async def _get_content(
10811082
})
10821083
continue
10831084

1084-
# Determine MIME type: use explicit value, infer from URI, or use default.
1085+
# Determine MIME type: use explicit value, infer from URI, or fail.
10851086
mime_type = part.file_data.mime_type
10861087
if not mime_type:
10871088
mime_type = _infer_mime_type_from_uri(part.file_data.file_uri)
10881089
if not mime_type and part.file_data.display_name:
10891090
guessed_mime_type, _ = mimetypes.guess_type(part.file_data.display_name)
10901091
mime_type = guessed_mime_type
1091-
if not mime_type:
1092-
# LiteLLM's Vertex AI backend requires format for GCS URIs.
1093-
mime_type = _DEFAULT_MIME_TYPE
1094-
logger.debug(
1095-
"Could not determine MIME type for file_uri %s, using default: %s",
1096-
part.file_data.file_uri,
1097-
mime_type,
1098-
)
1099-
mime_type = _normalize_mime_type(mime_type)
1092+
if mime_type:
1093+
mime_type = _normalize_mime_type(mime_type)
11001094

1101-
if provider in _FILE_ID_REQUIRED_PROVIDERS and _is_http_url(
1095+
if mime_type and provider in _FILE_ID_REQUIRED_PROVIDERS and _is_http_url(
11021096
part.file_data.file_uri
11031097
):
11041098
url_content_type = _media_url_content_type(mime_type)
@@ -1125,6 +1119,13 @@ async def _get_content(
11251119
})
11261120
continue
11271121

1122+
if not mime_type:
1123+
raise ValueError(
1124+
f"Cannot determine MIME type for file_uri"
1125+
f" '{part.file_data.file_uri}'. Set file_data.mime_type"
1126+
f" explicitly."
1127+
)
1128+
11281129
file_object: ChatCompletionFileUrlObject = {
11291130
"file_id": part.file_data.file_uri,
11301131
}

tests/unittests/models/test_litellm.py

Lines changed: 118 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,14 +1882,13 @@ async def test_content_to_message_param_user_message_file_uri_only(
18821882

18831883
@pytest.mark.asyncio
18841884
async def test_content_to_message_param_user_message_file_uri_without_mime_type():
1885-
"""Test handling of file_data without mime_type (GcsArtifactService scenario).
1885+
"""Test that file_data without determinable MIME type raises ValueError.
18861886
18871887
When using GcsArtifactService, artifacts may have file_uri (gs://...) but
1888-
without mime_type set. LiteLLM's Vertex AI backend requires the format
1889-
field to be present, so we infer MIME type from the URI extension or use
1890-
a default fallback to ensure compatibility.
1888+
without mime_type set and no recognizable extension. A clear error should
1889+
be raised so the developer can set file_data.mime_type explicitly.
18911890
1892-
See: https://github.com/google/adk-python/issues/3787
1891+
See: https://github.com/google/adk-python/issues/5184
18931892
"""
18941893
file_part = types.Part(
18951894
file_data=types.FileData(
@@ -1904,22 +1903,8 @@ async def test_content_to_message_param_user_message_file_uri_without_mime_type(
19041903
],
19051904
)
19061905

1907-
message = await _content_to_message_param(content)
1908-
assert message == {
1909-
"role": "user",
1910-
"content": [
1911-
{"type": "text", "text": "Analyze this file."},
1912-
{
1913-
"type": "file",
1914-
"file": {
1915-
"file_id": (
1916-
"gs://agent-artifact-bucket/app/user/session/artifact/0"
1917-
),
1918-
"format": "application/octet-stream",
1919-
},
1920-
},
1921-
],
1922-
}
1906+
with pytest.raises(ValueError, match="Cannot determine MIME type"):
1907+
await _content_to_message_param(content)
19231908

19241909

19251910
@pytest.mark.asyncio
@@ -3097,27 +3082,125 @@ async def test_get_content_file_uri_infers_from_display_name():
30973082

30983083
@pytest.mark.asyncio
30993084
async def test_get_content_file_uri_default_mime_type():
3100-
"""Test that file_uri without extension uses default MIME type.
3085+
"""Test that file_uri without extension raises ValueError.
31013086
31023087
When file_data has a file_uri without a recognizable extension and no explicit
3103-
mime_type, a default MIME type should be used to ensure compatibility with
3104-
LiteLLM backends.
3088+
mime_type, a ValueError should be raised to prevent silent misconfiguration.
31053089
3106-
See: https://github.com/google/adk-python/issues/3787
3090+
See: https://github.com/google/adk-python/issues/5184
31073091
"""
3108-
# Use Part constructor directly to create file_data without mime_type
3109-
# (types.Part.from_uri requires a valid mime_type when it can't infer)
31103092
parts = [
31113093
types.Part(file_data=types.FileData(file_uri="gs://bucket/artifact/0"))
31123094
]
3113-
content = await _get_content(parts)
3114-
assert content[0] == {
3115-
"type": "file",
3116-
"file": {
3117-
"file_id": "gs://bucket/artifact/0",
3118-
"format": "application/octet-stream",
3119-
},
3120-
}
3095+
with pytest.raises(ValueError, match="Cannot determine MIME type"):
3096+
await _get_content(parts)
3097+
3098+
3099+
@pytest.mark.asyncio
3100+
async def test_get_content_file_uri_no_mime_text_fallback_still_works():
3101+
"""Text fallback for unsupported providers works without MIME type.
3102+
3103+
When a provider requires text fallback (e.g., anthropic), file_data
3104+
without a determinable MIME type should produce a text reference
3105+
rather than raising a ValueError.
3106+
3107+
See: https://github.com/google/adk-python/issues/5184
3108+
"""
3109+
parts = [
3110+
types.Part(
3111+
file_data=types.FileData(
3112+
file_uri="gs://bucket/artifact/0",
3113+
display_name="my_artifact",
3114+
)
3115+
)
3116+
]
3117+
content = await _get_content(parts, provider="anthropic")
3118+
assert content == [
3119+
{"type": "text", "text": '[File reference: "my_artifact"]'},
3120+
]
3121+
3122+
3123+
@pytest.mark.asyncio
3124+
async def test_content_to_message_param_recursive_model_propagation():
3125+
"""Recursive _content_to_message_param calls propagate model parameter.
3126+
3127+
When a Content has mixed function_response + file parts, the recursive
3128+
call for non-tool parts must forward model= so provider-specific behavior
3129+
(e.g., Vertex AI Gemini file block support) works correctly.
3130+
3131+
See: https://github.com/google/adk-python/issues/5184
3132+
"""
3133+
tool_part = types.Part.from_function_response(
3134+
name="fetch_file",
3135+
response={"status": "ok"},
3136+
)
3137+
tool_part.function_response.id = "call_1"
3138+
3139+
file_part = types.Part(
3140+
file_data=types.FileData(
3141+
file_uri="gs://bucket/data.csv",
3142+
mime_type="text/csv",
3143+
)
3144+
)
3145+
3146+
content = types.Content(
3147+
role="user",
3148+
parts=[tool_part, file_part],
3149+
)
3150+
3151+
# vertex_ai + gemini model should keep the file block (not text fallback)
3152+
messages = await _content_to_message_param(
3153+
content, provider="vertex_ai", model="vertex_ai/gemini-1.5-pro"
3154+
)
3155+
assert isinstance(messages, list)
3156+
assert len(messages) == 2
3157+
assert messages[0]["role"] == "tool"
3158+
# The follow-up user message must contain a file block, not text fallback
3159+
user_msg = messages[1]
3160+
assert user_msg["role"] == "user"
3161+
file_content = user_msg["content"]
3162+
assert any(
3163+
item.get("type") == "file" for item in file_content
3164+
), "file block expected when model is propagated for vertex_ai/gemini"
3165+
3166+
3167+
@pytest.mark.asyncio
3168+
async def test_content_to_message_param_recursive_model_propagation_fallback():
3169+
"""Without model propagation, vertex_ai non-gemini would use text fallback.
3170+
3171+
Verify that vertex_ai with a non-gemini model correctly falls back to text.
3172+
"""
3173+
tool_part = types.Part.from_function_response(
3174+
name="fetch_file",
3175+
response={"status": "ok"},
3176+
)
3177+
tool_part.function_response.id = "call_1"
3178+
3179+
file_part = types.Part(
3180+
file_data=types.FileData(
3181+
file_uri="gs://bucket/data.csv",
3182+
mime_type="text/csv",
3183+
)
3184+
)
3185+
3186+
content = types.Content(
3187+
role="user",
3188+
parts=[tool_part, file_part],
3189+
)
3190+
3191+
# vertex_ai + non-gemini model should use text fallback
3192+
messages = await _content_to_message_param(
3193+
content, provider="vertex_ai", model="vertex_ai/claude-3-sonnet"
3194+
)
3195+
assert isinstance(messages, list)
3196+
user_msg = messages[1]
3197+
assert user_msg["role"] == "user"
3198+
file_content = user_msg["content"]
3199+
assert any(
3200+
item.get("type") == "text"
3201+
and "File reference" in item.get("text", "")
3202+
for item in file_content
3203+
), "text fallback expected for vertex_ai non-gemini model"
31213204

31223205

31233206
@pytest.mark.asyncio

0 commit comments

Comments
 (0)