feat: enable language codes for audio transcription config in Live API for Vertex AI

yyyu-google · copybara-github · commit c04be0db2b65 · 2026-03-11T17:09:49.000-07:00
PiperOrigin-RevId: 882272902
diff --git a/google/genai/_live_converters.py b/google/genai/_live_converters.py
@@ -23,6 +23,17 @@
 from ._common import set_value_by_path as setv
 
 
+def _AudioTranscriptionConfig_to_mldev(
+    from_object: Union[dict[str, Any], object],
+    parent_object: Optional[dict[str, Any]] = None,
+) -> dict[str, Any]:
+  to_object: dict[str, Any] = {}
+  if getv(from_object, ['language_codes']) is not None:
+    raise ValueError('language_codes parameter is not supported in Gemini API.')
+
+  return to_object
+
+
 def _AuthConfig_to_mldev(
     from_object: Union[dict[str, Any], object],
     parent_object: Optional[dict[str, Any]] = None,
@@ -556,14 +567,18 @@ def _LiveClientSetup_to_mldev(
     setv(
         to_object,
         ['inputAudioTranscription'],
-        getv(from_object, ['input_audio_transcription']),
+        _AudioTranscriptionConfig_to_mldev(
+            getv(from_object, ['input_audio_transcription']), to_object
+        ),
     )
 
   if getv(from_object, ['output_audio_transcription']) is not None:
     setv(
         to_object,
         ['outputAudioTranscription'],
-        getv(from_object, ['output_audio_transcription']),
+        _AudioTranscriptionConfig_to_mldev(
+            getv(from_object, ['output_audio_transcription']), to_object
+        ),
     )
 
   if getv(from_object, ['proactivity']) is not None:
@@ -769,14 +784,18 @@ def _LiveConnectConfig_to_mldev(
     setv(
         parent_object,
         ['setup', 'inputAudioTranscription'],
-        getv(from_object, ['input_audio_transcription']),
+        _AudioTranscriptionConfig_to_mldev(
+            getv(from_object, ['input_audio_transcription']), to_object
+        ),
     )
 
   if getv(from_object, ['output_audio_transcription']) is not None:
     setv(
         parent_object,
         ['setup', 'outputAudioTranscription'],
-        getv(from_object, ['output_audio_transcription']),
+        _AudioTranscriptionConfig_to_mldev(
+            getv(from_object, ['output_audio_transcription']), to_object
+        ),
     )
 
   if getv(from_object, ['realtime_input_config']) is not None:
diff --git a/google/genai/_tokens_converters.py b/google/genai/_tokens_converters.py
@@ -23,6 +23,17 @@
 from ._common import set_value_by_path as setv
 
 
+def _AudioTranscriptionConfig_to_mldev(
+    from_object: Union[dict[str, Any], object],
+    parent_object: Optional[dict[str, Any]] = None,
+) -> dict[str, Any]:
+  to_object: dict[str, Any] = {}
+  if getv(from_object, ['language_codes']) is not None:
+    raise ValueError('language_codes parameter is not supported in Gemini API.')
+
+  return to_object
+
+
 def _AuthConfig_to_mldev(
     from_object: Union[dict[str, Any], object],
     parent_object: Optional[dict[str, Any]] = None,
@@ -365,14 +376,18 @@ def _LiveConnectConfig_to_mldev(
     setv(
         parent_object,
         ['setup', 'inputAudioTranscription'],
-        getv(from_object, ['input_audio_transcription']),
+        _AudioTranscriptionConfig_to_mldev(
+            getv(from_object, ['input_audio_transcription']), to_object
+        ),
     )
 
   if getv(from_object, ['output_audio_transcription']) is not None:
     setv(
         parent_object,
         ['setup', 'outputAudioTranscription'],
-        getv(from_object, ['output_audio_transcription']),
+        _AudioTranscriptionConfig_to_mldev(
+            getv(from_object, ['output_audio_transcription']), to_object
+        ),
     )
 
   if getv(from_object, ['realtime_input_config']) is not None:
diff --git a/google/genai/types.py b/google/genai/types.py
@@ -17996,13 +17996,19 @@ class ContextWindowCompressionConfigDict(TypedDict, total=False):
 class AudioTranscriptionConfig(_common.BaseModel):
   """The audio transcription configuration in Setup."""
 
-  pass
+  language_codes: Optional[list[str]] = Field(
+      default=None,
+      description="""The language codes of the audio. BCP-47 language code. If not set, the transcription will be in the language detected by the model. If set, the server will use the language code specified in the model config as a hint for the language of the audio
+      """,
+  )
 
 
 class AudioTranscriptionConfigDict(TypedDict, total=False):
   """The audio transcription configuration in Setup."""
 
-  pass
+  language_codes: Optional[list[str]]
+  """The language codes of the audio. BCP-47 language code. If not set, the transcription will be in the language detected by the model. If set, the server will use the language code specified in the model config as a hint for the language of the audio
+      """
 
 
 AudioTranscriptionConfigOrDict = Union[