Skip to content

Commit c04be0d

Browse files
yyyu-google authored and copybara-github committed
feat: enable language code for audio transcription config in Live API for Vertex AI
PiperOrigin-RevId: 882272902
1 parent e6fe71a commit c04be0d

3 files changed

Lines changed: 48 additions & 8 deletions

File tree

google/genai/_live_converters.py

Lines changed: 23 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,17 @@
2323
from ._common import set_value_by_path as setv
2424

2525

26+
def _AudioTranscriptionConfig_to_mldev(
27+
from_object: Union[dict[str, Any], object],
28+
parent_object: Optional[dict[str, Any]] = None,
29+
) -> dict[str, Any]:
30+
to_object: dict[str, Any] = {}
31+
if getv(from_object, ['language_codes']) is not None:
32+
raise ValueError('language_codes parameter is not supported in Gemini API.')
33+
34+
return to_object
35+
36+
2637
def _AuthConfig_to_mldev(
2738
from_object: Union[dict[str, Any], object],
2839
parent_object: Optional[dict[str, Any]] = None,
@@ -556,14 +567,18 @@ def _LiveClientSetup_to_mldev(
556567
setv(
557568
to_object,
558569
['inputAudioTranscription'],
559-
getv(from_object, ['input_audio_transcription']),
570+
_AudioTranscriptionConfig_to_mldev(
571+
getv(from_object, ['input_audio_transcription']), to_object
572+
),
560573
)
561574

562575
if getv(from_object, ['output_audio_transcription']) is not None:
563576
setv(
564577
to_object,
565578
['outputAudioTranscription'],
566-
getv(from_object, ['output_audio_transcription']),
579+
_AudioTranscriptionConfig_to_mldev(
580+
getv(from_object, ['output_audio_transcription']), to_object
581+
),
567582
)
568583

569584
if getv(from_object, ['proactivity']) is not None:
@@ -769,14 +784,18 @@ def _LiveConnectConfig_to_mldev(
769784
setv(
770785
parent_object,
771786
['setup', 'inputAudioTranscription'],
772-
getv(from_object, ['input_audio_transcription']),
787+
_AudioTranscriptionConfig_to_mldev(
788+
getv(from_object, ['input_audio_transcription']), to_object
789+
),
773790
)
774791

775792
if getv(from_object, ['output_audio_transcription']) is not None:
776793
setv(
777794
parent_object,
778795
['setup', 'outputAudioTranscription'],
779-
getv(from_object, ['output_audio_transcription']),
796+
_AudioTranscriptionConfig_to_mldev(
797+
getv(from_object, ['output_audio_transcription']), to_object
798+
),
780799
)
781800

782801
if getv(from_object, ['realtime_input_config']) is not None:

google/genai/_tokens_converters.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,17 @@
2323
from ._common import set_value_by_path as setv
2424

2525

26+
def _AudioTranscriptionConfig_to_mldev(
27+
from_object: Union[dict[str, Any], object],
28+
parent_object: Optional[dict[str, Any]] = None,
29+
) -> dict[str, Any]:
30+
to_object: dict[str, Any] = {}
31+
if getv(from_object, ['language_codes']) is not None:
32+
raise ValueError('language_codes parameter is not supported in Gemini API.')
33+
34+
return to_object
35+
36+
2637
def _AuthConfig_to_mldev(
2738
from_object: Union[dict[str, Any], object],
2839
parent_object: Optional[dict[str, Any]] = None,
@@ -365,14 +376,18 @@ def _LiveConnectConfig_to_mldev(
365376
setv(
366377
parent_object,
367378
['setup', 'inputAudioTranscription'],
368-
getv(from_object, ['input_audio_transcription']),
379+
_AudioTranscriptionConfig_to_mldev(
380+
getv(from_object, ['input_audio_transcription']), to_object
381+
),
369382
)
370383

371384
if getv(from_object, ['output_audio_transcription']) is not None:
372385
setv(
373386
parent_object,
374387
['setup', 'outputAudioTranscription'],
375-
getv(from_object, ['output_audio_transcription']),
388+
_AudioTranscriptionConfig_to_mldev(
389+
getv(from_object, ['output_audio_transcription']), to_object
390+
),
376391
)
377392

378393
if getv(from_object, ['realtime_input_config']) is not None:

google/genai/types.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17996,13 +17996,19 @@ class ContextWindowCompressionConfigDict(TypedDict, total=False):
1799617996
class AudioTranscriptionConfig(_common.BaseModel):
1799717997
"""The audio transcription configuration in Setup."""
1799817998

17999-
pass
17999+
language_codes: Optional[list[str]] = Field(
18000+
default=None,
18001+
description="""The language codes of the audio. BCP-47 language code. If not set, the transcription will be in the language detected by the model. If set, the server will use the language code specified in the model config as a hint for the language of the audio
18002+
""",
18003+
)
1800018004

1800118005

1800218006
class AudioTranscriptionConfigDict(TypedDict, total=False):
1800318007
"""The audio transcription configuration in Setup."""
1800418008

18005-
pass
18009+
language_codes: Optional[list[str]]
18010+
"""The language codes of the audio. BCP-47 language code. If not set, the transcription will be in the language detected by the model. If set, the server will use the language code specified in the model config as a hint for the language of the audio
18011+
"""
1800618012

1800718013

1800818014
AudioTranscriptionConfigOrDict = Union[

0 commit comments

Comments
 (0)