From b73b40253b9a1e5e900530f0c841321c61bded06 Mon Sep 17 00:00:00 2001 From: Arkadiusz Malka Date: Tue, 3 Jun 2025 10:54:15 +0200 Subject: [PATCH] Add audio transcription settings to MultiModalLiveClient Updated MultiModalLiveClient to include properties for input and output audio transcription management. Modified the constructor to accept new parameters for these settings and adjusted setup configuration accordingly. Enhanced BidiGenerateContentSetup with new properties for audio transcription configuration. --- src/GenerativeAI.Live/Models/MultiModalLiveClient.cs | 9 +++++++++ .../Types/MultimodalLive/BidiGenerateContentSetup.cs | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs b/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs index 712ab641..a85c5a4e 100644 --- a/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs +++ b/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs @@ -105,6 +105,10 @@ private async Task GetClient() /// public bool UseCodeExecutor { get; set; } = false; + public bool InputAudioTranscriptionEnabled { get; set; } = false; + + public bool OutputAudioTranscriptionEnabled { get; set; } = false; + #endregion #region Constructors @@ -115,6 +119,7 @@ private async Task GetClient() public MultiModalLiveClient(IPlatformAdapter platformAdapter, string modelName, GenerationConfig? config = null, ICollection? safetySettings = null, string? systemInstruction = null, + bool inputAudioTranscriptionEnabled = false, bool outputAudioTranscriptionEnabled = false, ILogger? logger = null) { _platformAdapter = platformAdapter ?? throw new ArgumentNullException(nameof(platformAdapter)); @@ -123,6 +128,8 @@ public MultiModalLiveClient(IPlatformAdapter platformAdapter, string modelName, { ResponseModalities = new List { Modality.TEXT } }; + InputAudioTranscriptionEnabled = inputAudioTranscriptionEnabled; + OutputAudioTranscriptionEnabled = outputAudioTranscriptionEnabled; SafetySettings = safetySettings; SystemInstruction = systemInstruction; _connectionId = Guid.NewGuid(); @@ -550,6 +557,8 @@ public async Task SendSetupAsync(CancellationToken cancellationToken = default) ? new Content(this.SystemInstruction, Roles.System) : null, Tools = tools.Count > 0 ? tools.ToArray() : null, + InputAudioTranscription = InputAudioTranscriptionEnabled ? new AudioTranscriptionConfig(): null, + OutputAudioTranscription = OutputAudioTranscriptionEnabled ? new AudioTranscriptionConfig() : null, }; await SendSetupAsync(setup, cancellationToken).ConfigureAwait(false); } diff --git a/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs b/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs index 5c5061c0..10b40bed 100644 --- a/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs +++ b/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs @@ -44,9 +44,15 @@ public class BidiGenerateContentSetup [JsonPropertyName("tools")] public Tool[]? Tools { get; set; } + /// + /// Configures output audio transcription settings. + /// [JsonPropertyName("outputAudioTranscription")] - public AudioTranscriptionConfig? OutputAudioTranscription { get; set; } + public AudioTranscriptionConfig? OutputAudioTranscription { get; set; } + /// + /// Configures input audio transcription settings. + /// [JsonPropertyName("inputAudioTranscription")] public AudioTranscriptionConfig? InputAudioTranscription { get; set; } ///