Skip to content

Commit d4d129f

Browse files
feat!: sdk regeneration 2026-04-14 (#690)
## Summary - Fern SDK regeneration with latest API specs from deepgram-docs - Re-applied manual patches (broad exception catch, optional send params, sanitize numeric types, construct_type keyword args, float→int type fixes) - Fixed examples for current API surface - New voice agent configurations and variables REST API - New WebSocket methods: send_update_think (agent), send_configure (listen v2) ## BREAKING CHANGE Removed public type exports due to upstream API spec restructuring: **Agent provider types restructured** (~280 types): - `AgentV1Settings*SpeakEndpoint*` and `AgentV1Settings*SpeakOneItem*` types replaced by shared provider schemas (`SpeakSettingsV1Provider_*`) - `AgentV1UpdateSpeak*` types restructured similarly - `AgentV1Settings*ThinkOneItem*` types replaced by shared `ThinkSettingsV1` schemas **Type enums inlined to Literal strings** (42 types): - `ListenV1MetadataType`, `ListenV1ResultsType`, `ListenV1SpeechStartedType`, `ListenV1UtteranceEndType` - `ListenV2ConnectedType`, `ListenV2FatalErrorType`, `ListenV2TurnInfoType` - `SpeakV1MetadataType`, `SpeakV1TextType`, `SpeakV1WarningType` - `AgentV1*Type` enums (29 types) **Other removed types** (9 types): - `AgentThinkModelsV1ResponseModelsItem*Provider` (6 types) - `MediaTranscribeRequest*Zero` (3 types) **Not breaking:** - No client methods removed - No method signatures changed - Core APIs (listen, speak, manage, read) unchanged - DeepgramClient, transcribe_url, transcribe_file, audio.generate all work identically ## Test plan - [x] Unit tests: 147 passed, 1 skipped, 0 failed - [x] Integration tests: 32/32 passed - [x] Examples: all passing - [x] 6 SDK-based starters validated against local branch --------- Co-authored-by: fern-api <115122769+fern-api[bot]@users.noreply.github.com>
1 parent 8c349ac commit d4d129f

394 files changed

Lines changed: 5289 additions & 6406 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.fern/metadata.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"cliVersion": "4.46.0",
2+
"cliVersion": "4.67.1",
33
"generatorName": "fernapi/fern-python-sdk",
44
"generatorVersion": "4.62.0",
55
"generatorConfig": {
@@ -16,6 +16,6 @@
1616
"skip_validation": true
1717
}
1818
},
19-
"originGitCommit": "879c76c78827f323e425c1640f76a6e50d6c68d3",
20-
"sdkVersion": "6.0.2"
19+
"originGitCommit": "aa8e0677bcaea82c02a5934c61d195b35921b33d",
20+
"sdkVersion": "6.1.2"
2121
}

context7.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

examples/13-transcription-live-websocket.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,12 @@
22
Example: Live Transcription with WebSocket (Listen V1)
33
44
This example shows how to stream audio for real-time transcription using WebSocket.
5+
It streams a pre-recorded audio file in chunks to simulate a live microphone feed.
56
"""
67

8+
import os
9+
import threading
10+
import time
711
from typing import Union
812

913
from dotenv import load_dotenv
@@ -23,33 +27,46 @@
2327

2428
client = DeepgramClient()
2529

30+
# Audio chunking: simulate real-time streaming by sending 1-second chunks
31+
sample_rate = 44100 # Hz (matches fixtures/audio.wav)
32+
chunk_size = sample_rate * 2 # 2 bytes per sample (linear16 PCM mono)
33+
chunk_delay = 1.0 # seconds between chunks
34+
35+
audio_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures", "audio.wav")
36+
2637
try:
2738
with client.listen.v1.connect(model="nova-3") as connection:
2839

2940
def on_message(message: ListenV1SocketClientResponse) -> None:
3041
msg_type = getattr(message, "type", "Unknown")
31-
print(f"Received {msg_type} event")
32-
33-
# Extract transcription from Results events
3442
if isinstance(message, ListenV1Results):
3543
if message.channel and message.channel.alternatives:
3644
transcript = message.channel.alternatives[0].transcript
3745
if transcript:
3846
print(f"Transcript: {transcript}")
47+
else:
48+
print(f"Received {msg_type} event")
3949

4050
connection.on(EventType.OPEN, lambda _: print("Connection opened"))
4151
connection.on(EventType.MESSAGE, on_message)
4252
connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
4353
connection.on(EventType.ERROR, lambda error: print(f"Error: {error}"))
4454

45-
# Start listening - this blocks until the connection closes
46-
# In production, you would send audio data here:
47-
# audio_path = os.path.join(os.path.dirname(__file__), "..", "fixtures", "audio.wav")
48-
# with open(audio_path, "rb") as audio_file:
49-
# audio_data = audio_file.read()
50-
# connection.send_listen_v_1_media(audio_data)
55+
# Start listening in a background thread so we can send audio concurrently
56+
threading.Thread(target=connection.start_listening, daemon=True).start()
57+
58+
# Stream audio file in chunks to simulate live microphone input
59+
with open(audio_path, "rb") as f:
60+
audio_data = f.read()
61+
62+
for i in range(0, len(audio_data), chunk_size):
63+
chunk = audio_data[i : i + chunk_size]
64+
if chunk:
65+
connection.send_media(chunk)
66+
time.sleep(chunk_delay)
5167

52-
connection.start_listening()
68+
# Wait for final transcription results
69+
time.sleep(2)
5370

5471
# For async version:
5572
# from deepgram import AsyncDeepgramClient

examples/14-transcription-live-websocket-v2.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,22 @@
3636
) as connection:
3737

3838
def on_message(message: ListenV2SocketClientResponse) -> None:
39-
msg_type = getattr(message, "type", type(message).__name__)
40-
print(f"Received {msg_type} event ({type(message).__name__})")
41-
42-
# Extract transcription from TurnInfo events
43-
if isinstance(message, ListenV2TurnInfo):
39+
# V2 messages may arrive as typed objects or dicts depending on the union match
40+
if isinstance(message, dict):
41+
msg_type = message.get("type", "Unknown")
42+
print(f"Received {msg_type} event")
43+
if msg_type == "TurnInfo":
44+
print(f" transcript: {message.get('transcript', '')}")
45+
print(f" event: {message.get('event', '')}")
46+
print(f" turn_index: {message.get('turn_index', '')}")
47+
elif isinstance(message, ListenV2TurnInfo):
48+
print(f"Received TurnInfo event")
4449
print(f" transcript: {message.transcript}")
4550
print(f" event: {message.event}")
4651
print(f" turn_index: {message.turn_index}")
52+
else:
53+
msg_type = getattr(message, "type", type(message).__name__)
54+
print(f"Received {msg_type} event")
4755

4856
connection.on(EventType.OPEN, lambda _: print("Connection opened"))
4957
connection.on(EventType.MESSAGE, on_message)

examples/23-text-builder-helper.py

Lines changed: 34 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,23 @@
1010

1111
from deepgram import DeepgramClient
1212
from deepgram.helpers import TextBuilder, add_pronunciation, ssml_to_deepgram
13-
from deepgram.speak.v1.audio.types import (
14-
AudioGenerateRequestEncoding,
15-
AudioGenerateRequestModel,
16-
)
1713

1814

1915
def example_basic_text_builder():
2016
"""Example 1: Basic TextBuilder usage with pronunciations and pauses"""
2117
print("Example 1: Basic TextBuilder Usage")
2218
print("-" * 50)
2319

24-
# Build text with pronunciations and pauses
20+
# Build text with pronunciations
21+
# Note: .pause() is supported in streaming (WebSocket) mode.
22+
# For REST API, use plain text between pronunciations.
2523
text = (
2624
TextBuilder()
2725
.text("Take ")
2826
.pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn")
2927
.text(" twice daily with ")
3028
.pronunciation("dupilumab", "duːˈpɪljuːmæb")
31-
.text(" injections")
32-
.pause(500)
33-
.text(" Do not exceed prescribed dosage.")
29+
.text(" injections. Do not exceed prescribed dosage.")
3430
.build()
3531
)
3632

@@ -42,15 +38,16 @@ def example_basic_text_builder():
4238
client = DeepgramClient(api_key=api_key)
4339

4440
# Generate speech with custom pronunciations
45-
response = client.speak.v1.generate(
46-
text,
47-
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
48-
encoding=AudioGenerateRequestEncoding.LINEAR16,
41+
response = client.speak.v1.audio.generate(
42+
text=text,
43+
model="aura-2-asteria-en",
44+
encoding="linear16",
4945
)
5046

5147
# Save to file
5248
with open("output_example1.wav", "wb") as f:
53-
f.write(response)
49+
for chunk in response:
50+
f.write(chunk)
5451

5552
print("✓ Audio saved to output_example1.wav")
5653
else:
@@ -75,13 +72,14 @@ def example_add_pronunciation_function():
7572
if api_key:
7673
client = DeepgramClient(api_key=api_key)
7774

78-
response = client.speak.v1.generate(
79-
text,
80-
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
75+
response = client.speak.v1.audio.generate(
76+
text=text,
77+
model="aura-2-asteria-en",
8178
)
8279

8380
with open("output_example2.wav", "wb") as f:
84-
f.write(response)
81+
for chunk in response:
82+
f.write(chunk)
8583

8684
print("✓ Audio saved to output_example2.wav")
8785
else:
@@ -96,10 +94,8 @@ def example_ssml_migration():
9694
# Existing SSML from another TTS provider
9795
ssml = """<speak>
9896
Welcome to your medication guide.
99-
<break time="500ms"/>
100-
Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme>
97+
Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme>
10198
as prescribed.
102-
<break time="1000ms"/>
10399
Contact your doctor if you experience side effects.
104100
</speak>"""
105101

@@ -112,13 +108,14 @@ def example_ssml_migration():
112108
if api_key:
113109
client = DeepgramClient(api_key=api_key)
114110

115-
response = client.speak.v1.generate(
116-
text,
117-
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
111+
response = client.speak.v1.audio.generate(
112+
text=text,
113+
model="aura-2-asteria-en",
118114
)
119115

120116
with open("output_example3.wav", "wb") as f:
121-
f.write(response)
117+
for chunk in response:
118+
f.write(chunk)
122119

123120
print("✓ Audio saved to output_example3.wav")
124121
else:
@@ -137,9 +134,7 @@ def example_mixed_ssml_and_builder():
137134
text = (
138135
TextBuilder()
139136
.from_ssml(ssml)
140-
.pause(500)
141137
.text(" Store at room temperature.")
142-
.pause(500)
143138
.text(" Keep out of reach of children.")
144139
.build()
145140
)
@@ -150,13 +145,14 @@ def example_mixed_ssml_and_builder():
150145
if api_key:
151146
client = DeepgramClient(api_key=api_key)
152147

153-
response = client.speak.v1.generate(
154-
text,
155-
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
148+
response = client.speak.v1.audio.generate(
149+
text=text,
150+
model="aura-2-asteria-en",
156151
)
157152

158153
with open("output_example4.wav", "wb") as f:
159-
f.write(response)
154+
for chunk in response:
155+
f.write(chunk)
160156

161157
print("✓ Audio saved to output_example4.wav")
162158
else:
@@ -172,19 +168,15 @@ def example_pharmacy_instructions():
172168
TextBuilder()
173169
.text("Prescription for ")
174170
.pronunciation("lisinopril", "laɪˈsɪnəprɪl")
175-
.pause(300)
176-
.text(" Take one tablet by mouth daily for hypertension.")
177-
.pause(500)
171+
.text(". Take one tablet by mouth daily for hypertension.")
178172
.text(" Common side effects may include ")
179173
.pronunciation("hypotension", "ˌhaɪpoʊˈtɛnʃən")
180174
.text(" or dizziness.")
181-
.pause(500)
182175
.text(" Do not take with ")
183176
.pronunciation("aliskiren", "əˈlɪskɪrɛn")
184177
.text(" or ")
185178
.pronunciation("sacubitril", "səˈkjuːbɪtrɪl")
186-
.pause(500)
187-
.text(" Call your doctor if symptoms worsen.")
179+
.text(". Call your doctor if symptoms worsen.")
188180
.build()
189181
)
190182

@@ -194,14 +186,15 @@ def example_pharmacy_instructions():
194186
if api_key:
195187
client = DeepgramClient(api_key=api_key)
196188

197-
response = client.speak.v1.generate(
198-
text,
199-
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
200-
encoding=AudioGenerateRequestEncoding.LINEAR16,
189+
response = client.speak.v1.audio.generate(
190+
text=text,
191+
model="aura-2-asteria-en",
192+
encoding="linear16",
201193
)
202194

203195
with open("output_example5.wav", "wb") as f:
204-
f.write(response)
196+
for chunk in response:
197+
f.write(chunk)
205198

206199
print("✓ Audio saved to output_example5.wav")
207200
else:

examples/24-text-builder-streaming.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,13 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
6565
connection.on(EventType.ERROR, lambda error: print(f"✗ Error: {error}"))
6666

6767
# Send the TextBuilder-generated text
68-
text_message = SpeakV1Text(text=text)
69-
connection.send_speak_v_1_text(text_message)
68+
connection.send_text(SpeakV1Text(text=text))
7069

7170
# Flush to ensure all text is processed
72-
flush_message = SpeakV1Flush()
73-
connection.send_speak_v_1_flush(flush_message)
71+
connection.send_flush()
7472

7573
# Close the connection when done
76-
close_message = SpeakV1Close()
77-
connection.send_speak_v_1_close(close_message)
74+
connection.send_close()
7875

7976
# Start listening - this blocks until the connection closes
8077
connection.start_listening()
@@ -138,10 +135,10 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
138135
# Send multiple messages
139136
for i, text in enumerate([intro, instruction1, instruction2, closing], 1):
140137
print(f"Sending message {i}: {text[:50]}...")
141-
connection.send_speak_v_1_text(SpeakV1Text(text=text))
138+
connection.send_text(SpeakV1Text(text=text))
142139

143-
connection.send_speak_v_1_flush(SpeakV1Flush())
144-
connection.send_speak_v_1_close(SpeakV1Close())
140+
connection.send_flush()
141+
connection.send_close()
145142

146143
connection.start_listening()
147144

0 commit comments

Comments
 (0)