client.listen.v1.connect(...)
-
-
-
Transcribe audio and video using Deepgram's speech-to-text WebSocket
-
-
-
# Synchronous example: open a listen v1 WebSocket, register event handlers,
# then send audio and a KeepAlive control message.
from deepgram import DeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    ListenV1ControlMessage,
    ListenV1MediaMessage,
    ListenV1SocketClientResponse,
)

client = DeepgramClient(
    api_key="YOUR_API_KEY",
)

with client.listen.v1.connect(model="nova-3") as connection:

    def on_message(message: ListenV1SocketClientResponse) -> None:
        """Print the type of each message received on the socket."""
        msg_type = getattr(message, "type", "Unknown")
        print(f"Received {msg_type} event")

    connection.on(EventType.OPEN, lambda _: print("Connection opened"))
    connection.on(EventType.MESSAGE, on_message)
    connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
    connection.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

    # Start listening
    # NOTE(review): if start_listening() blocks until the socket closes, the
    # sends below will not run until then — confirm against the SDK and, if
    # so, run it in a background thread.
    connection.start_listening()

    # Send audio data
    # `audio_bytes` is a placeholder: supply your own raw audio bytes.
    connection.send_media(ListenV1MediaMessage(audio_bytes))

    # Send control messages
    connection.send_control(ListenV1ControlMessage(type="KeepAlive"))
-
-
-
# Asynchronous example: open a listen v1 WebSocket, register event handlers,
# then send audio and a KeepAlive control message.
import asyncio

from deepgram import AsyncDeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    ListenV1ControlMessage,
    ListenV1MediaMessage,
    ListenV1SocketClientResponse,
)

client = AsyncDeepgramClient(
    api_key="YOUR_API_KEY",
)


async def main():
    """Connect to listen v1, wire up event handlers, and send audio."""
    async with client.listen.v1.connect(model="nova-3") as connection:

        def on_message(message: ListenV1SocketClientResponse) -> None:
            """Print the type of each message received on the socket."""
            msg_type = getattr(message, "type", "Unknown")
            print(f"Received {msg_type} event")

        connection.on(EventType.OPEN, lambda _: print("Connection opened"))
        connection.on(EventType.MESSAGE, on_message)
        connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
        connection.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

        # Start listening
        # NOTE(review): if awaiting start_listening() only returns once the
        # socket closes, the sends below will not run until then — confirm
        # against the SDK and, if so, schedule it with asyncio.create_task().
        await connection.start_listening()

        # Send audio data
        # `audio_bytes` is a placeholder: supply your own raw audio bytes.
        await connection.send_media(ListenV1MediaMessage(audio_bytes))

        # Send control messages
        await connection.send_control(ListenV1ControlMessage(type="KeepAlive"))


asyncio.run(main())
-
-
-
send_media(message) — Send binary audio data for transcription, e.g. ListenV1MediaMessage(audio_bytes)
-
send_control(message) — Send control messages to manage the connection: ListenV1ControlMessage(type="KeepAlive") — Keep the connection alive; ListenV1ControlMessage(type="Finalize") — Finalize the transcription
-
-
-
model:
str— AI model to use for the transcription
-
callback:
typing.Optional[str]— URL to which we'll make the callback request
-
callback_method:
typing.Optional[str]— HTTP method by which the callback request will be made
-
channels:
typing.Optional[str]— Number of independent audio channels contained in submitted audio
-
diarize:
typing.Optional[str]— Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0
-
dictation:
typing.Optional[str]— Dictation mode for controlling formatting with dictated speech
-
encoding:
typing.Optional[str]— Specify the expected encoding of your submitted audio
-
endpointing:
typing.Optional[str]— Control when speech recognition ends
-
extra:
typing.Optional[str]— Arbitrary key-value pairs that are attached to the API response
-
filler_words:
typing.Optional[str]— Include filler words like "uh" and "um" in transcripts
-
interim_results:
typing.Optional[str]— Return partial transcripts as audio is being processed
-
keyterm:
typing.Optional[str]— Key term prompting can boost or suppress specialized terminology and brands
-
keywords:
typing.Optional[str]— Keywords can boost or suppress specialized terminology and brands
-
language:
typing.Optional[str]— BCP-47 language tag that hints at the primary spoken language
-
mip_opt_out:
typing.Optional[str]— Opts out requests from the Deepgram Model Improvement Program
-
multichannel:
typing.Optional[str]— Transcribe each audio channel independently
-
numerals:
typing.Optional[str]— Convert numbers from written format to numerical format
-
profanity_filter:
typing.Optional[str]— Remove profanity from transcripts
-
punctuate:
typing.Optional[str]— Add punctuation and capitalization to the transcript
-
redact:
typing.Optional[str]— Redaction removes sensitive information from your transcripts
-
replace:
typing.Optional[str]— Search for terms or phrases in submitted audio and replaces them
-
sample_rate:
typing.Optional[str]— Sample rate of the submitted audio
-
search:
typing.Optional[str]— Search for terms or phrases in submitted audio
-
smart_format:
typing.Optional[str]— Apply formatting to transcript output for improved readability
-
tag:
typing.Optional[str]— Label your requests for the purpose of identification during usage reporting
-
utterance_end_ms:
typing.Optional[str]— Length of time in milliseconds of silence to wait for before finalizing speech
-
vad_events:
typing.Optional[str]— Return Voice Activity Detection events via the websocket
-
version:
typing.Optional[str]— Version of the model to use
-
authorization:
typing.Optional[str] — Use your API key for authentication, or alternatively generate a temporary token and pass it via the token query parameter. Example:
token %DEEPGRAM_API_KEY% or bearer %DEEPGRAM_TOKEN%
-
request_options:
typing.Optional[RequestOptions]— Request-specific configuration.
-
-
client.listen.v2.connect(...)
-
-
-
Real-time conversational speech recognition with contextual turn detection for natural voice conversations
-
-
-
# Synchronous example: open a listen v2 WebSocket, register event handlers,
# then send audio and a CloseStream control message.
from deepgram import DeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    ListenV2ControlMessage,
    ListenV2MediaMessage,
    ListenV2SocketClientResponse,
)

client = DeepgramClient(
    api_key="YOUR_API_KEY",
)

with client.listen.v2.connect(
    model="flux-general-en",
    encoding="linear16",
    sample_rate="16000",
) as connection:

    def on_message(message: ListenV2SocketClientResponse) -> None:
        """Print the type of each message received on the socket."""
        msg_type = getattr(message, "type", "Unknown")
        print(f"Received {msg_type} event")

    connection.on(EventType.OPEN, lambda _: print("Connection opened"))
    connection.on(EventType.MESSAGE, on_message)
    connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
    connection.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

    # Start listening
    # NOTE(review): if start_listening() blocks until the socket closes, the
    # sends below will not run until then — confirm against the SDK and, if
    # so, run it in a background thread.
    connection.start_listening()

    # Send audio data
    # `audio_bytes` is a placeholder: supply your own raw audio bytes in the
    # encoding and sample rate passed to connect() above.
    connection.send_media(ListenV2MediaMessage(data=audio_bytes))

    # Send control messages
    connection.send_control(ListenV2ControlMessage(type="CloseStream"))
-
-
-
# Asynchronous example: open a listen v2 WebSocket, register event handlers,
# then send audio and a CloseStream control message.
import asyncio

from deepgram import AsyncDeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    ListenV2ControlMessage,
    ListenV2MediaMessage,
    ListenV2SocketClientResponse,
)

client = AsyncDeepgramClient(
    api_key="YOUR_API_KEY",
)


async def main():
    """Connect to listen v2, wire up event handlers, and send audio."""
    async with client.listen.v2.connect(
        model="flux-general-en",
        encoding="linear16",
        sample_rate="16000",
    ) as connection:

        def on_message(message: ListenV2SocketClientResponse) -> None:
            """Print the type of each message received on the socket."""
            msg_type = getattr(message, "type", "Unknown")
            print(f"Received {msg_type} event")

        connection.on(EventType.OPEN, lambda _: print("Connection opened"))
        connection.on(EventType.MESSAGE, on_message)
        connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
        connection.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

        # Start listening
        # NOTE(review): if awaiting start_listening() only returns once the
        # socket closes, the sends below will not run until then — confirm
        # against the SDK and, if so, schedule it with asyncio.create_task().
        await connection.start_listening()

        # Send audio data
        # `audio_bytes` is a placeholder: supply your own raw audio bytes in
        # the encoding and sample rate passed to connect() above.
        await connection.send_media(ListenV2MediaMessage(data=audio_bytes))

        # Send control messages
        await connection.send_control(ListenV2ControlMessage(type="CloseStream"))


asyncio.run(main())
-
-
-
send_media(message) — Send binary audio data for transcription, e.g. ListenV2MediaMessage(data=audio_bytes)
-
send_control(message) — Send control messages to manage the connection: ListenV2ControlMessage(type="CloseStream") — Close the audio stream
-
-
-
model:
str— AI model used to process submitted audio
-
encoding:
str— Specify the expected encoding of your submitted audio
-
sample_rate:
str— Sample rate of the submitted audio
-
eager_eot_threshold:
typing.Optional[str]— Threshold for eager end-of-turn detection
-
eot_threshold:
typing.Optional[str]— Threshold for end-of-turn detection
-
eot_timeout_ms:
typing.Optional[str]— Timeout in milliseconds for end-of-turn detection
-
keyterm:
typing.Optional[str]— Key term prompting can boost or suppress specialized terminology and brands
-
mip_opt_out:
typing.Optional[str]— Opts out requests from the Deepgram Model Improvement Program
-
tag:
typing.Optional[str]— Label your requests for the purpose of identification during usage reporting
-
authorization:
typing.Optional[str] — Use your API key for authentication, or alternatively generate a temporary token and pass it via the token query parameter. Example:
token %DEEPGRAM_API_KEY% or bearer %DEEPGRAM_TOKEN%
-
request_options:
typing.Optional[RequestOptions]— Request-specific configuration.
-
-
client.speak.v1.connect(...)
-
-
-
Convert text into natural-sounding speech using Deepgram's TTS WebSocket
-
-
-
# Synchronous example: open a speak v1 WebSocket, register event handlers,
# then send text followed by Flush and Close control messages.
from deepgram import DeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    SpeakV1ControlMessage,
    SpeakV1SocketClientResponse,
    SpeakV1TextMessage,
)

client = DeepgramClient(
    api_key="YOUR_API_KEY",
)

with client.speak.v1.connect(
    model="aura-2-asteria-en",
    encoding="linear16",
    sample_rate=24000,
) as connection:

    def on_message(message: SpeakV1SocketClientResponse) -> None:
        """Report raw audio chunks and the type of any other message."""
        if isinstance(message, bytes):
            print("Received audio event")
        else:
            msg_type = getattr(message, "type", "Unknown")
            print(f"Received {msg_type} event")

    connection.on(EventType.OPEN, lambda _: print("Connection opened"))
    connection.on(EventType.MESSAGE, on_message)
    connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
    connection.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

    # Start listening
    # NOTE(review): if start_listening() blocks until the socket closes, the
    # sends below will not run until then — confirm against the SDK and, if
    # so, run it in a background thread.
    connection.start_listening()

    # Send text to be converted to speech
    connection.send_text(SpeakV1TextMessage(text="Hello, world!"))

    # Send control messages
    connection.send_control(SpeakV1ControlMessage(type="Flush"))
    connection.send_control(SpeakV1ControlMessage(type="Close"))
-
-
-
# Asynchronous example: open a speak v1 WebSocket, register event handlers,
# then send text followed by Flush and Close control messages.
import asyncio

from deepgram import AsyncDeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    SpeakV1ControlMessage,
    SpeakV1SocketClientResponse,
    SpeakV1TextMessage,
)

client = AsyncDeepgramClient(
    api_key="YOUR_API_KEY",
)


async def main():
    """Connect to speak v1, wire up event handlers, and send text."""
    async with client.speak.v1.connect(
        model="aura-2-asteria-en",
        encoding="linear16",
        sample_rate=24000,
    ) as connection:

        def on_message(message: SpeakV1SocketClientResponse) -> None:
            """Report raw audio chunks and the type of any other message."""
            if isinstance(message, bytes):
                print("Received audio event")
            else:
                msg_type = getattr(message, "type", "Unknown")
                print(f"Received {msg_type} event")

        connection.on(EventType.OPEN, lambda _: print("Connection opened"))
        connection.on(EventType.MESSAGE, on_message)
        connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
        connection.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

        # Start listening
        # NOTE(review): if awaiting start_listening() only returns once the
        # socket closes, the sends below will not run until then — confirm
        # against the SDK and, if so, schedule it with asyncio.create_task().
        await connection.start_listening()

        # Send text to be converted to speech
        await connection.send_text(SpeakV1TextMessage(text="Hello, world!"))

        # Send control messages
        await connection.send_control(SpeakV1ControlMessage(type="Flush"))
        await connection.send_control(SpeakV1ControlMessage(type="Close"))


asyncio.run(main())
-
-
-
send_text(message) — Send text to be converted to speech, e.g. SpeakV1TextMessage(text="Hello, world!")
-
send_control(message) — Send control messages to manage speech synthesis: SpeakV1ControlMessage(type="Flush") — Process all queued text immediately; SpeakV1ControlMessage(type="Clear") — Clear the text queue; SpeakV1ControlMessage(type="Close") — Close the connection
-
-
-
encoding:
typing.Optional[str]— Specify the expected encoding of your output audio
-
mip_opt_out:
typing.Optional[str]— Opts out requests from the Deepgram Model Improvement Program
-
model:
typing.Optional[str]— AI model used to process submitted text
-
sample_rate:
typing.Optional[str]— Sample rate for the output audio
-
authorization:
typing.Optional[str] — Use your API key for authentication, or alternatively generate a temporary token and pass it via the token query parameter. Example:
token %DEEPGRAM_API_KEY% or bearer %DEEPGRAM_TOKEN%
-
request_options:
typing.Optional[RequestOptions]— Request-specific configuration.
-
-
client.agent.v1.connect(...)
-
-
-
Build a conversational voice agent using Deepgram's Voice Agent WebSocket
-
-
-
# Synchronous example: open an agent v1 WebSocket, send the agent settings,
# register event handlers, then send audio and a KeepAlive control message.
from deepgram import DeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    AgentV1Agent,
    AgentV1AudioConfig,
    AgentV1AudioInput,
    AgentV1ControlMessage,
    AgentV1DeepgramSpeakProvider,
    AgentV1Listen,
    AgentV1ListenProvider,
    AgentV1MediaMessage,
    AgentV1OpenAiThinkProvider,
    AgentV1SettingsMessage,
    AgentV1SocketClientResponse,
    AgentV1SpeakProviderConfig,
    AgentV1Think,
)

client = DeepgramClient(
    api_key="YOUR_API_KEY",
)

with client.agent.v1.connect() as agent:
    # Configure the agent
    settings = AgentV1SettingsMessage(
        audio=AgentV1AudioConfig(
            input=AgentV1AudioInput(
                encoding="linear16",
                sample_rate=44100,
            )
        ),
        agent=AgentV1Agent(
            listen=AgentV1Listen(
                provider=AgentV1ListenProvider(
                    type="deepgram",
                    model="nova-3",
                    smart_format=True,
                )
            ),
            think=AgentV1Think(
                provider=AgentV1OpenAiThinkProvider(
                    type="open_ai",
                    model="gpt-4o-mini",
                    temperature=0.7,
                ),
                prompt='Reply only and explicitly with "OK".',
            ),
            speak=AgentV1SpeakProviderConfig(
                provider=AgentV1DeepgramSpeakProvider(
                    type="deepgram",
                    model="aura-2-asteria-en",
                )
            ),
        ),
    )

    agent.send_settings(settings)

    def on_message(message: AgentV1SocketClientResponse) -> None:
        """Report raw audio chunks and the type of any other message."""
        if isinstance(message, bytes):
            print("Received audio event")
        else:
            msg_type = getattr(message, "type", "Unknown")
            print(f"Received {msg_type} event")

    agent.on(EventType.OPEN, lambda _: print("Connection opened"))
    agent.on(EventType.MESSAGE, on_message)
    agent.on(EventType.CLOSE, lambda _: print("Connection closed"))
    agent.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

    # Start listening
    # NOTE(review): if start_listening() blocks until the socket closes, the
    # sends below will not run until then — confirm against the SDK and, if
    # so, run it in a background thread.
    agent.start_listening()

    # Send audio data
    # `audio_bytes` is a placeholder: supply your own raw audio bytes matching
    # the audio input settings configured above.
    agent.send_media(AgentV1MediaMessage(data=audio_bytes))

    # Send control messages
    agent.send_control(AgentV1ControlMessage(type="KeepAlive"))
-
-
-
# Asynchronous example: open an agent v1 WebSocket, send the agent settings,
# register event handlers, then send audio and a KeepAlive control message.
import asyncio

from deepgram import AsyncDeepgramClient
from deepgram.core.events import EventType
from deepgram.extensions.types.sockets import (
    AgentV1Agent,
    AgentV1AudioConfig,
    AgentV1AudioInput,
    AgentV1ControlMessage,
    AgentV1DeepgramSpeakProvider,
    AgentV1Listen,
    AgentV1ListenProvider,
    AgentV1MediaMessage,
    AgentV1OpenAiThinkProvider,
    AgentV1SettingsMessage,
    AgentV1SocketClientResponse,
    AgentV1SpeakProviderConfig,
    AgentV1Think,
)

client = AsyncDeepgramClient(
    api_key="YOUR_API_KEY",
)


async def main():
    """Connect to agent v1, configure the agent, and send audio."""
    async with client.agent.v1.connect() as agent:
        # Configure the agent
        settings = AgentV1SettingsMessage(
            audio=AgentV1AudioConfig(
                input=AgentV1AudioInput(
                    encoding="linear16",
                    sample_rate=16000,
                )
            ),
            agent=AgentV1Agent(
                listen=AgentV1Listen(
                    provider=AgentV1ListenProvider(
                        type="deepgram",
                        model="nova-3",
                        smart_format=True,
                    )
                ),
                think=AgentV1Think(
                    provider=AgentV1OpenAiThinkProvider(
                        type="open_ai",
                        model="gpt-4o-mini",
                        temperature=0.7,
                    )
                ),
                speak=AgentV1SpeakProviderConfig(
                    provider=AgentV1DeepgramSpeakProvider(
                        type="deepgram",
                        model="aura-2-asteria-en",
                    )
                ),
            ),
        )

        await agent.send_settings(settings)

        def on_message(message: AgentV1SocketClientResponse) -> None:
            """Report raw audio chunks and the type of any other message."""
            if isinstance(message, bytes):
                print("Received audio event")
            else:
                msg_type = getattr(message, "type", "Unknown")
                print(f"Received {msg_type} event")

        agent.on(EventType.OPEN, lambda _: print("Connection opened"))
        agent.on(EventType.MESSAGE, on_message)
        agent.on(EventType.CLOSE, lambda _: print("Connection closed"))
        agent.on(EventType.ERROR, lambda error: print(f"Caught: {error}"))

        # Start listening
        # NOTE(review): if awaiting start_listening() only returns once the
        # socket closes, the sends below will not run until then — confirm
        # against the SDK and, if so, schedule it with asyncio.create_task().
        await agent.start_listening()

        # Send audio data
        # `audio_bytes` is a placeholder: supply your own raw audio bytes
        # matching the audio input settings configured above.
        await agent.send_media(AgentV1MediaMessage(data=audio_bytes))

        # Send control messages
        await agent.send_control(AgentV1ControlMessage(type="KeepAlive"))


asyncio.run(main())
-
-
-
authorization:
typing.Optional[str] — Use your API key for authentication, or alternatively generate a temporary token and pass it via the token query parameter. Example:
token %DEEPGRAM_API_KEY% or bearer %DEEPGRAM_TOKEN%
-
request_options:
typing.Optional[RequestOptions]— Request-specific configuration.
-
-
-
send_settings(message) — Send initial agent configuration settings: AgentV1SettingsMessage(...) — Configure audio, listen, think, and speak providers
-
send_media(message) — Send binary audio data to the agent, e.g. AgentV1MediaMessage(data=audio_bytes)
-
send_control(message) — Send control messages (KeepAlive, etc.), e.g. AgentV1ControlMessage(type="KeepAlive")
-
send_update_speak(message) — Update the agent's speech synthesis settings: AgentV1UpdateSpeakMessage(...) — Modify TTS configuration during conversation
-
send_update_prompt(message) — Update the agent's system prompt: AgentV1UpdatePromptMessage(...) — Change the agent's behavior instructions
-
send_inject_user_message(message) — Inject a user message into the conversation: AgentV1InjectUserMessageMessage(...) — Add a simulated user input
-
send_inject_agent_message(message) — Inject an agent message into the conversation: AgentV1InjectAgentMessageMessage(...) — Add a simulated agent response
-
send_function_call_response(message) — Send the result of a function call back to the agent: AgentV1FunctionCallResponseMessage(...) — Provide function execution results
-
-