From fd834299a022621412546a837081e66074edb82e Mon Sep 17 00:00:00 2001 From: Ben Cherry Date: Mon, 2 Feb 2026 15:55:55 -0800 Subject: [PATCH 1/2] Change model to deepgram --- README.md | 2 +- src/main.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 70ca643..dc5da2d 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ A complete starter project for building voice AI apps with [LiveKit Agents for N The starter project includes: - A simple voice AI assistant, ready for extension and customization -- A voice AI pipeline with [models](https://docs.livekit.io/agents/models) from OpenAI, Cartesia, and AssemblyAI served through LiveKit Cloud +- A voice AI pipeline with [models](https://docs.livekit.io/agents/models) from OpenAI, Cartesia, and Deepgram served through LiveKit Cloud - Easily integrate your preferred [LLM](https://docs.livekit.io/agents/models/llm/), [STT](https://docs.livekit.io/agents/models/stt/), and [TTS](https://docs.livekit.io/agents/models/tts/) instead, or swap to a realtime model like the [OpenAI Realtime API](https://docs.livekit.io/agents/models/realtime/openai) - [LiveKit Turn Detector](https://docs.livekit.io/agents/build/turns/turn-detector/) for contextually-aware speaker detection, with multilingual support - [Background voice cancellation](https://docs.livekit.io/home/cloud/noise-cancellation/) diff --git a/src/main.ts b/src/main.ts index d4b7e96..36afa8d 100644 --- a/src/main.ts +++ b/src/main.ts @@ -25,12 +25,12 @@ export default defineAgent({ proc.userData.vad = await silero.VAD.load(); }, entry: async (ctx: JobContext) => { - // Set up a voice AI pipeline using OpenAI, Cartesia, AssemblyAI, and the LiveKit turn detector + // Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector const session = new voice.AgentSession({ // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand // See all available models at https://docs.livekit.io/agents/models/stt/ stt: new inference.STT({ - model: 'assemblyai/universal-streaming', + model: 'deepgram/flux-general', language: 'en', }), From 1fa463280db1225ee21c456e4cb940259afa4b2e Mon Sep 17 00:00:00 2001 From: Ben Cherry Date: Tue, 3 Feb 2026 11:03:31 -0800 Subject: [PATCH 2/2] Switch Deepgram STT to nova-3 multi --- src/main.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.ts b/src/main.ts index 36afa8d..70263ca 100644 --- a/src/main.ts +++ b/src/main.ts @@ -30,8 +30,8 @@ export default defineAgent({ // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand // See all available models at https://docs.livekit.io/agents/models/stt/ stt: new inference.STT({ - model: 'deepgram/flux-general', - language: 'en', + model: 'deepgram/nova-3', + language: 'multi', }), // A Large Language Model (LLM) is your agent's brain, processing user input and generating a response