From 08378a4036483296e4df1e37e7dbbe6fdb814b4d Mon Sep 17 00:00:00 2001 From: David Fenster Date: Wed, 6 May 2026 14:48:17 -0400 Subject: [PATCH 1/2] support for SRT output voice jobs can target SRT output --- api-reference/jobs-voice-translate.mdx | 33 ++++++++++++++----- .../jobs-voice-translate/reference.mdx | 1 + api-reference/openapi.yaml | 1 + 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/api-reference/jobs-voice-translate.mdx b/api-reference/jobs-voice-translate.mdx index 28239dc..4ea9418 100644 --- a/api-reference/jobs-voice-translate.mdx +++ b/api-reference/jobs-voice-translate.mdx @@ -1,7 +1,7 @@ --- title: "Translate Audio Files" sidebarTitle: "Overview" -description: "Translate pre-recorded audio files into text or audio in other languages using asynchronous jobs." +description: "Translate pre-recorded audio files into plain text transcripts, SRT subtitles, or translated speech audio using asynchronous jobs." public: true --- @@ -9,11 +9,15 @@ public: true **Closed alpha.** This API may change without notice and is only available to select DeepL customers. See [alpha and beta features](/docs/resources/alpha-and-beta-features) for details. To request access, contact your customer success manager. -The Voice Translate Job API provides asynchronous translation of audio files into text or audio in other languages. +The Voice Translate Job API translates pre-recorded audio files into any combination of three output forms in any of its supported target languages: -The Voice Translate Job API processes entire audio files asynchronously. This makes it suitable for pre-recorded content such as podcasts, meeting recordings, or other media files. For live audio, see the [real-time Voice API](/api-reference/voice). 
+- **Plain text transcripts** (`text/plain`) +- **SRT subtitles** (`application/x-subrip`) — preserve the original timecodes +- **Translated speech audio** — speech-to-speech in a range of audio and video container formats (see the [Reference](/api-reference/jobs-voice-translate/reference#supported-output-formats) for the full list) -Each job can translate and transcribe audio into multiple languages. For example, a single English podcast can be translated into both German text and Spanish audio in one job. +The API processes entire audio files asynchronously. This makes it suitable for pre-recorded content such as podcasts, meeting recordings, or other media files. For live audio, see the [real-time Voice API](/api-reference/voice). + +Each job can produce multiple outputs from a single source. For example, one English podcast can be translated into German plain text, French SRT subtitles, and Spanish audio in a single job. For full request and response schemas, see the [Create Job](/api-reference/jobs-voice-translate/create-voice-translate-job) and [Get Job Status](/api-reference/jobs-voice-translate/get-voice-translate-job-status) endpoint references. For status values, limits, supported formats, and supported languages, see the [Reference](/api-reference/jobs-voice-translate/reference). @@ -45,7 +49,8 @@ Translating an audio file is a four-step process. The examples below use the API }, "targets": [ { "language": "de", "type": "text/plain" }, - { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" } + { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }, + { "language": "fr", "type": "application/x-subrip" } ] }' ``` @@ -68,7 +73,8 @@ Translating an audio file is a four-step process. 
The examples below use the API }, "targets": [ { "language": "de", "type": "text/plain" }, - { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" } + { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }, + { "language": "fr", "type": "application/x-subrip" } ] } ``` @@ -151,9 +157,11 @@ Translating an audio file is a four-step process. The examples below use the API "parameters": { "source_language": "en" }, "targets": [ { "language": "de", "type": "text/plain" }, - { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" } + { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }, + { "language": "fr", "type": "application/x-subrip" } ], "results": [ + { "status": "processing" }, { "status": "processing" }, { "status": "processing" } ] @@ -176,7 +184,8 @@ Translating an audio file is a four-step process. The examples below use the API "parameters": { "source_language": "en" }, "targets": [ { "language": "de", "type": "text/plain" }, - { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" } + { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }, + { "language": "fr", "type": "application/x-subrip" } ], "results": [ { @@ -187,6 +196,11 @@ Translating an audio file is a four-step process. The examples below use the API { "status": "failed", "error": { "message": "processing failed" } + }, + { + "status": "complete", + "download_url": "https://assets.deepl.com/collections/a74d88fb/assets/d4e5f6a7", + "signature": "eyJhbGciOiJIUzI1NiIs..." 
} ] } @@ -251,6 +265,7 @@ create_resp = requests.post( "targets": [ {"language": "de", "type": "text/plain"}, {"language": "es", "type": "audio/pcm;encoding=s16le;rate=16000"}, + {"language": "fr", "type": "application/x-subrip"}, ], }, ) @@ -293,7 +308,7 @@ for target, result in zip(status["targets"], status["results"]): headers={"Authorization": f"DeepL-Signature {result['signature']}"}, ) download_resp.raise_for_status() - suffix = ".txt" if target["type"] == "text/plain" else ".bin" + suffix = {"text/plain": ".txt", "application/x-subrip": ".srt"}.get(target["type"], ".bin") out_path = Path(f"output-{target['language']}{suffix}") out_path.write_bytes(download_resp.content) print(f"{target['language']}: wrote {out_path}") diff --git a/api-reference/jobs-voice-translate/reference.mdx b/api-reference/jobs-voice-translate/reference.mdx index 7c1e6ff..4792658 100644 --- a/api-reference/jobs-voice-translate/reference.mdx +++ b/api-reference/jobs-voice-translate/reference.mdx @@ -55,6 +55,7 @@ Each target specifies a `type` for the desired output format. | **Type** | **Description** | | :--------------------------------------------- | :----------------------------- | | `text/plain` | Plain text transcript | +| `application/x-subrip` | SRT subtitle file with original timecodes | | `audio/opus` | Opus audio | | `audio/flac` | FLAC audio | | `audio/pcm;encoding=s16le;rate=16000` | PCM 16-bit signed LE, 16 kHz | diff --git a/api-reference/openapi.yaml b/api-reference/openapi.yaml index 2567c3f..79f66ac 100644 --- a/api-reference/openapi.yaml +++ b/api-reference/openapi.yaml @@ -5757,6 +5757,7 @@ components: description: The desired output format for the translation target. 
enum: - text/plain + - application/x-subrip - audio/opus - audio/flac - "audio/pcm;encoding=s16le;rate=16000" From f9556069c2912ed4ec7fd1339f7efd73140aac75 Mon Sep 17 00:00:00 2001 From: David Fenster Date: Wed, 6 May 2026 19:00:56 -0400 Subject: [PATCH 2/2] make voice job api docs available --- docs.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs.json b/docs.json index ab18222..bd59efe 100644 --- a/docs.json +++ b/docs.json @@ -237,6 +237,15 @@ "api-reference/voice/reconnect-session" ] }, + { + "group": "Translate Audio Files", + "pages": [ + "api-reference/jobs-voice-translate", + "api-reference/jobs-voice-translate/create-voice-translate-job", + "api-reference/jobs-voice-translate/get-voice-translate-job-status", + "api-reference/jobs-voice-translate/reference" + ] + }, { "group": "Write", "pages": [