From 08378a4036483296e4df1e37e7dbbe6fdb814b4d Mon Sep 17 00:00:00 2001
From: David Fenster <david.fenster@deepl.com>
Date: Wed, 6 May 2026 14:48:17 -0400
Subject: [PATCH 1/2] support for SRT output

voice jobs can target SRT output
---
 api-reference/jobs-voice-translate.mdx        | 33 ++++++++++++++-----
 .../jobs-voice-translate/reference.mdx        |  1 +
 api-reference/openapi.yaml                    |  1 +
 3 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/api-reference/jobs-voice-translate.mdx b/api-reference/jobs-voice-translate.mdx
index 28239dc..4ea9418 100644
--- a/api-reference/jobs-voice-translate.mdx
+++ b/api-reference/jobs-voice-translate.mdx
@@ -1,7 +1,7 @@
 ---
 title: "Translate Audio Files"
 sidebarTitle: "Overview"
-description: "Translate pre-recorded audio files into text or audio in other languages using asynchronous jobs."
+description: "Translate pre-recorded audio files into plain text transcripts, SRT subtitles, or translated speech audio using asynchronous jobs."
 public: true
 ---
 
@@ -9,11 +9,15 @@ public: true
   **Closed alpha.** This API may change without notice and is only available to select DeepL customers. See [alpha and beta features](/docs/resources/alpha-and-beta-features) for details. To request access, contact your customer success manager.
 </Warning>
 
-The Voice Translate Job API provides asynchronous translation of audio files into text or audio in other languages.
+The Voice Translate Job API translates pre-recorded audio files into any supported target language, producing any combination of three output types:
 
-The Voice Translate Job API processes entire audio files asynchronously. This makes it suitable for pre-recorded content such as podcasts, meeting recordings, or other media files. For live audio, see the [real-time Voice API](/api-reference/voice).
+- **Plain text transcripts** (`text/plain`)
+- **SRT subtitles** (`application/x-subrip`) — preserving the original timecodes
+- **Translated speech audio** — speech-to-speech in a range of audio and video container formats (see the [Reference](/api-reference/jobs-voice-translate/reference#supported-output-formats) for the full list)
 
-Each job can translate and transcribe audio into multiple languages. For example, a single English podcast can be translated into both German text and Spanish audio in one job.
+The API processes entire audio files asynchronously. This makes it suitable for pre-recorded content such as podcasts, meeting recordings, or other media files. For live audio, see the [real-time Voice API](/api-reference/voice).
+
+Each job can produce multiple outputs from a single source. For example, one English podcast can be translated into German plain text, French SRT subtitles, and Spanish audio in a single job.
 
 For full request and response schemas, see the [Create Job](/api-reference/jobs-voice-translate/create-voice-translate-job) and [Get Job Status](/api-reference/jobs-voice-translate/get-voice-translate-job-status) endpoint references. For status values, limits, supported formats, and supported languages, see the [Reference](/api-reference/jobs-voice-translate/reference).
 
@@ -45,7 +49,8 @@ Translating an audio file is a four-step process. The examples below use the API
           },
           "targets": [
             { "language": "de", "type": "text/plain" },
-            { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }
+            { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" },
+            { "language": "fr", "type": "application/x-subrip" }
           ]
         }'
         ```
@@ -68,7 +73,8 @@ Translating an audio file is a four-step process. The examples below use the API
           },
           "targets": [
             { "language": "de", "type": "text/plain" },
-            { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }
+            { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" },
+            { "language": "fr", "type": "application/x-subrip" }
           ]
         }
         ```
@@ -151,9 +157,11 @@ Translating an audio file is a four-step process. The examples below use the API
       "parameters": { "source_language": "en" },
       "targets": [
         { "language": "de", "type": "text/plain" },
-        { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }
+        { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" },
+        { "language": "fr", "type": "application/x-subrip" }
       ],
       "results": [
+        { "status": "processing" },
         { "status": "processing" },
         { "status": "processing" }
       ]
@@ -176,7 +184,8 @@ Translating an audio file is a four-step process. The examples below use the API
       "parameters": { "source_language": "en" },
       "targets": [
         { "language": "de", "type": "text/plain" },
-        { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" }
+        { "language": "es", "type": "audio/pcm;encoding=s16le;rate=16000" },
+        { "language": "fr", "type": "application/x-subrip" }
       ],
       "results": [
         {
@@ -187,6 +196,11 @@ Translating an audio file is a four-step process. The examples below use the API
         {
           "status": "failed",
           "error": { "message": "processing failed" }
+        },
+        {
+          "status": "complete",
+          "download_url": "https://assets.deepl.com/collections/a74d88fb/assets/d4e5f6a7",
+          "signature": "eyJhbGciOiJIUzI1NiIs..."
         }
       ]
     }
@@ -251,6 +265,7 @@ create_resp = requests.post(
         "targets": [
             {"language": "de", "type": "text/plain"},
             {"language": "es", "type": "audio/pcm;encoding=s16le;rate=16000"},
+            {"language": "fr", "type": "application/x-subrip"},
         ],
     },
 )
@@ -293,7 +308,7 @@ for target, result in zip(status["targets"], status["results"]):
         headers={"Authorization": f"DeepL-Signature {result['signature']}"},
     )
     download_resp.raise_for_status()
-    suffix = ".txt" if target["type"] == "text/plain" else ".bin"
+    suffix = {"text/plain": ".txt", "application/x-subrip": ".srt"}.get(target["type"], ".bin")
     out_path = Path(f"output-{target['language']}{suffix}")
     out_path.write_bytes(download_resp.content)
     print(f"{target['language']}: wrote {out_path}")
diff --git a/api-reference/jobs-voice-translate/reference.mdx b/api-reference/jobs-voice-translate/reference.mdx
index 7c1e6ff..4792658 100644
--- a/api-reference/jobs-voice-translate/reference.mdx
+++ b/api-reference/jobs-voice-translate/reference.mdx
@@ -55,6 +55,7 @@ Each target specifies a `type` for the desired output format.
 | **Type**                                       | **Description**                |
 | :--------------------------------------------- | :----------------------------- |
 | `text/plain`                                   | Plain text transcript          |
+| `application/x-subrip`                         | SRT subtitle file with original timecodes |
 | `audio/opus`                                   | Opus audio                     |
 | `audio/flac`                                   | FLAC audio                     |
 | `audio/pcm;encoding=s16le;rate=16000`          | PCM 16-bit signed LE, 16 kHz   |
diff --git a/api-reference/openapi.yaml b/api-reference/openapi.yaml
index 2567c3f..79f66ac 100644
--- a/api-reference/openapi.yaml
+++ b/api-reference/openapi.yaml
@@ -5757,6 +5757,7 @@ components:
       description: The desired output format for the translation target.
       enum:
         - text/plain
+        - application/x-subrip
         - audio/opus
         - audio/flac
         - "audio/pcm;encoding=s16le;rate=16000"

From f9556069c2912ed4ec7fd1339f7efd73140aac75 Mon Sep 17 00:00:00 2001
From: David Fenster <david.fenster@deepl.com>
Date: Wed, 6 May 2026 19:00:56 -0400
Subject: [PATCH 2/2] make voice job api docs available

---
 docs.json | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/docs.json b/docs.json
index ab18222..bd59efe 100644
--- a/docs.json
+++ b/docs.json
@@ -237,6 +237,15 @@
               "api-reference/voice/reconnect-session"
             ]
           },
+          {
+            "group": "Translate Audio Files",
+            "pages": [
+              "api-reference/jobs-voice-translate",
+              "api-reference/jobs-voice-translate/create-voice-translate-job",
+              "api-reference/jobs-voice-translate/get-voice-translate-job-status",
+              "api-reference/jobs-voice-translate/reference"
+            ]
+          },
           {
             "group": "Write",
             "pages": [