Skip to content

Commit c6bb024

Browse files
authored
Merge pull request #22 from 74th/fix/whisper-cpp
Handle invalid UTF-8 in whisper transcription responses
2 parents 65d1015 + 3b807f8 commit c6bb024

File tree

3 files changed

+51
-3
lines changed

3 files changed

+51
-3
lines changed

stackchan_server/speech_recognition/whisper_cpp.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,14 @@ def _normalize_transcript(text: str) -> str:
165165

166166

167167
def _load_transcript_from_json(path: Path) -> str:
168-
data = json.loads(path.read_text(encoding="utf-8"))
168+
try:
169+
raw_bytes = path.read_bytes()
170+
text = raw_bytes.decode("utf-8", errors="replace")
171+
if "\ufffd" in text:
172+
logger.warning("whisper.cpp JSON output contains invalid UTF-8 bytes")
173+
data = json.loads(text)
174+
except (json.JSONDecodeError, OSError) as exc:
175+
raise RuntimeError(f"Failed to read whisper.cpp JSON output: {exc}") from exc
169176
transcription = data.get("transcription")
170177
if not isinstance(transcription, list):
171178
return ""

stackchan_server/speech_recognition/whisper_server.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,14 @@ def _request_transcript(self, wav_bytes: bytes, language: str) -> str:
9696
raise RuntimeError(f"whisper-server request failed: {exc.reason}") from exc
9797

9898
if self._response_format == "json":
99-
payload = json.loads(response_body.decode("utf-8"))
99+
payload = _load_json_response_bytes(response_body)
100+
if not isinstance(payload, Mapping):
101+
return ""
102+
payload = cast(Mapping[str, object], payload)
100103
text = payload.get("text")
101104
return text.strip() if isinstance(text, str) else ""
102105

103-
payload = json.loads(response_body.decode("utf-8"))
106+
payload = _load_json_response_bytes(response_body)
104107
return _load_transcript_from_verbose_json(payload)
105108

106109

@@ -118,6 +121,13 @@ def _normalize_language(language_code: str) -> str:
118121
return language_code.split("-", 1)[0].lower()
119122

120123

124+
def _load_json_response_bytes(response_body: bytes) -> object:
    """Leniently decode a whisper-server response body and parse it as JSON.

    The bytes are decoded as UTF-8 with ``errors="replace"``, so undecodable
    sequences become U+FFFD instead of raising ``UnicodeDecodeError``.  A
    warning is logged whenever the decoded text contains U+FFFD.

    Args:
        response_body: Raw bytes of the HTTP response body.

    Returns:
        Whatever ``json.loads`` produces for the decoded text.

    Raises:
        json.JSONDecodeError: If the decoded text is not valid JSON.
    """
    decoded = response_body.decode(encoding="utf-8", errors="replace")

    # Surface lossy decoding in the logs, but still attempt to parse.
    replacement_seen = decoded.find("\ufffd") != -1
    if replacement_seen:
        logger.warning("whisper-server JSON output contains invalid UTF-8 bytes")

    parsed: object = json.loads(decoded)
    return parsed
129+
130+
121131
def _load_transcript_from_verbose_json(payload: object) -> str:
122132
if not isinstance(payload, Mapping):
123133
return ""

tests/test_whisper_server.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from __future__ import annotations
2+
3+
import unittest
4+
5+
from stackchan_server.speech_recognition.whisper_server import (
6+
_load_json_response_bytes,
7+
_load_transcript_from_verbose_json,
8+
)
9+
10+
11+
class WhisperServerJsonTests(unittest.TestCase):
    """Tests for JSON loading and transcript extraction with U+FFFD present."""

    def test_load_json_response_bytes_replaces_invalid_utf8(self) -> None:
        # b"\xe6\x90" is an incomplete multi-byte UTF-8 sequence; the loader is
        # expected to turn it into a single replacement character (U+FFFD).
        raw_body = b'{"transcription":[{"text":"\xe6\x90"},{"text":"ok"}]}'
        expected = {"transcription": [{"text": "\ufffd"}, {"text": "ok"}]}

        self.assertEqual(_load_json_response_bytes(raw_body), expected)

    def test_load_transcript_from_verbose_json_with_replacement_char(self) -> None:
        # A segment consisting only of U+FFFD must be kept in the transcript.
        segments = [{"text": "\ufffd"}, {"text": "ok"}]

        result = _load_transcript_from_verbose_json({"transcription": segments})

        self.assertEqual(result, "\ufffd ok")
28+
29+
30+
# Run the unittest test runner when this file is executed directly as a script.
if __name__ == "__main__":
31+
unittest.main()

0 commit comments

Comments
 (0)