From 06d1e70eeb319e6e7f1430f3f947e642fd5475f8 Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 17 May 2026 11:44:29 +0900 Subject: [PATCH 1/3] feat: Add process configuration for whisper server and update wake word detection keywords --- misc/on_mac_demo/process-compose.yaml | 9 +++++++ .../speech_recognition/whisper_server.py | 26 +++++++++---------- .../wakeup_word_detection/whisper_server.py | 5 +++- 3 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 misc/on_mac_demo/process-compose.yaml diff --git a/misc/on_mac_demo/process-compose.yaml b/misc/on_mac_demo/process-compose.yaml new file mode 100644 index 0000000..a808d5d --- /dev/null +++ b/misc/on_mac_demo/process-compose.yaml @@ -0,0 +1,9 @@ +version: "0.5" + +processes: + whisper-small: + command: |- + whisper-server --host 0.0.0.0 --port "8431" --model /opt/whisper.cpp/models/ggml-small.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200" + whisper-large-turbo: + command: |- + whisper-server --host 0.0.0.0 --port "8432" --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200" diff --git a/stackchan_server/speech_recognition/whisper_server.py b/stackchan_server/speech_recognition/whisper_server.py index d508cb5..c992ab1 100644 --- a/stackchan_server/speech_recognition/whisper_server.py +++ b/stackchan_server/speech_recognition/whisper_server.py @@ -43,16 +43,16 @@ def __init__( *, config: WhisperServerSpeechToTextConfig | None = None, ) -> None: - self._conf = config or WhisperServerSpeechToTextConfig() - self._server_url = self._conf.url + self.config = config or WhisperServerSpeechToTextConfig() + self._server_url = self.config.url async def transcribe(self, pcm_bytes: bytes) -> str: rms_level = _pcm_rms_level(pcm_bytes) - if rms_level < self._conf.silence_rms_threshold: + if 
rms_level < self.config.silence_rms_threshold: logger.info( "Skipping whisper-server transcription because pcm rms %.2f is below silence threshold %.2f", rms_level, - self._conf.silence_rms_threshold, + self.config.silence_rms_threshold, ) return "" @@ -65,7 +65,7 @@ async def transcribe(self, pcm_bytes: bytes) -> str: transcript = await asyncio.to_thread( self._request_transcript, wav_bytes, - self._conf.language, + self.config.language, ) if transcript: logger.info("whisper-server transcript: %s", transcript) @@ -73,20 +73,20 @@ async def transcribe(self, pcm_bytes: bytes) -> str: def _request_transcript(self, wav_bytes: bytes, language: str) -> str: fields = { - "response_format": self._conf.response_format, + "response_format": self.config.response_format, } normalized_language = language.strip() if normalized_language: fields["language"] = normalized_language - if self._conf.prompt: - fields["prompt"] = self._conf.prompt + if self.config.prompt: + fields["prompt"] = self.config.prompt - if self._conf.model: - fields["model"] = self._conf.model + if self.config.model: + fields["model"] = self.config.model - if self._conf.detect_language: + if self.config.detect_language: fields["detect_language"] = "true" body, content_type = _encode_multipart_formdata( @@ -102,7 +102,7 @@ def _request_transcript(self, wav_bytes: bytes, language: str) -> str: logger.info("Running whisper-server request: POST %s", self._server_url) try: with urlopen( - request, timeout=self._conf.request_timeout_seconds + request, timeout=self.config.request_timeout_seconds ) as response: response_body = response.read() except HTTPError as exc: @@ -113,7 +113,7 @@ def _request_transcript(self, wav_bytes: bytes, language: str) -> str: except URLError as exc: raise RuntimeError(f"whisper-server request failed: {exc.reason}") from exc - if self._conf.response_format == "json": + if self.config.response_format == "json": payload = _load_json_response_bytes(response_body) if not isinstance(payload, 
Mapping): return "" diff --git a/stackchan_server/wakeup_word_detection/whisper_server.py b/stackchan_server/wakeup_word_detection/whisper_server.py index 6b25fe6..38a3703 100644 --- a/stackchan_server/wakeup_word_detection/whisper_server.py +++ b/stackchan_server/wakeup_word_detection/whisper_server.py @@ -25,7 +25,7 @@ class WakeWordDetectionTimeout(WakeWordDetectionError): class WhisperServerWakeWordDetectorConfig(BaseSettings): - keywords: list[str] = Field(default_factory=lambda: ["スタックチャン"]) + keywords: list[str] = Field(default_factory=lambda: ["ハイスタックチャン"]) window_seconds: float = 3.0 interval_seconds: float = 0.5 timeout_seconds: float = 300.0 @@ -192,6 +192,9 @@ def _contains_wake_word(self, transcript: str) -> bool: if not normalized_transcript: return False + if self.recognizer.config.prompt and self.recognizer.config.prompt in normalized_transcript: + # A transcript that echoes the configured prompt suggests the transcription is inaccurate or the model is confused, so it is ignored to avoid false positives. The truthiness guard is required: an empty prompt is a substring of every string (which would reject every transcript) and a None prompt would raise TypeError. NOTE(review): the prompt is compared as configured, without _normalize_text - confirm that is intended. 
+ return False for keyword in self.config.keywords: normalized_keyword = _normalize_text(keyword) if normalized_keyword and normalized_keyword in normalized_transcript: From 2bf8a32568bbbe4af4107efba5781d9e7b7a482f Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 17 May 2026 12:20:12 +0900 Subject: [PATCH 2/3] feat: Add scripts to run whisper server for small and large models --- .vscode/settings.json | 4 +++- misc/on_mac_demo/process-compose.yaml | 9 --------- .../run-whisper-server-large-turbo.sh | 16 ++++++++++++++++ misc/on_mac_demo/run-whisper-server-small.sh | 16 ++++++++++++++++ 4 files changed, 35 insertions(+), 10 deletions(-) delete mode 100644 misc/on_mac_demo/process-compose.yaml create mode 100755 misc/on_mac_demo/run-whisper-server-large-turbo.sh create mode 100755 misc/on_mac_demo/run-whisper-server-small.sh diff --git a/.vscode/settings.json b/.vscode/settings.json index c02be26..26c144c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,5 +12,7 @@ "numeric": "cpp", "ostream": "cpp", "sstream": "cpp" - } + }, + "python.analysis.typeCheckingMode": "off", + "ty.interpreter": [".venv/bin/python"] } diff --git a/misc/on_mac_demo/process-compose.yaml b/misc/on_mac_demo/process-compose.yaml deleted file mode 100644 index a808d5d..0000000 --- a/misc/on_mac_demo/process-compose.yaml +++ /dev/null @@ -1,9 +0,0 @@ -version: "0.5" - -processes: - whisper-small: - command: |- - whisper-server --host 0.0.0.0 --port "8431" --model /opt/whisper.cpp/models/ggml-small.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200" - whisper-large-turbo: - command: |- - whisper-server --host 0.0.0.0 --port "8432" --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200" diff --git a/misc/on_mac_demo/run-whisper-server-large-turbo.sh 
b/misc/on_mac_demo/run-whisper-server-large-turbo.sh new file mode 100755 index 0000000..1600567 --- /dev/null +++ b/misc/on_mac_demo/run-whisper-server-large-turbo.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -xe + +whisper-server \ + --host 0.0.0.0 \ + --port "8432" \ + --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin \ + -l ja \ + -nt \ + -sns \ + --vad \ + -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin \ + -vt "0.5" \ + -vspd "100" \ + -vsd "500" \ + -vp "200" diff --git a/misc/on_mac_demo/run-whisper-server-small.sh b/misc/on_mac_demo/run-whisper-server-small.sh new file mode 100755 index 0000000..2e16c12 --- /dev/null +++ b/misc/on_mac_demo/run-whisper-server-small.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -xe + +whisper-server \ + --host 0.0.0.0 \ + --port "8431" \ + --model /opt/whisper.cpp/models/ggml-small.bin \ + -l ja \ + -nt \ + -sns \ + --vad \ + -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin \ + -vt "0.5" \ + -vspd "100" \ + -vsd "500" \ + -vp "200" From bef4fe658e20a9c77fb8297113d3507bab232d91 Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 17 May 2026 14:33:30 +0900 Subject: [PATCH 3/3] feat: Add README for macOS setup and update whisper server scripts --- misc/on_mac_demo/README.md | 29 +++++++++++++++++++ .../run-whisper-server-large-turbo.sh | 7 +++-- misc/on_mac_demo/run-whisper-server-small.sh | 2 +- 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 misc/on_mac_demo/README.md diff --git a/misc/on_mac_demo/README.md b/misc/on_mac_demo/README.md new file mode 100644 index 0000000..5a51674 --- /dev/null +++ b/misc/on_mac_demo/README.md @@ -0,0 +1,29 @@ +need Xcode app + +``` +sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer +``` + +``` +sudo git clone https://github.com/ggml-org/whisper.cpp /opt/whisper.cpp +sudo chown -R $(id -u):$(id -g) /opt/whisper.cpp + +cd /opt/whisper.cpp + +uv venv -p 3.11 +uv pip install ane_transformers openai-whisper coremltools +source 
.venv/bin/activate +``` + +``` +./models/generate-coreml-model.sh small +./models/download-ggml-model.sh small +./models/generate-coreml-model.sh large-v3-turbo +./models/download-ggml-model.sh large-v3-turbo +``` + +``` +# rm -rf build +cmake -B build -DWHISPER_COREML=ON -DWHISPER_FFMPEG=ON -DGGML_NATIVE=OFF +cmake --build build -j --config Release +``` diff --git a/misc/on_mac_demo/run-whisper-server-large-turbo.sh b/misc/on_mac_demo/run-whisper-server-large-turbo.sh index 1600567..40b4be9 100755 --- a/misc/on_mac_demo/run-whisper-server-large-turbo.sh +++ b/misc/on_mac_demo/run-whisper-server-large-turbo.sh @@ -1,10 +1,10 @@ #!/bin/bash set -xe -whisper-server \ +/opt/whisper.cpp/build/bin/whisper-server \ --host 0.0.0.0 \ --port "8432" \ - --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin \ + --model /opt/whisper.cpp/models/ggml-large-v3-turbo.bin \ -l ja \ -nt \ -sns \ @@ -13,4 +13,5 @@ whisper-server \ -vt "0.5" \ -vspd "100" \ -vsd "500" \ - -vp "200" + -vp "200" \ + --convert diff --git a/misc/on_mac_demo/run-whisper-server-small.sh b/misc/on_mac_demo/run-whisper-server-small.sh index 2e16c12..94217c0 100755 --- a/misc/on_mac_demo/run-whisper-server-small.sh +++ b/misc/on_mac_demo/run-whisper-server-small.sh @@ -1,7 +1,7 @@ #!/bin/bash set -xe -whisper-server \ +/opt/whisper.cpp/build/bin/whisper-server \ --host 0.0.0.0 \ --port "8431" \ --model /opt/whisper.cpp/models/ggml-small.bin \