From 06d1e70eeb319e6e7f1430f3f947e642fd5475f8 Mon Sep 17 00:00:00 2001
From: Atsushi Morimoto <74th.tech@gmail.com>
Date: Sun, 17 May 2026 11:44:29 +0900
Subject: [PATCH 1/3] feat: Add process configuration for whisper server and
 update wake word detection keywords

---
 misc/on_mac_demo/process-compose.yaml         |  9 +++++++
 .../speech_recognition/whisper_server.py      | 26 +++++++++----------
 .../wakeup_word_detection/whisper_server.py   |  5 +++-
 3 files changed, 26 insertions(+), 14 deletions(-)
 create mode 100644 misc/on_mac_demo/process-compose.yaml

diff --git a/misc/on_mac_demo/process-compose.yaml b/misc/on_mac_demo/process-compose.yaml
new file mode 100644
index 0000000..a808d5d
--- /dev/null
+++ b/misc/on_mac_demo/process-compose.yaml
@@ -0,0 +1,9 @@
+version: "0.5"
+
+processes:
+  whisper-small:
+      command: |-
+        whisper-server --host 0.0.0.0 --port "8431" --model /opt/whisper.cpp/models/ggml-small.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200"
+  whisper-large-turbo:
+      command: |-
+        whisper-server --host 0.0.0.0 --port "8432" --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200"
diff --git a/stackchan_server/speech_recognition/whisper_server.py b/stackchan_server/speech_recognition/whisper_server.py
index d508cb5..c992ab1 100644
--- a/stackchan_server/speech_recognition/whisper_server.py
+++ b/stackchan_server/speech_recognition/whisper_server.py
@@ -43,16 +43,16 @@ def __init__(
         *,
         config: WhisperServerSpeechToTextConfig | None = None,
     ) -> None:
-        self._conf = config or WhisperServerSpeechToTextConfig()
-        self._server_url = self._conf.url
+        self.config = config or WhisperServerSpeechToTextConfig()
+        self._server_url = self.config.url
 
     async def transcribe(self, pcm_bytes: bytes) -> str:
         rms_level = _pcm_rms_level(pcm_bytes)
-        if rms_level < self._conf.silence_rms_threshold:
+        if rms_level < self.config.silence_rms_threshold:
             logger.info(
                 "Skipping whisper-server transcription because pcm rms %.2f is below silence threshold %.2f",
                 rms_level,
-                self._conf.silence_rms_threshold,
+                self.config.silence_rms_threshold,
             )
             return ""
 
@@ -65,7 +65,7 @@ async def transcribe(self, pcm_bytes: bytes) -> str:
         transcript = await asyncio.to_thread(
             self._request_transcript,
             wav_bytes,
-            self._conf.language,
+            self.config.language,
         )
         if transcript:
             logger.info("whisper-server transcript: %s", transcript)
@@ -73,20 +73,20 @@ async def transcribe(self, pcm_bytes: bytes) -> str:
 
     def _request_transcript(self, wav_bytes: bytes, language: str) -> str:
         fields = {
-            "response_format": self._conf.response_format,
+            "response_format": self.config.response_format,
         }
 
         normalized_language = language.strip()
         if normalized_language:
             fields["language"] = normalized_language
 
-        if self._conf.prompt:
-            fields["prompt"] = self._conf.prompt
+        if self.config.prompt:
+            fields["prompt"] = self.config.prompt
 
-        if self._conf.model:
-            fields["model"] = self._conf.model
+        if self.config.model:
+            fields["model"] = self.config.model
 
-        if self._conf.detect_language:
+        if self.config.detect_language:
             fields["detect_language"] = "true"
 
         body, content_type = _encode_multipart_formdata(
@@ -102,7 +102,7 @@ def _request_transcript(self, wav_bytes: bytes, language: str) -> str:
         logger.info("Running whisper-server request: POST %s", self._server_url)
         try:
             with urlopen(
-                request, timeout=self._conf.request_timeout_seconds
+                request, timeout=self.config.request_timeout_seconds
             ) as response:
                 response_body = response.read()
         except HTTPError as exc:
@@ -113,7 +113,7 @@ def _request_transcript(self, wav_bytes: bytes, language: str) -> str:
         except URLError as exc:
             raise RuntimeError(f"whisper-server request failed: {exc.reason}") from exc
 
-        if self._conf.response_format == "json":
+        if self.config.response_format == "json":
             payload = _load_json_response_bytes(response_body)
             if not isinstance(payload, Mapping):
                 return ""
diff --git a/stackchan_server/wakeup_word_detection/whisper_server.py b/stackchan_server/wakeup_word_detection/whisper_server.py
index 6b25fe6..38a3703 100644
--- a/stackchan_server/wakeup_word_detection/whisper_server.py
+++ b/stackchan_server/wakeup_word_detection/whisper_server.py
@@ -25,7 +25,7 @@ class WakeWordDetectionTimeout(WakeWordDetectionError):
 
 
 class WhisperServerWakeWordDetectorConfig(BaseSettings):
-    keywords: list[str] = Field(default_factory=lambda: ["スタックチャン"])
+    keywords: list[str] = Field(default_factory=lambda: ["ハイスタックチャン"])
     window_seconds: float = 3.0
     interval_seconds: float = 0.5
     timeout_seconds: float = 300.0
@@ -192,6 +192,9 @@ def _contains_wake_word(self, transcript: str) -> bool:
         if not normalized_transcript:
             return False
 
+        # A transcript that contains the recognizer prompt may mean the transcription is inaccurate or the model is confused, so ignore it to avoid false positives. The truthiness check is required: an empty prompt is a substring of every transcript (and None would raise TypeError), which would otherwise suppress every wake-word match. NOTE(review): the prompt is compared as-is against the normalized transcript - confirm whether it should be normalized as well.
+        if self.recognizer.config.prompt and self.recognizer.config.prompt in normalized_transcript:
+            return False
         for keyword in self.config.keywords:
             normalized_keyword = _normalize_text(keyword)
             if normalized_keyword and normalized_keyword in normalized_transcript:

From 2bf8a32568bbbe4af4107efba5781d9e7b7a482f Mon Sep 17 00:00:00 2001
From: Atsushi Morimoto <74th.tech@gmail.com>
Date: Sun, 17 May 2026 12:20:12 +0900
Subject: [PATCH 2/3] feat: Add scripts to run whisper server for small and
 large models

---
 .vscode/settings.json                            |  4 +++-
 misc/on_mac_demo/process-compose.yaml            |  9 ---------
 .../run-whisper-server-large-turbo.sh            | 16 ++++++++++++++++
 misc/on_mac_demo/run-whisper-server-small.sh     | 16 ++++++++++++++++
 4 files changed, 35 insertions(+), 10 deletions(-)
 delete mode 100644 misc/on_mac_demo/process-compose.yaml
 create mode 100755 misc/on_mac_demo/run-whisper-server-large-turbo.sh
 create mode 100755 misc/on_mac_demo/run-whisper-server-small.sh

diff --git a/.vscode/settings.json b/.vscode/settings.json
index c02be26..26c144c 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -12,5 +12,7 @@
         "numeric": "cpp",
         "ostream": "cpp",
         "sstream": "cpp"
-    }
+    },
+    "python.analysis.typeCheckingMode": "off",
+    "ty.interpreter": [".venv/bin/python"]
 }
diff --git a/misc/on_mac_demo/process-compose.yaml b/misc/on_mac_demo/process-compose.yaml
deleted file mode 100644
index a808d5d..0000000
--- a/misc/on_mac_demo/process-compose.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-version: "0.5"
-
-processes:
-  whisper-small:
-      command: |-
-        whisper-server --host 0.0.0.0 --port "8431" --model /opt/whisper.cpp/models/ggml-small.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200"
-  whisper-large-turbo:
-      command: |-
-        whisper-server --host 0.0.0.0 --port "8432" --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin -l ja -nt -sns --vad -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin -vt "0.5" -vspd "100" -vsd "500" -vp "200"
diff --git a/misc/on_mac_demo/run-whisper-server-large-turbo.sh b/misc/on_mac_demo/run-whisper-server-large-turbo.sh
new file mode 100755
index 0000000..1600567
--- /dev/null
+++ b/misc/on_mac_demo/run-whisper-server-large-turbo.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+set -xe
+
+whisper-server \
+    --host 0.0.0.0 \
+    --port "8432" \
+    --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin \
+    -l ja \
+    -nt \
+    -sns \
+    --vad \
+    -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin \
+    -vt "0.5" \
+    -vspd "100" \
+    -vsd "500" \
+    -vp "200"
diff --git a/misc/on_mac_demo/run-whisper-server-small.sh b/misc/on_mac_demo/run-whisper-server-small.sh
new file mode 100755
index 0000000..2e16c12
--- /dev/null
+++ b/misc/on_mac_demo/run-whisper-server-small.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+set -xe
+
+whisper-server \
+    --host 0.0.0.0 \
+    --port "8431" \
+    --model /opt/whisper.cpp/models/ggml-small.bin \
+    -l ja \
+    -nt \
+    -sns \
+    --vad \
+    -vm /opt/whisper.cpp/models/ggml-silero-v6.2.0.bin \
+    -vt "0.5" \
+    -vspd "100" \
+    -vsd "500" \
+    -vp "200"

From bef4fe658e20a9c77fb8297113d3507bab232d91 Mon Sep 17 00:00:00 2001
From: Atsushi Morimoto <74th.tech@gmail.com>
Date: Sun, 17 May 2026 14:33:30 +0900
Subject: [PATCH 3/3] feat: Add README for macOS setup and update whisper
 server scripts

---
 misc/on_mac_demo/README.md                    | 29 +++++++++++++++++++
 .../run-whisper-server-large-turbo.sh         |  7 +++--
 misc/on_mac_demo/run-whisper-server-small.sh  |  2 +-
 3 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 misc/on_mac_demo/README.md

diff --git a/misc/on_mac_demo/README.md b/misc/on_mac_demo/README.md
new file mode 100644
index 0000000..5a51674
--- /dev/null
+++ b/misc/on_mac_demo/README.md
@@ -0,0 +1,29 @@
+Requires the Xcode app. After installing it, select it as the active developer directory:
+
+```
+sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
+```
+
+```
+sudo git clone https://github.com/ggml-org/whisper.cpp /opt/whisper.cpp
+sudo chown -R $(id -u):$(id -g) /opt/whisper.cpp
+
+cd /opt/whisper.cpp
+
+uv venv -p 3.11
+uv pip install ane_transformers openai-whisper coremltools
+source .venv/bin/activate
+```
+
+```
+./models/generate-coreml-model.sh small
+./models/download-ggml-model.sh small
+./models/generate-coreml-model.sh large-v3-turbo
+./models/download-ggml-model.sh large-v3-turbo
+```
+
+```
+# rm -rf build   # uncomment to force a clean rebuild
+cmake -B build -DWHISPER_COREML=ON -DWHISPER_FFMPEG=ON -DGGML_NATIVE=OFF
+cmake --build build -j --config Release
+```
diff --git a/misc/on_mac_demo/run-whisper-server-large-turbo.sh b/misc/on_mac_demo/run-whisper-server-large-turbo.sh
index 1600567..40b4be9 100755
--- a/misc/on_mac_demo/run-whisper-server-large-turbo.sh
+++ b/misc/on_mac_demo/run-whisper-server-large-turbo.sh
@@ -1,10 +1,10 @@
 #!/bin/bash
 set -xe
 
-whisper-server \
+/opt/whisper.cpp/build/bin/whisper-server \
     --host 0.0.0.0 \
     --port "8432" \
-    --model /opt/whisper.cpp/models/ggml-large-v3-turbo-q8_0.bin \
+    --model /opt/whisper.cpp/models/ggml-large-v3-turbo.bin \
     -l ja \
     -nt \
     -sns \
@@ -13,4 +13,5 @@ whisper-server \
     -vt "0.5" \
     -vspd "100" \
     -vsd "500" \
-    -vp "200"
+    -vp "200" \
+    --convert
diff --git a/misc/on_mac_demo/run-whisper-server-small.sh b/misc/on_mac_demo/run-whisper-server-small.sh
index 2e16c12..94217c0 100755
--- a/misc/on_mac_demo/run-whisper-server-small.sh
+++ b/misc/on_mac_demo/run-whisper-server-small.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -xe
 
-whisper-server \
+/opt/whisper.cpp/build/bin/whisper-server \
     --host 0.0.0.0 \
     --port "8431" \
     --model /opt/whisper.cpp/models/ggml-small.bin \