diff --git a/.vscode/settings.json b/.vscode/settings.json index c02be26..b7cce9f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ "istream": "cpp", "numeric": "cpp", "ostream": "cpp", - "sstream": "cpp" + "sstream": "cpp", + "chrono": "cpp" } } diff --git a/README.md b/README.md index 1d5e197..6ece298 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,14 @@ async def talk_session(proxy: WsProxy): await proxy.speak(resp.text) ``` +`StackChanApp()` は既定で、WebSocket 接続直後に WakeWord 検出通知音をデバイスへ送信しようとします。 +送信する音は環境変数 `STACKCHAN_WAKEWORD_SOUND_PATH` で指定した WAV ファイルから読み込みます。 +読み込んだ WAV は送信前に 16-bit PCM / 24kHz / mono へ正規化されます。 +さらに短い通知音でも再生しやすいよう、送信前に前後へ短い無音を付与し、最小再生長を確保します。 +環境変数 `STACKCHAN_WAKEWORD_SOUND_PATH` が未設定の場合、通知音は送信されません。 +送信された音はデバイス側で SPIFFS に保存され、WakeWord 検出時にローカル再生されます。 +接続時送信の機能自体を無効化したい場合は `StackChanApp(send_wakeword_sound_on_connect=False)` を使ってください。 + ## セットアップ 以下を確認ください。 @@ -101,7 +109,7 @@ async def talk_session(proxy: WsProxy): - M5Stack CoreS3(SKU:K128, K128-Lite, K128-SE) - M5Stack Atom S3R(SKU:C126) + Atomic Echo Base(SKU:A149) - M5Stack公式StackChan(SKU:K151) - + - M5Stack Atom EchoS3R - サーボ(なくても動作します): - Tower Pro SG90 - FEETECH SCS0009 diff --git a/docs/websocket_protocols_ja.md b/docs/websocket_protocols_ja.md index b816c13..ee765e2 100644 --- a/docs/websocket_protocols_ja.md +++ b/docs/websocket_protocols_ja.md @@ -35,6 +35,7 @@ | `SpeakDoneEvt` | CoreS3 → Server | 音声再生完了通知 | | `ServoCmd` | Server → CoreS3 | サーボ動作シーケンス指示 | | `ServoDoneEvt` | CoreS3 → Server | サーボ動作完了通知 | +| `StoredFile` | Server → CoreS3 | SPIFFS 保存用の汎用ファイル転送 | ### `MessageType` 一覧 @@ -136,6 +137,29 @@ - CoreS3 側の音声再生完了を通知します。 - Server はこの通知を待って `proxy.speak()` を完了させます。 +## 保存ファイル転送 `StoredFile` + +- 方向: Server → CoreS3 +- 用途: バイナリファイルを WebSocket 経由で配布し、CoreS3 側で SPIFFS に保存するための汎用転送です。 +- 1 転送の流れは `StoredFileStart` → `FileChunk` 複数回 → `StoredFileEnd` です。 + +### body 形式 + +| messageType | body | +| --- | --- | +| `START` | `StoredFileStart { file_id, content_type, total_size, 
sample_rate, channels }` | +| `DATA` | `FileChunk { chunk_bytes }` | +| `END` | `StoredFileEnd {}` | + +### 現行実装メモ + +- `file_id` はファイルの論理名です。現在の実装では `wakeword-detected-sound` が WakeUpWord 検出音に使われます。 +- `content_type` は現在 `audio/pcm` をサポートします。 +- `sample_rate` / `channels` は PCM 再生用の追加メタデータです。 +- CoreS3 は受信完了後に SPIFFS へ保存し、**その WebSocket 接続中に受信したファイルだけ** を有効化します。 +- 再接続後にサーバーが同じファイルを再送しない場合、SPIFFS に過去データが残っていても再生には使いません。 +- WakeUpWord 検出時は、該当サウンドが現在の接続で有効になっている場合のみローカル再生してから `WakeWordEvt` を送信します。 + ## サーボ動作指示 `ServoCmd` - 方向: Server → CoreS3 diff --git a/firmware/include/protocols.hpp b/firmware/include/protocols.hpp index 89cb7d6..c5de57f 100644 --- a/firmware/include/protocols.hpp +++ b/firmware/include/protocols.hpp @@ -8,6 +8,7 @@ #include "../lib/generated_protobuf/websocket-message.pb.h" constexpr size_t kProtoAudioChunkMaxBytes = 4096; +constexpr size_t kProtoFileChunkMaxBytes = 4096; constexpr size_t kProtoServoCommandMaxCount = 255; constexpr size_t kMaxEncodedWebSocketMessageBytes = stackchan_websocket_v1_WebSocketMessage_size; @@ -18,6 +19,13 @@ bool setProtoAudioChunk( const uint8_t *getProtoAudioChunkBytes(const stackchan_websocket_v1_AudioChunk &chunk); size_t getProtoAudioChunkSize(const stackchan_websocket_v1_AudioChunk &chunk); +bool setProtoFileChunk( + stackchan_websocket_v1_FileChunk &chunk, + const uint8_t *data, + size_t data_len); +const uint8_t *getProtoFileChunkBytes(const stackchan_websocket_v1_FileChunk &chunk); +size_t getProtoFileChunkSize(const stackchan_websocket_v1_FileChunk &chunk); + bool encodeWebSocketMessage( const stackchan_websocket_v1_WebSocketMessage &message, std::vector &encoded); diff --git a/firmware/include/stored_files.hpp b/firmware/include/stored_files.hpp new file mode 100644 index 0000000..7778d56 --- /dev/null +++ b/firmware/include/stored_files.hpp @@ -0,0 +1,73 @@ +#pragma once + +#include +#include +#include +#include + +#include "protocols.hpp" + +struct StoredFileView +{ + const uint8_t *data = nullptr; + 
size_t size = 0; // presumably the byte length of `data` — TODO confirm against getActivePcmFile()
+    uint32_t sample_rate = 0; // PCM playback metadata (mirrors StoredFileStart.sample_rate)
+    uint16_t channels = 0; // PCM playback metadata; NOTE(review): narrower than PersistedSlot::channels (uint32_t)
+};
+
+class StoredFiles // Receives StoredFile transfers (START / DATA / END) and keeps them in a small set of slots; the protocol doc says files are saved to SPIFFS
+{
+public:
+    void init(); // presumably mounts SPIFFS and loads the slot index — implementation not visible here, confirm
+    void resetSession(); // presumably clears per-connection state (session_active_, transfer_) — confirm against caller
+
+    bool handleStart(uint32_t seq, const stackchan_websocket_v1_StoredFileStart &start); // START message: file metadata (file_id, content_type, total_size, sample_rate, channels)
+    bool handleData(uint32_t seq, const uint8_t *data, size_t data_len); // DATA message: one chunk of file bytes
+    bool handleEnd(uint32_t seq); // END message: closes the transfer; NOTE(review): TransferState::next_seq suggests `seq` continuity is checked — confirm
+
+    bool getActivePcmFile(const char *fileId, StoredFileView &view); // looks up a file by id and fills `view`; presumably false when the file is not active in this connection — confirm
+
+private:
+    static constexpr size_t kMaxStoredFiles = 4; // slot count; presumably the extent of slots_ / session_active_ (template arguments not visible here)
+    static constexpr size_t kMaxStoredFileBytes = 256 * 1024; // 256 KiB; presumably the per-file size limit — confirm where it is enforced
+
+    struct PersistedSlot // per-file metadata; field sizes match StoredFileStart (file_id[64], content_type[64])
+    {
+        bool used = false; // whether this slot currently holds a file
+        char file_id[64] = ""; // logical file name, e.g. "wakeword-detected-sound" per the protocol doc
+        char content_type[64] = ""; // protocol doc lists "audio/pcm" as the supported value
+        uint32_t sample_rate = 0; // PCM playback metadata
+        uint32_t channels = 0; // PCM playback metadata
+        uint32_t size = 0; // presumably the stored payload length in bytes — confirm
+    };
+
+    struct TransferState // bookkeeping for the transfer currently being received
+    {
+        bool active = false; // true while a transfer is in progress (presumably between START and END)
+        uint32_t next_seq = 0; // presumably the sequence number expected on the next message — confirm
+        int slot_index = -1; // target slot; -1 means none selected
+        uint32_t received_bytes = 0; // running total of payload bytes received
+        uint32_t chunk_count = 0; // number of DATA chunks received
+        PersistedSlot slot{}; // metadata staged for the slot being written
+        std::vector payload; // accumulated file bytes
+    };
+
+    bool storage_ready_ = false; // presumably set once SPIFFS is mounted — confirm in init()/mountSpiffs()
+    std::array slots_{}; // slot metadata table
+    std::array session_active_{}; // per-slot flag; protocol doc says only files received during the current WebSocket connection are activated
+    int cached_slot_index_ = -1; // slot whose payload is held in cached_payload_; -1 means nothing cached
+    std::vector cached_payload_; // in-memory copy of one slot's payload
+    TransferState transfer_; // state of the in-flight transfer
+
+    bool mountSpiffs(); // helpers below: behavior inferred from names only, bodies are not part of this header
+    bool loadIndex();
+    bool persistIndex();
+    bool persistSlotPayload(int slotIndex, const std::vector &payload);
+    bool loadSlotPayload(int slotIndex, std::vector &payload);
+    int findSlotById(const char *fileId) const; // NOTE(review): presumably returns -1 when not found, matching the -1 sentinels above — confirm
+    int selectSlotForId(const char *fileId);
+    void resetTransfer();
+    bool activateSlot(int slotIndex, const std::vector &payload);
+    static const char *payloadPathForSlot(int slotIndex); // NOTE(review): returns a raw const char*; check the lifetime of the returned storage
+    static const char *indexPath();
+};
diff --git a/firmware/lib/generated_protobuf/websocket-message.pb.c b/firmware/lib/generated_protobuf/websocket-message.pb.c index f70a79e..2d976af 100644 --- a/firmware/lib/generated_protobuf/websocket-message.pb.c +++ b/firmware/lib/generated_protobuf/websocket-message.pb.c @@ -24,6 +24,9 @@ PB_BIND(stackchan_websocket_v1_AudioWavEnd, stackchan_websocket_v1_AudioWavEnd, PB_BIND(stackchan_websocket_v1_AudioChunk, 
stackchan_websocket_v1_AudioChunk, 4) +PB_BIND(stackchan_websocket_v1_FileChunk, stackchan_websocket_v1_FileChunk, 4) + + PB_BIND(stackchan_websocket_v1_StateCommand, stackchan_websocket_v1_StateCommand, AUTO) @@ -36,6 +39,12 @@ PB_BIND(stackchan_websocket_v1_StateEvent, stackchan_websocket_v1_StateEvent, AU PB_BIND(stackchan_websocket_v1_SpeakDoneEvent, stackchan_websocket_v1_SpeakDoneEvent, AUTO) +PB_BIND(stackchan_websocket_v1_StoredFileStart, stackchan_websocket_v1_StoredFileStart, AUTO) + + +PB_BIND(stackchan_websocket_v1_StoredFileEnd, stackchan_websocket_v1_StoredFileEnd, AUTO) + + PB_BIND(stackchan_websocket_v1_ServoCommandSequence, stackchan_websocket_v1_ServoCommandSequence, 2) diff --git a/firmware/lib/generated_protobuf/websocket-message.pb.h b/firmware/lib/generated_protobuf/websocket-message.pb.h index 8e0c222..a668a7e 100644 --- a/firmware/lib/generated_protobuf/websocket-message.pb.h +++ b/firmware/lib/generated_protobuf/websocket-message.pb.h @@ -21,7 +21,8 @@ typedef enum _stackchan_websocket_v1_MessageKind { stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_CMD = 7, stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_DONE_EVT = 8, stackchan_websocket_v1_MessageKind_MESSAGE_KIND_FIRMWARE_METADATA = 9, - stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVER_METADATA = 10 + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVER_METADATA = 10, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STORED_FILE = 11 } stackchan_websocket_v1_MessageKind; typedef enum _stackchan_websocket_v1_MessageType { @@ -81,6 +82,11 @@ typedef struct _stackchan_websocket_v1_AudioChunk { stackchan_websocket_v1_AudioChunk_pcm_bytes_t pcm_bytes; } stackchan_websocket_v1_AudioChunk; +typedef PB_BYTES_ARRAY_T(4096) stackchan_websocket_v1_FileChunk_chunk_bytes_t; +typedef struct _stackchan_websocket_v1_FileChunk { + stackchan_websocket_v1_FileChunk_chunk_bytes_t chunk_bytes; +} stackchan_websocket_v1_FileChunk; + typedef struct _stackchan_websocket_v1_StateCommand { 
stackchan_websocket_v1_StackchanState state; } stackchan_websocket_v1_StateCommand; @@ -97,6 +103,18 @@ typedef struct _stackchan_websocket_v1_SpeakDoneEvent { bool done; } stackchan_websocket_v1_SpeakDoneEvent; +typedef struct _stackchan_websocket_v1_StoredFileStart { + char file_id[64]; + char content_type[64]; + uint32_t total_size; + uint32_t sample_rate; + uint32_t channels; +} stackchan_websocket_v1_StoredFileStart; + +typedef struct _stackchan_websocket_v1_StoredFileEnd { + char dummy_field; +} stackchan_websocket_v1_StoredFileEnd; + typedef struct _stackchan_websocket_v1_ServoCommand { stackchan_websocket_v1_ServoOperation op; int32_t angle; /* used by MOVE_X / MOVE_Y */ @@ -155,6 +173,9 @@ typedef struct _stackchan_websocket_v1_WebSocketMessage { stackchan_websocket_v1_ServoDoneEvent servo_done_evt; stackchan_websocket_v1_FirmwareMetadata firmware_metadata; stackchan_websocket_v1_ServerMetadata server_metadata; + stackchan_websocket_v1_StoredFileStart stored_file_start; + stackchan_websocket_v1_FileChunk stored_file_data; + stackchan_websocket_v1_StoredFileEnd stored_file_end; } body; } stackchan_websocket_v1_WebSocketMessage; @@ -165,8 +186,8 @@ extern "C" { /* Helper constants for enums */ #define _stackchan_websocket_v1_MessageKind_MIN stackchan_websocket_v1_MessageKind_MESSAGE_KIND_UNSPECIFIED -#define _stackchan_websocket_v1_MessageKind_MAX stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVER_METADATA -#define _stackchan_websocket_v1_MessageKind_ARRAYSIZE ((stackchan_websocket_v1_MessageKind)(stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVER_METADATA+1)) +#define _stackchan_websocket_v1_MessageKind_MAX stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STORED_FILE +#define _stackchan_websocket_v1_MessageKind_ARRAYSIZE ((stackchan_websocket_v1_MessageKind)(stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STORED_FILE+1)) #define _stackchan_websocket_v1_MessageType_MIN stackchan_websocket_v1_MessageType_MESSAGE_TYPE_UNSPECIFIED #define 
_stackchan_websocket_v1_MessageType_MAX stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END @@ -196,6 +217,7 @@ extern "C" { + #define stackchan_websocket_v1_StateCommand_state_ENUMTYPE stackchan_websocket_v1_StackchanState @@ -203,6 +225,8 @@ extern "C" { + + #define stackchan_websocket_v1_ServoCommand_op_ENUMTYPE stackchan_websocket_v1_ServoOperation @@ -218,10 +242,13 @@ extern "C" { #define stackchan_websocket_v1_AudioWavStart_init_default {0, 0} #define stackchan_websocket_v1_AudioWavEnd_init_default {0} #define stackchan_websocket_v1_AudioChunk_init_default {{0, {0}}} +#define stackchan_websocket_v1_FileChunk_init_default {{0, {0}}} #define stackchan_websocket_v1_StateCommand_init_default {_stackchan_websocket_v1_StackchanState_MIN} #define stackchan_websocket_v1_WakeWordEvent_init_default {0} #define stackchan_websocket_v1_StateEvent_init_default {_stackchan_websocket_v1_StackchanState_MIN} #define stackchan_websocket_v1_SpeakDoneEvent_init_default {0} +#define stackchan_websocket_v1_StoredFileStart_init_default {"", "", 0, 0, 0} +#define stackchan_websocket_v1_StoredFileEnd_init_default {0} #define stackchan_websocket_v1_ServoCommandSequence_init_default {0, {stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default}} #define 
stackchan_websocket_v1_ServoCommand_init_default {_stackchan_websocket_v1_ServoOperation_MIN, 0, 0} #define stackchan_websocket_v1_ServoDoneEvent_init_default {0} @@ -233,10 +260,13 @@ extern "C" { #define stackchan_websocket_v1_AudioWavStart_init_zero {0, 0} #define stackchan_websocket_v1_AudioWavEnd_init_zero {0} #define stackchan_websocket_v1_AudioChunk_init_zero {{0, {0}}} +#define stackchan_websocket_v1_FileChunk_init_zero {{0, {0}}} #define stackchan_websocket_v1_StateCommand_init_zero {_stackchan_websocket_v1_StackchanState_MIN} #define stackchan_websocket_v1_WakeWordEvent_init_zero {0} #define stackchan_websocket_v1_StateEvent_init_zero {_stackchan_websocket_v1_StackchanState_MIN} #define stackchan_websocket_v1_SpeakDoneEvent_init_zero {0} +#define stackchan_websocket_v1_StoredFileStart_init_zero {"", "", 0, 0, 0} +#define stackchan_websocket_v1_StoredFileEnd_init_zero {0} #define stackchan_websocket_v1_ServoCommandSequence_init_zero {0, {stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero}} #define stackchan_websocket_v1_ServoCommand_init_zero {_stackchan_websocket_v1_ServoOperation_MIN, 0, 0} #define stackchan_websocket_v1_ServoDoneEvent_init_zero {0} @@ -247,10 +277,16 @@ extern "C" { #define stackchan_websocket_v1_AudioWavStart_sample_rate_tag 1 #define stackchan_websocket_v1_AudioWavStart_channels_tag 2 #define stackchan_websocket_v1_AudioChunk_pcm_bytes_tag 1 +#define stackchan_websocket_v1_FileChunk_chunk_bytes_tag 1 #define stackchan_websocket_v1_StateCommand_state_tag 1 #define stackchan_websocket_v1_WakeWordEvent_detected_tag 1 #define stackchan_websocket_v1_StateEvent_state_tag 1 #define stackchan_websocket_v1_SpeakDoneEvent_done_tag 1 +#define stackchan_websocket_v1_StoredFileStart_file_id_tag 1 +#define stackchan_websocket_v1_StoredFileStart_content_type_tag 2 +#define stackchan_websocket_v1_StoredFileStart_total_size_tag 3 +#define 
stackchan_websocket_v1_StoredFileStart_sample_rate_tag 4 +#define stackchan_websocket_v1_StoredFileStart_channels_tag 5 #define stackchan_websocket_v1_ServoCommand_op_tag 1 #define stackchan_websocket_v1_ServoCommand_angle_tag 2 #define stackchan_websocket_v1_ServoCommand_duration_ms_tag 3 @@ -283,6 +319,9 @@ extern "C" { #define stackchan_websocket_v1_WebSocketMessage_servo_done_evt_tag 35 #define stackchan_websocket_v1_WebSocketMessage_firmware_metadata_tag 36 #define stackchan_websocket_v1_WebSocketMessage_server_metadata_tag 37 +#define stackchan_websocket_v1_WebSocketMessage_stored_file_start_tag 40 +#define stackchan_websocket_v1_WebSocketMessage_stored_file_data_tag 41 +#define stackchan_websocket_v1_WebSocketMessage_stored_file_end_tag 42 /* Struct field encoding specification for nanopb */ #define stackchan_websocket_v1_WebSocketMessage_FIELDLIST(X, a) \ @@ -302,7 +341,10 @@ X(a, STATIC, ONEOF, MESSAGE, (body,speak_done_evt,body.speak_done_evt), 3 X(a, STATIC, ONEOF, MESSAGE, (body,servo_cmd,body.servo_cmd), 34) \ X(a, STATIC, ONEOF, MESSAGE, (body,servo_done_evt,body.servo_done_evt), 35) \ X(a, STATIC, ONEOF, MESSAGE, (body,firmware_metadata,body.firmware_metadata), 36) \ -X(a, STATIC, ONEOF, MESSAGE, (body,server_metadata,body.server_metadata), 37) +X(a, STATIC, ONEOF, MESSAGE, (body,server_metadata,body.server_metadata), 37) \ +X(a, STATIC, ONEOF, MESSAGE, (body,stored_file_start,body.stored_file_start), 40) \ +X(a, STATIC, ONEOF, MESSAGE, (body,stored_file_data,body.stored_file_data), 41) \ +X(a, STATIC, ONEOF, MESSAGE, (body,stored_file_end,body.stored_file_end), 42) #define stackchan_websocket_v1_WebSocketMessage_CALLBACK NULL #define stackchan_websocket_v1_WebSocketMessage_DEFAULT NULL #define stackchan_websocket_v1_WebSocketMessage_body_audio_pcm_start_MSGTYPE stackchan_websocket_v1_AudioPcmStart @@ -319,6 +361,9 @@ X(a, STATIC, ONEOF, MESSAGE, (body,server_metadata,body.server_metadata), #define 
stackchan_websocket_v1_WebSocketMessage_body_servo_done_evt_MSGTYPE stackchan_websocket_v1_ServoDoneEvent #define stackchan_websocket_v1_WebSocketMessage_body_firmware_metadata_MSGTYPE stackchan_websocket_v1_FirmwareMetadata #define stackchan_websocket_v1_WebSocketMessage_body_server_metadata_MSGTYPE stackchan_websocket_v1_ServerMetadata +#define stackchan_websocket_v1_WebSocketMessage_body_stored_file_start_MSGTYPE stackchan_websocket_v1_StoredFileStart +#define stackchan_websocket_v1_WebSocketMessage_body_stored_file_data_MSGTYPE stackchan_websocket_v1_FileChunk +#define stackchan_websocket_v1_WebSocketMessage_body_stored_file_end_MSGTYPE stackchan_websocket_v1_StoredFileEnd #define stackchan_websocket_v1_AudioPcmStart_FIELDLIST(X, a) \ @@ -346,6 +391,11 @@ X(a, STATIC, SINGULAR, BYTES, pcm_bytes, 1) #define stackchan_websocket_v1_AudioChunk_CALLBACK NULL #define stackchan_websocket_v1_AudioChunk_DEFAULT NULL +#define stackchan_websocket_v1_FileChunk_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, BYTES, chunk_bytes, 1) +#define stackchan_websocket_v1_FileChunk_CALLBACK NULL +#define stackchan_websocket_v1_FileChunk_DEFAULT NULL + #define stackchan_websocket_v1_StateCommand_FIELDLIST(X, a) \ X(a, STATIC, SINGULAR, UENUM, state, 1) #define stackchan_websocket_v1_StateCommand_CALLBACK NULL @@ -366,6 +416,20 @@ X(a, STATIC, SINGULAR, BOOL, done, 1) #define stackchan_websocket_v1_SpeakDoneEvent_CALLBACK NULL #define stackchan_websocket_v1_SpeakDoneEvent_DEFAULT NULL +#define stackchan_websocket_v1_StoredFileStart_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, STRING, file_id, 1) \ +X(a, STATIC, SINGULAR, STRING, content_type, 2) \ +X(a, STATIC, SINGULAR, UINT32, total_size, 3) \ +X(a, STATIC, SINGULAR, UINT32, sample_rate, 4) \ +X(a, STATIC, SINGULAR, UINT32, channels, 5) +#define stackchan_websocket_v1_StoredFileStart_CALLBACK NULL +#define stackchan_websocket_v1_StoredFileStart_DEFAULT NULL + +#define stackchan_websocket_v1_StoredFileEnd_FIELDLIST(X, a) \ + +#define 
stackchan_websocket_v1_StoredFileEnd_CALLBACK NULL +#define stackchan_websocket_v1_StoredFileEnd_DEFAULT NULL + #define stackchan_websocket_v1_ServoCommandSequence_FIELDLIST(X, a) \ X(a, STATIC, REPEATED, MESSAGE, commands, 1) #define stackchan_websocket_v1_ServoCommandSequence_CALLBACK NULL @@ -408,10 +472,13 @@ extern const pb_msgdesc_t stackchan_websocket_v1_AudioPcmEnd_msg; extern const pb_msgdesc_t stackchan_websocket_v1_AudioWavStart_msg; extern const pb_msgdesc_t stackchan_websocket_v1_AudioWavEnd_msg; extern const pb_msgdesc_t stackchan_websocket_v1_AudioChunk_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_FileChunk_msg; extern const pb_msgdesc_t stackchan_websocket_v1_StateCommand_msg; extern const pb_msgdesc_t stackchan_websocket_v1_WakeWordEvent_msg; extern const pb_msgdesc_t stackchan_websocket_v1_StateEvent_msg; extern const pb_msgdesc_t stackchan_websocket_v1_SpeakDoneEvent_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_StoredFileStart_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_StoredFileEnd_msg; extern const pb_msgdesc_t stackchan_websocket_v1_ServoCommandSequence_msg; extern const pb_msgdesc_t stackchan_websocket_v1_ServoCommand_msg; extern const pb_msgdesc_t stackchan_websocket_v1_ServoDoneEvent_msg; @@ -425,10 +492,13 @@ extern const pb_msgdesc_t stackchan_websocket_v1_ServerMetadata_msg; #define stackchan_websocket_v1_AudioWavStart_fields &stackchan_websocket_v1_AudioWavStart_msg #define stackchan_websocket_v1_AudioWavEnd_fields &stackchan_websocket_v1_AudioWavEnd_msg #define stackchan_websocket_v1_AudioChunk_fields &stackchan_websocket_v1_AudioChunk_msg +#define stackchan_websocket_v1_FileChunk_fields &stackchan_websocket_v1_FileChunk_msg #define stackchan_websocket_v1_StateCommand_fields &stackchan_websocket_v1_StateCommand_msg #define stackchan_websocket_v1_WakeWordEvent_fields &stackchan_websocket_v1_WakeWordEvent_msg #define stackchan_websocket_v1_StateEvent_fields &stackchan_websocket_v1_StateEvent_msg #define 
stackchan_websocket_v1_SpeakDoneEvent_fields &stackchan_websocket_v1_SpeakDoneEvent_msg +#define stackchan_websocket_v1_StoredFileStart_fields &stackchan_websocket_v1_StoredFileStart_msg +#define stackchan_websocket_v1_StoredFileEnd_fields &stackchan_websocket_v1_StoredFileEnd_msg #define stackchan_websocket_v1_ServoCommandSequence_fields &stackchan_websocket_v1_ServoCommandSequence_msg #define stackchan_websocket_v1_ServoCommand_fields &stackchan_websocket_v1_ServoCommand_msg #define stackchan_websocket_v1_ServoDoneEvent_fields &stackchan_websocket_v1_ServoDoneEvent_msg @@ -442,6 +512,7 @@ extern const pb_msgdesc_t stackchan_websocket_v1_ServerMetadata_msg; #define stackchan_websocket_v1_AudioPcmStart_size 0 #define stackchan_websocket_v1_AudioWavEnd_size 0 #define stackchan_websocket_v1_AudioWavStart_size 12 +#define stackchan_websocket_v1_FileChunk_size 4099 #define stackchan_websocket_v1_FirmwareMetadata_size 87 #define stackchan_websocket_v1_ServerMetadata_size 67 #define stackchan_websocket_v1_ServoCommandSequence_size 4080 @@ -450,6 +521,8 @@ extern const pb_msgdesc_t stackchan_websocket_v1_ServerMetadata_msg; #define stackchan_websocket_v1_SpeakDoneEvent_size 2 #define stackchan_websocket_v1_StateCommand_size 2 #define stackchan_websocket_v1_StateEvent_size 2 +#define stackchan_websocket_v1_StoredFileEnd_size 0 +#define stackchan_websocket_v1_StoredFileStart_size 148 #define stackchan_websocket_v1_WakeWordEvent_size 2 #define stackchan_websocket_v1_WebSocketMessage_size 4113 diff --git a/firmware/src/listening.cpp b/firmware/src/listening.cpp index edb2e35..d7e6aa6 100644 --- a/firmware/src/listening.cpp +++ b/firmware/src/listening.cpp @@ -41,6 +41,12 @@ void Listening::init() void Listening::begin() { + if (M5.Speaker.isPlaying()) + { + log_i("Stopping speaker playback before listening start"); + M5.Speaker.stop(); + delay(20); + } M5.Mic.begin(); startStreaming(); } diff --git a/firmware/src/main.cpp b/firmware/src/main.cpp index 6fb95c4..0d838d5 100644 
--- a/firmware/src/main.cpp +++ b/firmware/src/main.cpp @@ -24,6 +24,7 @@ #include "../include/wake_up_word.hpp" #include "../include/display.hpp" #include "../include/servo.hpp" +#include "../include/stored_files.hpp" //////////////////// 設定 //////////////////// const char *WIFI_SSID = WIFI_SSID_H; @@ -32,6 +33,11 @@ const char *SERVER_HOST = SERVER_HOST_H; const int SERVER_PORT = SERVER_PORT_H; const char *SERVER_PATH = SERVER_PATH_H; // WebSocket エンドポイント const int SAMPLE_RATE = 16000; // 16kHz モノラル +const int SPEAKER_OUTPUT_SAMPLE_RATE = 24000; +const int SPEAKER_VOLUME = 160; // 0-255 +const size_t SPEAKER_DMA_BUF_LEN = 512; +const size_t SPEAKER_DMA_BUF_COUNT = 12; +const uint8_t SPEAKER_TASK_PRIORITY = 3; ///////////////////////////////////////////// StateMachine stateMachine; @@ -42,6 +48,7 @@ static Listening listening(wsClient, stateMachine, SAMPLE_RATE); static WakeUpWord wakeUpWord(stateMachine, SAMPLE_RATE); static Display display(stateMachine); static BodyServo servo; +static StoredFiles storedFiles; // Protocol types are defined in include/protocols.hpp namespace @@ -51,8 +58,12 @@ uint32_t g_last_comm_ms = 0; uint32_t g_last_local_wake_word_ms = 0; constexpr uint32_t kCommTimeoutMs = 60000; constexpr uint32_t kLocalWakeWordCooldownMs = 750; +constexpr const char *kWakeWordDetectedSoundFileId = "wakeword-detected-sound"; stackchan_websocket_v1_WebSocketMessage g_tx_message = stackchan_websocket_v1_WebSocketMessage_init_zero; stackchan_websocket_v1_WebSocketMessage g_rx_message = stackchan_websocket_v1_WebSocketMessage_init_zero; +bool g_pending_device_wake_word_feedback = false; +bool g_feedback_sound_playing = false; +bool g_notify_after_feedback_sound = false; void markCommunicationActive() { @@ -156,6 +167,106 @@ void triggerLocalWakeWord(const char *source) notifyWakeWordDetected(); } +void clearFeedbackSoundState(bool stopPlayback) +{ + g_pending_device_wake_word_feedback = false; + g_notify_after_feedback_sound = false; + if (stopPlayback && 
g_feedback_sound_playing && M5.Speaker.isPlaying()) + { + M5.Speaker.stop(); + } + g_feedback_sound_playing = false; +} + +bool startWakeWordFeedbackSoundPlayback() +{ + StoredFileView sound; + if (!storedFiles.getActivePcmFile(kWakeWordDetectedSoundFileId, sound)) + { + log_w("Wake-word feedback sound is not available for current session id=%s", kWakeWordDetectedSoundFileId); + return false; + } + + if (sound.size == 0 || (sound.size % sizeof(int16_t)) != 0) + { + log_w("Stored wake-word sound has invalid size=%u", static_cast(sound.size)); + return false; + } + + if (sound.sample_rate == 0 || sound.channels == 0) + { + log_w("Stored wake-word sound missing format sample_rate=%u channels=%u", + static_cast(sound.sample_rate), + static_cast(sound.channels)); + return false; + } + + wakeUpWord.end(); + if (M5.Speaker.isPlaying()) + { + M5.Speaker.stop(); + } + + const int16_t *samples = reinterpret_cast(sound.data); + size_t sample_len = sound.size / sizeof(int16_t); + log_i("Starting wake-word feedback playback sample_len=%u sample_rate=%u channels=%u", + static_cast(sample_len), + static_cast(sound.sample_rate), + static_cast(sound.channels)); + M5.Speaker.playRaw(samples, sample_len, sound.sample_rate, sound.channels > 1, 1, 0); + g_feedback_sound_playing = true; + log_i("Playing stored wake-word sound bytes=%u", static_cast(sound.size)); + return true; +} + +void finishWakeWordFeedbackSoundPlayback() +{ + log_i("Wake-word feedback playback finished notify_after=%u", + static_cast(g_notify_after_feedback_sound)); + g_feedback_sound_playing = false; + if (g_notify_after_feedback_sound) + { + g_notify_after_feedback_sound = false; + notifyWakeWordDetected(); + } + + if (stateMachine.getState() == StateMachine::Idle && shouldUseDeviceWakeWord()) + { + wakeUpWord.begin(); + } +} + +void processPendingDeviceWakeWordFeedback() +{ + if (!g_pending_device_wake_word_feedback || stateMachine.getState() != StateMachine::Idle) + { + return; + } + + 
g_pending_device_wake_word_feedback = false; + if (startWakeWordFeedbackSoundPlayback()) + { + g_notify_after_feedback_sound = true; + log_i("Wake-word feedback playback scheduled before uplink event"); + return; + } + + log_w("Wake-word feedback playback skipped; sending WakeWordEvt immediately"); + notifyWakeWordDetected(); +} + +void handleDeviceWakeWordDetected() +{ + if (!canTriggerLocalWakeWord()) + { + return; + } + + g_last_local_wake_word_ms = millis(); + log_i("Local wake-word trigger from device wake word"); + g_pending_device_wake_word_feedback = true; +} + void handleTouchWakeWordInput() { #if USE_STACKCHAN_BSP @@ -324,11 +435,15 @@ void handleWsEvent(WStype_t type, uint8_t *payload, size_t length) // M5.Display.println("WS: disconnected"); log_i("WS disconnected"); resetServerMetadata(); + storedFiles.resetSession(); + clearFeedbackSoundState(true); stateMachine.setState(StateMachine::Disconnected); break; case WStype_CONNECTED: // M5.Display.printf("WS: connected %s\n", SERVER_PATH); log_i("WS connected to %s", SERVER_PATH); + storedFiles.resetSession(); + clearFeedbackSoundState(true); if (stateMachine.getState() == StateMachine::Disconnected) { stateMachine.setState(StateMachine::Idle); @@ -427,6 +542,49 @@ void handleWsEvent(WStype_t type, uint8_t *payload, size_t length) log_w("ServerMetadata protobuf body mismatch type=%u body=%u", (unsigned)rx.message_type, (unsigned)rx.which_body); } break; + case stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STORED_FILE: + if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_stored_file_start_tag) + { + log_i("Received stored file start id=%s seq=%u size=%u sample_rate=%u channels=%u", + rx.body.stored_file_start.file_id, + static_cast(rx.seq), + static_cast(rx.body.stored_file_start.total_size), + static_cast(rx.body.stored_file_start.sample_rate), + static_cast(rx.body.stored_file_start.channels)); + if 
(!storedFiles.handleStart(rx.seq, rx.body.stored_file_start)) + { + log_w("Stored file start rejected"); + } + } + else if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_stored_file_data_tag) + { + log_i("Received stored file chunk seq=%u bytes=%u", + static_cast(rx.seq), + static_cast(getProtoFileChunkSize(rx.body.stored_file_data))); + if (!storedFiles.handleData( + rx.seq, + getProtoFileChunkBytes(rx.body.stored_file_data), + getProtoFileChunkSize(rx.body.stored_file_data))) + { + log_w("Stored file data rejected"); + } + } + else if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_stored_file_end_tag) + { + log_i("Received stored file end seq=%u", static_cast(rx.seq)); + if (!storedFiles.handleEnd(rx.seq)) + { + log_w("Stored file end rejected"); + } + } + else + { + log_w("StoredFile protobuf body mismatch type=%u body=%u", (unsigned)rx.message_type, (unsigned)rx.which_body); + } + break; default: // M5.Display.printf("WS bin kind=%u len=%d\n", (unsigned)rx.kind, (int)length); break; @@ -459,6 +617,18 @@ void setup() M5.begin(cfg); #endif + auto spk_cfg = M5.Speaker.config(); + spk_cfg.sample_rate = SPEAKER_OUTPUT_SAMPLE_RATE; + spk_cfg.dma_buf_len = SPEAKER_DMA_BUF_LEN; + spk_cfg.dma_buf_count = SPEAKER_DMA_BUF_COUNT; + spk_cfg.task_priority = SPEAKER_TASK_PRIORITY; + M5.Speaker.config(spk_cfg); + log_i("Speaker config sample_rate=%u dma_buf_len=%u dma_buf_count=%u task_priority=%u", + static_cast(spk_cfg.sample_rate), + static_cast(spk_cfg.dma_buf_len), + static_cast(spk_cfg.dma_buf_count), + static_cast(spk_cfg.task_priority)); + auto mic_cfg = M5.Mic.config(); mic_cfg.sample_rate = SAMPLE_RATE; mic_cfg.dma_buf_len = 256; @@ -468,6 +638,7 @@ void setup() listening.init(); speaking.init(); + storedFiles.init(); speaking.setSpeakFinishedCallback([]() { notifySpeakDone(); }); @@ -477,7 
+648,7 @@ void setup() }); wakeUpWord.init(); wakeUpWord.setWakeWordDetectedCallback([]() { - notifyWakeWordDetected(); + handleDeviceWakeWordDetected(); }); display.init(); initializeFirmwareMetadata(); @@ -485,7 +656,7 @@ void setup() connectWiFi(); // Mic/Speaking setup - M5.Speaker.setVolume(200); // 0-255 + M5.Speaker.setVolume(SPEAKER_VOLUME); // 0-255 wsClient.begin(SERVER_HOST, SERVER_PORT, SERVER_PATH); markCommunicationActive(); @@ -507,6 +678,7 @@ void setup() stateMachine.addStateEntryEvent(StateMachine::Listening, [](StateMachine::State, StateMachine::State) { notifyCurrentState(StateMachine::Listening); + clearFeedbackSoundState(true); listening.begin(); }); stateMachine.addStateExitEvent(StateMachine::Listening, [](StateMachine::State, StateMachine::State) { @@ -541,11 +713,21 @@ void loop() switch (current) { case StateMachine::Idle: + if (g_feedback_sound_playing) + { + if (!M5.Speaker.isPlaying()) + { + finishWakeWordFeedbackSoundPlayback(); + } + break; + } + handleTouchWakeWordInput(); if (shouldUseDeviceWakeWord()) { wakeUpWord.loop(); } + processPendingDeviceWakeWordFeedback(); break; case StateMachine::Listening: listening.loop(); diff --git a/firmware/src/protocols.cpp b/firmware/src/protocols.cpp index 16af592..d6aa03b 100644 --- a/firmware/src/protocols.cpp +++ b/firmware/src/protocols.cpp @@ -33,6 +33,34 @@ size_t getProtoAudioChunkSize(const stackchan_websocket_v1_AudioChunk &chunk) return chunk.pcm_bytes.size; } +bool setProtoFileChunk( + stackchan_websocket_v1_FileChunk &chunk, + const uint8_t *data, + size_t data_len) +{ + if (data_len > kProtoFileChunkMaxBytes) + { + return false; + } + + chunk.chunk_bytes.size = static_cast(data_len); + if (data_len > 0 && data != nullptr) + { + memcpy(chunk.chunk_bytes.bytes, data, data_len); + } + return true; +} + +const uint8_t *getProtoFileChunkBytes(const stackchan_websocket_v1_FileChunk &chunk) +{ + return chunk.chunk_bytes.bytes; +} + +size_t getProtoFileChunkSize(const 
stackchan_websocket_v1_FileChunk &chunk) +{ + return chunk.chunk_bytes.size; +} + bool encodeWebSocketMessage( const stackchan_websocket_v1_WebSocketMessage &message, std::vector &encoded) diff --git a/firmware/src/stored_files.cpp b/firmware/src/stored_files.cpp new file mode 100644 index 0000000..dff459e --- /dev/null +++ b/firmware/src/stored_files.cpp @@ -0,0 +1,494 @@ +#include "stored_files.hpp" + +#include +#include + +#include +#include +#include + +namespace +{ +constexpr const char *kIndexFilePath = "/wsfiles.idx"; +constexpr const char *kPayloadPaths[] = { + "/wsfile0.bin", + "/wsfile1.bin", + "/wsfile2.bin", + "/wsfile3.bin", +}; +} // namespace + +void StoredFiles::init() +{ + session_active_.fill(false); + cached_slot_index_ = -1; + cached_payload_.clear(); + resetTransfer(); + + if (!mountSpiffs()) + { + return; + } + + log_i( + "SPIFFS mounted total=%u used=%u", + static_cast(SPIFFS.totalBytes()), + static_cast(SPIFFS.usedBytes())); + + if (!loadIndex()) + { + slots_ = {}; + persistIndex(); + } +} + +void StoredFiles::resetSession() +{ + session_active_.fill(false); + cached_slot_index_ = -1; + cached_payload_.clear(); + resetTransfer(); +} + +bool StoredFiles::handleStart(uint32_t seq, const stackchan_websocket_v1_StoredFileStart &start) +{ + if (!mountSpiffs()) + { + return false; + } + + resetTransfer(); + + if (start.file_id[0] == '\0') + { + log_w("Stored file start missing file_id"); + return false; + } + + size_t total_size = static_cast(start.total_size); + if (total_size > kMaxStoredFileBytes) + { + log_w( + "Stored file too large for SPIFFS-backed transfer id=%s size=%u limit=%u", + start.file_id, + static_cast(start.total_size), + static_cast(kMaxStoredFileBytes)); + return false; + } + + int slot_index = selectSlotForId(start.file_id); + if (slot_index < 0) + { + log_w("No slot available for stored file id=%s", start.file_id); + return false; + } + + size_t reclaimable_bytes = slots_[slot_index].used ? 
slots_[slot_index].size : 0; + size_t total_bytes = SPIFFS.totalBytes(); + size_t used_bytes = SPIFFS.usedBytes(); + size_t free_bytes = total_bytes > used_bytes ? total_bytes - used_bytes : 0; + size_t available_bytes = free_bytes + reclaimable_bytes; + if (total_bytes > 0 && total_size > available_bytes) + { + log_w( + "Insufficient SPIFFS space for stored file id=%s requested=%u free=%u reclaimable=%u", + start.file_id, + static_cast(total_size), + static_cast(free_bytes), + static_cast(reclaimable_bytes)); + return false; + } + + transfer_.active = true; + transfer_.next_seq = seq + 1; + transfer_.slot_index = slot_index; + transfer_.slot = PersistedSlot{}; + transfer_.slot.used = true; + snprintf(transfer_.slot.file_id, sizeof(transfer_.slot.file_id), "%s", start.file_id); + snprintf(transfer_.slot.content_type, sizeof(transfer_.slot.content_type), "%s", start.content_type); + transfer_.slot.sample_rate = start.sample_rate; + transfer_.slot.channels = start.channels; + transfer_.slot.size = static_cast(total_size); + transfer_.received_bytes = 0; + transfer_.chunk_count = 0; + transfer_.payload.clear(); + transfer_.payload.reserve(total_size); + + log_i( + "Stored file start id=%s type=%s size=%u sample_rate=%u channels=%u slot=%d spiffs_free=%u", + transfer_.slot.file_id, + transfer_.slot.content_type, + static_cast(transfer_.slot.size), + static_cast(transfer_.slot.sample_rate), + static_cast(transfer_.slot.channels), + slot_index, + static_cast(free_bytes)); + return true; +} + +bool StoredFiles::handleData(uint32_t seq, const uint8_t *data, size_t data_len) +{ + if (!transfer_.active) + { + log_w("Stored file data without active transfer"); + return false; + } + + if (seq != transfer_.next_seq) + { + log_w("Stored file seq gap got=%u expected=%u", static_cast(seq), static_cast(transfer_.next_seq)); + resetTransfer(); + return false; + } + transfer_.next_seq++; + + size_t next_size = transfer_.payload.size() + data_len; + if (next_size > transfer_.slot.size 
|| next_size > kMaxStoredFileBytes) + { + log_w("Stored file payload too large id=%s size=%u expected=%u", + transfer_.slot.file_id, + static_cast(next_size), + static_cast(transfer_.slot.size)); + resetTransfer(); + return false; + } + + transfer_.payload.insert(transfer_.payload.end(), data, data + data_len); + transfer_.received_bytes += static_cast(data_len); + transfer_.chunk_count++; + log_i( + "Stored file chunk id=%s chunk=%u bytes=%u total=%u/%u", + transfer_.slot.file_id, + static_cast(transfer_.chunk_count), + static_cast(data_len), + static_cast(transfer_.received_bytes), + static_cast(transfer_.slot.size)); + return true; +} + +bool StoredFiles::handleEnd(uint32_t seq) +{ + if (!transfer_.active) + { + log_w("Stored file end without active transfer"); + return false; + } + + if (seq != transfer_.next_seq) + { + log_w("Stored file end seq gap got=%u expected=%u", static_cast(seq), static_cast(transfer_.next_seq)); + resetTransfer(); + return false; + } + + if (transfer_.payload.size() != transfer_.slot.size) + { + log_w("Stored file size mismatch id=%s actual=%u expected=%u", + transfer_.slot.file_id, + static_cast(transfer_.payload.size()), + static_cast(transfer_.slot.size)); + resetTransfer(); + return false; + } + + int slot_index = transfer_.slot_index; + PersistedSlot slot = transfer_.slot; + std::vector payload = transfer_.payload; + + if (!persistSlotPayload(slot_index, payload)) + { + resetTransfer(); + return false; + } + + slots_[slot_index] = slot; + if (!persistIndex()) + { + resetTransfer(); + return false; + } + + bool activated = activateSlot(slot_index, payload); + log_i( + "Stored file saved to SPIFFS id=%s slot=%d chunks=%u bytes=%u activated=%u used=%u", + slot.file_id, + slot_index, + static_cast(transfer_.chunk_count), + static_cast(payload.size()), + static_cast(activated), + static_cast(SPIFFS.usedBytes())); + resetTransfer(); + return activated; +} + +bool StoredFiles::getActivePcmFile(const char *fileId, StoredFileView &view) 
+{ + view = StoredFileView{}; + + int slot_index = findSlotById(fileId); + if (slot_index < 0 || !session_active_[slot_index]) + { + log_i("Stored file inactive or not received in this session id=%s", fileId); + return false; + } + + const PersistedSlot &slot = slots_[slot_index]; + if (strcmp(slot.content_type, "audio/pcm") != 0) + { + log_w("Stored file id=%s has unsupported content_type=%s", slot.file_id, slot.content_type); + return false; + } + + if (cached_slot_index_ != slot_index) + { + std::vector payload; + if (!loadSlotPayload(slot_index, payload)) + { + log_w("Failed to load stored file payload id=%s slot=%d", fileId, slot_index); + return false; + } + cached_slot_index_ = slot_index; + cached_payload_ = std::move(payload); + log_i("Loaded stored file payload from SPIFFS into cache id=%s slot=%d bytes=%u", + fileId, + slot_index, + static_cast(cached_payload_.size())); + } + + if (cached_payload_.empty() && slot.size != 0) + { + return false; + } + + view.data = cached_payload_.data(); + view.size = cached_payload_.size(); + view.sample_rate = slot.sample_rate; + view.channels = static_cast(slot.channels); + log_i("Stored file ready for playback id=%s slot=%d bytes=%u sample_rate=%u channels=%u", + fileId, + slot_index, + static_cast(view.size), + static_cast(view.sample_rate), + static_cast(view.channels)); + return true; +} + +bool StoredFiles::mountSpiffs() +{ + if (storage_ready_) + { + return true; + } + + storage_ready_ = SPIFFS.begin(true); + if (!storage_ready_) + { + log_w("Failed to mount SPIFFS"); + } + return storage_ready_; +} + +bool StoredFiles::loadIndex() +{ + if (!storage_ready_) + { + return false; + } + + if (!SPIFFS.exists(indexPath())) + { + slots_ = {}; + return true; + } + + File file = SPIFFS.open(indexPath(), "r"); + if (!file) + { + log_w("Failed to open SPIFFS index file path=%s", indexPath()); + return false; + } + + size_t index_size = file.size(); + if (index_size != sizeof(slots_)) + { + log_w("Stored file index size 
mismatch actual=%u expected=%u", + static_cast(index_size), + static_cast(sizeof(slots_))); + file.close(); + return false; + } + + size_t bytes_read = file.read(reinterpret_cast(slots_.data()), sizeof(slots_)); + file.close(); + return bytes_read == sizeof(slots_); +} + +bool StoredFiles::persistIndex() +{ + if (!storage_ready_) + { + return false; + } + + File file = SPIFFS.open(indexPath(), "w"); + if (!file) + { + log_w("Failed to open SPIFFS index file for write path=%s", indexPath()); + return false; + } + + size_t bytes_written = file.write( + reinterpret_cast(slots_.data()), + sizeof(slots_)); + file.close(); + if (bytes_written != sizeof(slots_)) + { + log_w("Failed to persist stored file index written=%u expected=%u", + static_cast(bytes_written), + static_cast(sizeof(slots_))); + return false; + } + return true; +} + +bool StoredFiles::persistSlotPayload(int slotIndex, const std::vector &payload) +{ + if (!storage_ready_) + { + return false; + } + + File file = SPIFFS.open(payloadPathForSlot(slotIndex), "w"); + if (!file) + { + log_w("Failed to open SPIFFS payload file for write slot=%d path=%s", + slotIndex, + payloadPathForSlot(slotIndex)); + return false; + } + + size_t bytes_written = 0; + if (!payload.empty()) + { + bytes_written = file.write(payload.data(), payload.size()); + } + file.close(); + if (bytes_written != payload.size()) + { + log_w("Failed to persist stored file payload slot=%d written=%u expected=%u", + slotIndex, + static_cast(bytes_written), + static_cast(payload.size())); + return false; + } + return true; +} + +bool StoredFiles::loadSlotPayload(int slotIndex, std::vector &payload) +{ + payload.clear(); + if (!storage_ready_) + { + return false; + } + + File file = SPIFFS.open(payloadPathForSlot(slotIndex), "r"); + if (!file) + { + log_w("Failed to open SPIFFS payload file for read slot=%d path=%s", + slotIndex, + payloadPathForSlot(slotIndex)); + return false; + } + + size_t payload_size = file.size(); + if (payload_size != 
slots_[slotIndex].size) + { + log_w("Stored file payload size mismatch slot=%d actual=%u expected=%u", + slotIndex, + static_cast(payload_size), + static_cast(slots_[slotIndex].size)); + file.close(); + return false; + } + + payload.resize(payload_size); + size_t bytes_read = 0; + if (payload_size > 0) + { + bytes_read = file.read(payload.data(), payload_size); + } + file.close(); + return bytes_read == payload_size; +} + +int StoredFiles::findSlotById(const char *fileId) const +{ + for (size_t i = 0; i < slots_.size(); ++i) + { + if (!slots_[i].used) + { + continue; + } + if (strcmp(slots_[i].file_id, fileId) == 0) + { + return static_cast(i); + } + } + return -1; +} + +int StoredFiles::selectSlotForId(const char *fileId) +{ + int existing_slot = findSlotById(fileId); + if (existing_slot >= 0) + { + return existing_slot; + } + + for (size_t i = 0; i < slots_.size(); ++i) + { + if (!slots_[i].used) + { + return static_cast(i); + } + } + + return 0; +} + +void StoredFiles::resetTransfer() +{ + transfer_ = TransferState{}; +} + +bool StoredFiles::activateSlot(int slotIndex, const std::vector &payload) +{ + if (slotIndex < 0 || slotIndex >= static_cast(slots_.size())) + { + return false; + } + + session_active_[slotIndex] = true; + cached_slot_index_ = slotIndex; + cached_payload_ = payload; + log_i("Stored file activated for current session id=%s slot=%d bytes=%u", + slots_[slotIndex].file_id, + slotIndex, + static_cast(cached_payload_.size())); + return true; +} + +const char *StoredFiles::payloadPathForSlot(int slotIndex) +{ + if (slotIndex < 0 || slotIndex >= static_cast(sizeof(kPayloadPaths) / sizeof(kPayloadPaths[0]))) + { + return kPayloadPaths[0]; + } + return kPayloadPaths[slotIndex]; +} + +const char *StoredFiles::indexPath() +{ + return kIndexFilePath; +} diff --git a/misc/wake_sound/wake_sound.wav b/misc/wake_sound/wake_sound.wav new file mode 100644 index 0000000..a1f37c5 Binary files /dev/null and b/misc/wake_sound/wake_sound.wav differ diff --git 
a/protobuf/websocket-message.options b/protobuf/websocket-message.options index e032f4f..ab152ff 100644 --- a/protobuf/websocket-message.options +++ b/protobuf/websocket-message.options @@ -1,4 +1,7 @@ stackchan.websocket.v1.AudioChunk.pcm_bytes max_size:4096 +stackchan.websocket.v1.FileChunk.chunk_bytes max_size:4096 stackchan.websocket.v1.ServoCommandSequence.commands max_count:255 stackchan.websocket.v1.FirmwareMetadata.firmware_version max_length:63 stackchan.websocket.v1.ServerMetadata.server_version max_length:63 +stackchan.websocket.v1.StoredFileStart.file_id max_length:63 +stackchan.websocket.v1.StoredFileStart.content_type max_length:63 diff --git a/protobuf/websocket-message.proto b/protobuf/websocket-message.proto index c643673..daae0c8 100644 --- a/protobuf/websocket-message.proto +++ b/protobuf/websocket-message.proto @@ -31,6 +31,9 @@ message WebSocketMessage { ServoDoneEvent servo_done_evt = 35; FirmwareMetadata firmware_metadata = 36; ServerMetadata server_metadata = 37; + StoredFileStart stored_file_start = 40; + FileChunk stored_file_data = 41; + StoredFileEnd stored_file_end = 42; } } @@ -46,6 +49,7 @@ enum MessageKind { MESSAGE_KIND_SERVO_DONE_EVT = 8; MESSAGE_KIND_FIRMWARE_METADATA = 9; MESSAGE_KIND_SERVER_METADATA = 10; + MESSAGE_KIND_STORED_FILE = 11; } enum MessageType { @@ -97,6 +101,10 @@ message AudioChunk { bytes pcm_bytes = 1; } +message FileChunk { + bytes chunk_bytes = 1; +} + message StateCommand { StackchanState state = 1; } @@ -113,6 +121,16 @@ message SpeakDoneEvent { bool done = 1; } +message StoredFileStart { + string file_id = 1; + string content_type = 2; + uint32 total_size = 3; + uint32 sample_rate = 4; + uint32 channels = 5; +} + +message StoredFileEnd {} + message ServoCommandSequence { repeated ServoCommand commands = 1; } diff --git a/stackchan_server/app.py b/stackchan_server/app.py index 14496d2..3ca50f8 100644 --- a/stackchan_server/app.py +++ b/stackchan_server/app.py @@ -29,9 +29,12 @@ def __init__( self, 
speech_recognizer: SpeechRecognizer | None = None, speech_synthesizer: SpeechSynthesizer | None = None, + *, + send_wakeword_sound_on_connect: bool = True, ) -> None: self.speech_recognizer = speech_recognizer or create_speech_recognizer() self.speech_synthesizer = speech_synthesizer or create_speech_synthesizer() + self.send_wakeword_sound_on_connect = send_wakeword_sound_on_connect self.fastapi = FastAPI(title="StackChan WebSocket Server") self._setup_fn: Optional[Callable[[WsProxy], Awaitable[None]]] = None self._talk_session_fn: Optional[Callable[[WsProxy], Awaitable[None]]] = None @@ -96,6 +99,9 @@ async def _handle_ws(self, websocket: WebSocket) -> None: await existing.close() try: + if self.send_wakeword_sound_on_connect: + await proxy.send_default_wake_word_sound() + if self._setup_fn: await self._setup_fn(proxy) diff --git a/stackchan_server/generated_protobuf/websocket_message_pb2.py b/stackchan_server/generated_protobuf/websocket_message_pb2.py index a7d7a4e..9d4a505 100644 --- a/stackchan_server/generated_protobuf/websocket_message_pb2.py +++ b/stackchan_server/generated_protobuf/websocket_message_pb2.py @@ -24,53 +24,59 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17websocket-message.proto\x12\x16stackchan.websocket.v1\"\x96\x08\n\x10WebSocketMessage\x12\x31\n\x04kind\x18\x01 \x01(\x0e\x32#.stackchan.websocket.v1.MessageKind\x12\x39\n\x0cmessage_type\x18\x02 \x01(\x0e\x32#.stackchan.websocket.v1.MessageType\x12\x0b\n\x03seq\x18\x03 \x01(\r\x12@\n\x0f\x61udio_pcm_start\x18\n \x01(\x0b\x32%.stackchan.websocket.v1.AudioPcmStartH\x00\x12<\n\x0e\x61udio_pcm_data\x18\x0b \x01(\x0b\x32\".stackchan.websocket.v1.AudioChunkH\x00\x12<\n\raudio_pcm_end\x18\x0c \x01(\x0b\x32#.stackchan.websocket.v1.AudioPcmEndH\x00\x12@\n\x0f\x61udio_wav_start\x18\x14 \x01(\x0b\x32%.stackchan.websocket.v1.AudioWavStartH\x00\x12<\n\x0e\x61udio_wav_data\x18\x15 \x01(\x0b\x32\".stackchan.websocket.v1.AudioChunkH\x00\x12<\n\raudio_wav_end\x18\x16 
\x01(\x0b\x32#.stackchan.websocket.v1.AudioWavEndH\x00\x12\x39\n\tstate_cmd\x18\x1e \x01(\x0b\x32$.stackchan.websocket.v1.StateCommandH\x00\x12>\n\rwake_word_evt\x18\x1f \x01(\x0b\x32%.stackchan.websocket.v1.WakeWordEventH\x00\x12\x37\n\tstate_evt\x18 \x01(\x0b\x32\".stackchan.websocket.v1.StateEventH\x00\x12@\n\x0espeak_done_evt\x18! \x01(\x0b\x32&.stackchan.websocket.v1.SpeakDoneEventH\x00\x12\x41\n\tservo_cmd\x18\" \x01(\x0b\x32,.stackchan.websocket.v1.ServoCommandSequenceH\x00\x12@\n\x0eservo_done_evt\x18# \x01(\x0b\x32&.stackchan.websocket.v1.ServoDoneEventH\x00\x12\x45\n\x11\x66irmware_metadata\x18$ \x01(\x0b\x32(.stackchan.websocket.v1.FirmwareMetadataH\x00\x12\x41\n\x0fserver_metadata\x18% \x01(\x0b\x32&.stackchan.websocket.v1.ServerMetadataH\x00\x42\x06\n\x04\x62ody\"\x0f\n\rAudioPcmStart\"\r\n\x0b\x41udioPcmEnd\"6\n\rAudioWavStart\x12\x13\n\x0bsample_rate\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\"\r\n\x0b\x41udioWavEnd\"\x1f\n\nAudioChunk\x12\x11\n\tpcm_bytes\x18\x01 \x01(\x0c\"E\n\x0cStateCommand\x12\x35\n\x05state\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.StackchanState\"!\n\rWakeWordEvent\x12\x10\n\x08\x64\x65tected\x18\x01 \x01(\x08\"C\n\nStateEvent\x12\x35\n\x05state\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.StackchanState\"\x1e\n\x0eSpeakDoneEvent\x12\x0c\n\x04\x64one\x18\x01 \x01(\x08\"N\n\x14ServoCommandSequence\x12\x36\n\x08\x63ommands\x18\x01 \x03(\x0b\x32$.stackchan.websocket.v1.ServoCommand\"f\n\x0cServoCommand\x12\x32\n\x02op\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.ServoOperation\x12\r\n\x05\x61ngle\x18\x02 \x01(\x11\x12\x13\n\x0b\x64uration_ms\x18\x03 \x01(\x11\"\x1e\n\x0eServoDoneEvent\x12\x0c\n\x04\x64one\x18\x01 \x01(\x08\"\x99\x02\n\x10\x46irmwareMetadata\x12\x37\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\".stackchan.websocket.v1.DeviceType\x12\x15\n\rdisplay_width\x18\x02 \x01(\r\x12\x16\n\x0e\x64isplay_height\x18\x03 \x01(\r\x12\x1c\n\x14has_device_wake_word\x18\x04 \x01(\x08\x12\x0f\n\x07has_led\x18\x05 
\x01(\x08\x12\x35\n\nservo_type\x18\x06 \x01(\x0e\x32!.stackchan.websocket.v1.ServoType\x12\x1d\n\x15supports_audio_duplex\x18\x07 \x01(\x08\x12\x18\n\x10\x66irmware_version\x18\x08 \x01(\t\"F\n\x0eServerMetadata\x12\x1c\n\x14has_server_wake_word\x18\x01 \x01(\x08\x12\x16\n\x0eserver_version\x18\x02 \x01(\t*\xdf\x02\n\x0bMessageKind\x12\x1c\n\x18MESSAGE_KIND_UNSPECIFIED\x10\x00\x12\x1a\n\x16MESSAGE_KIND_AUDIO_PCM\x10\x01\x12\x1a\n\x16MESSAGE_KIND_AUDIO_WAV\x10\x02\x12\x1a\n\x16MESSAGE_KIND_STATE_CMD\x10\x03\x12\x1e\n\x1aMESSAGE_KIND_WAKE_WORD_EVT\x10\x04\x12\x1a\n\x16MESSAGE_KIND_STATE_EVT\x10\x05\x12\x1f\n\x1bMESSAGE_KIND_SPEAK_DONE_EVT\x10\x06\x12\x1a\n\x16MESSAGE_KIND_SERVO_CMD\x10\x07\x12\x1f\n\x1bMESSAGE_KIND_SERVO_DONE_EVT\x10\x08\x12\"\n\x1eMESSAGE_KIND_FIRMWARE_METADATA\x10\t\x12 \n\x1cMESSAGE_KIND_SERVER_METADATA\x10\n*p\n\x0bMessageType\x12\x1c\n\x18MESSAGE_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12MESSAGE_TYPE_START\x10\x01\x12\x15\n\x11MESSAGE_TYPE_DATA\x10\x02\x12\x14\n\x10MESSAGE_TYPE_END\x10\x03*\x85\x01\n\x0eStackchanState\x12\x18\n\x14STACKCHAN_STATE_IDLE\x10\x00\x12\x1d\n\x19STACKCHAN_STATE_LISTENING\x10\x01\x12\x1c\n\x18STACKCHAN_STATE_THINKING\x10\x02\x12\x1c\n\x18STACKCHAN_STATE_SPEAKING\x10\x03*c\n\x0eServoOperation\x12\x19\n\x15SERVO_OPERATION_SLEEP\x10\x00\x12\x1a\n\x16SERVO_OPERATION_MOVE_X\x10\x01\x12\x1a\n\x16SERVO_OPERATION_MOVE_Y\x10\x02*\x85\x01\n\nDeviceType\x12\x1b\n\x17\x44\x45VICE_TYPE_UNSPECIFIED\x10\x00\x12\x1e\n\x1a\x44\x45VICE_TYPE_M5STACK_CORES3\x10\x01\x12\x1a\n\x16\x44\x45VICE_TYPE_M5ATOM_S3R\x10\x02\x12\x1e\n\x1a\x44\x45VICE_TYPE_M5ATOM_ECHOS3R\x10\x03*i\n\tServoType\x12\x1a\n\x16SERVO_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fSERVO_TYPE_NONE\x10\x01\x12\x13\n\x0fSERVO_TYPE_SG90\x10\x02\x12\x16\n\x12SERVO_TYPE_SCS0009\x10\x03\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17websocket-message.proto\x12\x16stackchan.websocket.v1\"\xdd\t\n\x10WebSocketMessage\x12\x31\n\x04kind\x18\x01 
\x01(\x0e\x32#.stackchan.websocket.v1.MessageKind\x12\x39\n\x0cmessage_type\x18\x02 \x01(\x0e\x32#.stackchan.websocket.v1.MessageType\x12\x0b\n\x03seq\x18\x03 \x01(\r\x12@\n\x0f\x61udio_pcm_start\x18\n \x01(\x0b\x32%.stackchan.websocket.v1.AudioPcmStartH\x00\x12<\n\x0e\x61udio_pcm_data\x18\x0b \x01(\x0b\x32\".stackchan.websocket.v1.AudioChunkH\x00\x12<\n\raudio_pcm_end\x18\x0c \x01(\x0b\x32#.stackchan.websocket.v1.AudioPcmEndH\x00\x12@\n\x0f\x61udio_wav_start\x18\x14 \x01(\x0b\x32%.stackchan.websocket.v1.AudioWavStartH\x00\x12<\n\x0e\x61udio_wav_data\x18\x15 \x01(\x0b\x32\".stackchan.websocket.v1.AudioChunkH\x00\x12<\n\raudio_wav_end\x18\x16 \x01(\x0b\x32#.stackchan.websocket.v1.AudioWavEndH\x00\x12\x39\n\tstate_cmd\x18\x1e \x01(\x0b\x32$.stackchan.websocket.v1.StateCommandH\x00\x12>\n\rwake_word_evt\x18\x1f \x01(\x0b\x32%.stackchan.websocket.v1.WakeWordEventH\x00\x12\x37\n\tstate_evt\x18 \x01(\x0b\x32\".stackchan.websocket.v1.StateEventH\x00\x12@\n\x0espeak_done_evt\x18! \x01(\x0b\x32&.stackchan.websocket.v1.SpeakDoneEventH\x00\x12\x41\n\tservo_cmd\x18\" \x01(\x0b\x32,.stackchan.websocket.v1.ServoCommandSequenceH\x00\x12@\n\x0eservo_done_evt\x18# \x01(\x0b\x32&.stackchan.websocket.v1.ServoDoneEventH\x00\x12\x45\n\x11\x66irmware_metadata\x18$ \x01(\x0b\x32(.stackchan.websocket.v1.FirmwareMetadataH\x00\x12\x41\n\x0fserver_metadata\x18% \x01(\x0b\x32&.stackchan.websocket.v1.ServerMetadataH\x00\x12\x44\n\x11stored_file_start\x18( \x01(\x0b\x32\'.stackchan.websocket.v1.StoredFileStartH\x00\x12=\n\x10stored_file_data\x18) \x01(\x0b\x32!.stackchan.websocket.v1.FileChunkH\x00\x12@\n\x0fstored_file_end\x18* \x01(\x0b\x32%.stackchan.websocket.v1.StoredFileEndH\x00\x42\x06\n\x04\x62ody\"\x0f\n\rAudioPcmStart\"\r\n\x0b\x41udioPcmEnd\"6\n\rAudioWavStart\x12\x13\n\x0bsample_rate\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\"\r\n\x0b\x41udioWavEnd\"\x1f\n\nAudioChunk\x12\x11\n\tpcm_bytes\x18\x01 \x01(\x0c\" \n\tFileChunk\x12\x13\n\x0b\x63hunk_bytes\x18\x01 
\x01(\x0c\"E\n\x0cStateCommand\x12\x35\n\x05state\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.StackchanState\"!\n\rWakeWordEvent\x12\x10\n\x08\x64\x65tected\x18\x01 \x01(\x08\"C\n\nStateEvent\x12\x35\n\x05state\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.StackchanState\"\x1e\n\x0eSpeakDoneEvent\x12\x0c\n\x04\x64one\x18\x01 \x01(\x08\"s\n\x0fStoredFileStart\x12\x0f\n\x07\x66ile_id\x18\x01 \x01(\t\x12\x14\n\x0c\x63ontent_type\x18\x02 \x01(\t\x12\x12\n\ntotal_size\x18\x03 \x01(\r\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12\x10\n\x08\x63hannels\x18\x05 \x01(\r\"\x0f\n\rStoredFileEnd\"N\n\x14ServoCommandSequence\x12\x36\n\x08\x63ommands\x18\x01 \x03(\x0b\x32$.stackchan.websocket.v1.ServoCommand\"f\n\x0cServoCommand\x12\x32\n\x02op\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.ServoOperation\x12\r\n\x05\x61ngle\x18\x02 \x01(\x11\x12\x13\n\x0b\x64uration_ms\x18\x03 \x01(\x11\"\x1e\n\x0eServoDoneEvent\x12\x0c\n\x04\x64one\x18\x01 \x01(\x08\"\x99\x02\n\x10\x46irmwareMetadata\x12\x37\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\".stackchan.websocket.v1.DeviceType\x12\x15\n\rdisplay_width\x18\x02 \x01(\r\x12\x16\n\x0e\x64isplay_height\x18\x03 \x01(\r\x12\x1c\n\x14has_device_wake_word\x18\x04 \x01(\x08\x12\x0f\n\x07has_led\x18\x05 \x01(\x08\x12\x35\n\nservo_type\x18\x06 \x01(\x0e\x32!.stackchan.websocket.v1.ServoType\x12\x1d\n\x15supports_audio_duplex\x18\x07 \x01(\x08\x12\x18\n\x10\x66irmware_version\x18\x08 \x01(\t\"F\n\x0eServerMetadata\x12\x1c\n\x14has_server_wake_word\x18\x01 \x01(\x08\x12\x16\n\x0eserver_version\x18\x02 
\x01(\t*\xfd\x02\n\x0bMessageKind\x12\x1c\n\x18MESSAGE_KIND_UNSPECIFIED\x10\x00\x12\x1a\n\x16MESSAGE_KIND_AUDIO_PCM\x10\x01\x12\x1a\n\x16MESSAGE_KIND_AUDIO_WAV\x10\x02\x12\x1a\n\x16MESSAGE_KIND_STATE_CMD\x10\x03\x12\x1e\n\x1aMESSAGE_KIND_WAKE_WORD_EVT\x10\x04\x12\x1a\n\x16MESSAGE_KIND_STATE_EVT\x10\x05\x12\x1f\n\x1bMESSAGE_KIND_SPEAK_DONE_EVT\x10\x06\x12\x1a\n\x16MESSAGE_KIND_SERVO_CMD\x10\x07\x12\x1f\n\x1bMESSAGE_KIND_SERVO_DONE_EVT\x10\x08\x12\"\n\x1eMESSAGE_KIND_FIRMWARE_METADATA\x10\t\x12 \n\x1cMESSAGE_KIND_SERVER_METADATA\x10\n\x12\x1c\n\x18MESSAGE_KIND_STORED_FILE\x10\x0b*p\n\x0bMessageType\x12\x1c\n\x18MESSAGE_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12MESSAGE_TYPE_START\x10\x01\x12\x15\n\x11MESSAGE_TYPE_DATA\x10\x02\x12\x14\n\x10MESSAGE_TYPE_END\x10\x03*\x85\x01\n\x0eStackchanState\x12\x18\n\x14STACKCHAN_STATE_IDLE\x10\x00\x12\x1d\n\x19STACKCHAN_STATE_LISTENING\x10\x01\x12\x1c\n\x18STACKCHAN_STATE_THINKING\x10\x02\x12\x1c\n\x18STACKCHAN_STATE_SPEAKING\x10\x03*c\n\x0eServoOperation\x12\x19\n\x15SERVO_OPERATION_SLEEP\x10\x00\x12\x1a\n\x16SERVO_OPERATION_MOVE_X\x10\x01\x12\x1a\n\x16SERVO_OPERATION_MOVE_Y\x10\x02*\x85\x01\n\nDeviceType\x12\x1b\n\x17\x44\x45VICE_TYPE_UNSPECIFIED\x10\x00\x12\x1e\n\x1a\x44\x45VICE_TYPE_M5STACK_CORES3\x10\x01\x12\x1a\n\x16\x44\x45VICE_TYPE_M5ATOM_S3R\x10\x02\x12\x1e\n\x1a\x44\x45VICE_TYPE_M5ATOM_ECHOS3R\x10\x03*i\n\tServoType\x12\x1a\n\x16SERVO_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fSERVO_TYPE_NONE\x10\x01\x12\x13\n\x0fSERVO_TYPE_SG90\x10\x02\x12\x16\n\x12SERVO_TYPE_SCS0009\x10\x03\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'websocket_message_pb2', _globals) if not _descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None - _globals['_MESSAGEKIND']._serialized_start=2016 - _globals['_MESSAGEKIND']._serialized_end=2367 - _globals['_MESSAGETYPE']._serialized_start=2369 - _globals['_MESSAGETYPE']._serialized_end=2481 - 
_globals['_STACKCHANSTATE']._serialized_start=2484 - _globals['_STACKCHANSTATE']._serialized_end=2617 - _globals['_SERVOOPERATION']._serialized_start=2619 - _globals['_SERVOOPERATION']._serialized_end=2718 - _globals['_DEVICETYPE']._serialized_start=2721 - _globals['_DEVICETYPE']._serialized_end=2854 - _globals['_SERVOTYPE']._serialized_start=2856 - _globals['_SERVOTYPE']._serialized_end=2961 + _globals['_MESSAGEKIND']._serialized_start=2383 + _globals['_MESSAGEKIND']._serialized_end=2764 + _globals['_MESSAGETYPE']._serialized_start=2766 + _globals['_MESSAGETYPE']._serialized_end=2878 + _globals['_STACKCHANSTATE']._serialized_start=2881 + _globals['_STACKCHANSTATE']._serialized_end=3014 + _globals['_SERVOOPERATION']._serialized_start=3016 + _globals['_SERVOOPERATION']._serialized_end=3115 + _globals['_DEVICETYPE']._serialized_start=3118 + _globals['_DEVICETYPE']._serialized_end=3251 + _globals['_SERVOTYPE']._serialized_start=3253 + _globals['_SERVOTYPE']._serialized_end=3358 _globals['_WEBSOCKETMESSAGE']._serialized_start=52 - _globals['_WEBSOCKETMESSAGE']._serialized_end=1098 - _globals['_AUDIOPCMSTART']._serialized_start=1100 - _globals['_AUDIOPCMSTART']._serialized_end=1115 - _globals['_AUDIOPCMEND']._serialized_start=1117 - _globals['_AUDIOPCMEND']._serialized_end=1130 - _globals['_AUDIOWAVSTART']._serialized_start=1132 - _globals['_AUDIOWAVSTART']._serialized_end=1186 - _globals['_AUDIOWAVEND']._serialized_start=1188 - _globals['_AUDIOWAVEND']._serialized_end=1201 - _globals['_AUDIOCHUNK']._serialized_start=1203 - _globals['_AUDIOCHUNK']._serialized_end=1234 - _globals['_STATECOMMAND']._serialized_start=1236 - _globals['_STATECOMMAND']._serialized_end=1305 - _globals['_WAKEWORDEVENT']._serialized_start=1307 - _globals['_WAKEWORDEVENT']._serialized_end=1340 - _globals['_STATEEVENT']._serialized_start=1342 - _globals['_STATEEVENT']._serialized_end=1409 - _globals['_SPEAKDONEEVENT']._serialized_start=1411 - _globals['_SPEAKDONEEVENT']._serialized_end=1441 - 
_globals['_SERVOCOMMANDSEQUENCE']._serialized_start=1443 - _globals['_SERVOCOMMANDSEQUENCE']._serialized_end=1521 - _globals['_SERVOCOMMAND']._serialized_start=1523 - _globals['_SERVOCOMMAND']._serialized_end=1625 - _globals['_SERVODONEEVENT']._serialized_start=1627 - _globals['_SERVODONEEVENT']._serialized_end=1657 - _globals['_FIRMWAREMETADATA']._serialized_start=1660 - _globals['_FIRMWAREMETADATA']._serialized_end=1941 - _globals['_SERVERMETADATA']._serialized_start=1943 - _globals['_SERVERMETADATA']._serialized_end=2013 + _globals['_WEBSOCKETMESSAGE']._serialized_end=1297 + _globals['_AUDIOPCMSTART']._serialized_start=1299 + _globals['_AUDIOPCMSTART']._serialized_end=1314 + _globals['_AUDIOPCMEND']._serialized_start=1316 + _globals['_AUDIOPCMEND']._serialized_end=1329 + _globals['_AUDIOWAVSTART']._serialized_start=1331 + _globals['_AUDIOWAVSTART']._serialized_end=1385 + _globals['_AUDIOWAVEND']._serialized_start=1387 + _globals['_AUDIOWAVEND']._serialized_end=1400 + _globals['_AUDIOCHUNK']._serialized_start=1402 + _globals['_AUDIOCHUNK']._serialized_end=1433 + _globals['_FILECHUNK']._serialized_start=1435 + _globals['_FILECHUNK']._serialized_end=1467 + _globals['_STATECOMMAND']._serialized_start=1469 + _globals['_STATECOMMAND']._serialized_end=1538 + _globals['_WAKEWORDEVENT']._serialized_start=1540 + _globals['_WAKEWORDEVENT']._serialized_end=1573 + _globals['_STATEEVENT']._serialized_start=1575 + _globals['_STATEEVENT']._serialized_end=1642 + _globals['_SPEAKDONEEVENT']._serialized_start=1644 + _globals['_SPEAKDONEEVENT']._serialized_end=1674 + _globals['_STOREDFILESTART']._serialized_start=1676 + _globals['_STOREDFILESTART']._serialized_end=1791 + _globals['_STOREDFILEEND']._serialized_start=1793 + _globals['_STOREDFILEEND']._serialized_end=1808 + _globals['_SERVOCOMMANDSEQUENCE']._serialized_start=1810 + _globals['_SERVOCOMMANDSEQUENCE']._serialized_end=1888 + _globals['_SERVOCOMMAND']._serialized_start=1890 + _globals['_SERVOCOMMAND']._serialized_end=1992 
+ _globals['_SERVODONEEVENT']._serialized_start=1994 + _globals['_SERVODONEEVENT']._serialized_end=2024 + _globals['_FIRMWAREMETADATA']._serialized_start=2027 + _globals['_FIRMWAREMETADATA']._serialized_end=2308 + _globals['_SERVERMETADATA']._serialized_start=2310 + _globals['_SERVERMETADATA']._serialized_end=2380 # @@protoc_insertion_point(module_scope) diff --git a/stackchan_server/protobuf_ws.py b/stackchan_server/protobuf_ws.py index 8569004..e84d2d1 100644 --- a/stackchan_server/protobuf_ws.py +++ b/stackchan_server/protobuf_ws.py @@ -92,6 +92,48 @@ def encode_audio_wav_end_message(seq: int) -> bytes: return message.SerializeToString() +def encode_stored_file_start_message( + seq: int, + *, + file_id: str, + content_type: str, + total_size: int, + sample_rate: int = 0, + channels: int = 0, +) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_STORED_FILE, + ws_pb2.MESSAGE_TYPE_START, + seq, + ) + message.stored_file_start.file_id = file_id + message.stored_file_start.content_type = content_type + message.stored_file_start.total_size = int(total_size) + message.stored_file_start.sample_rate = int(sample_rate) + message.stored_file_start.channels = int(channels) + return message.SerializeToString() + + +def encode_stored_file_data_message(seq: int, chunk_bytes: bytes) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_STORED_FILE, + ws_pb2.MESSAGE_TYPE_DATA, + seq, + ) + message.stored_file_data.chunk_bytes = chunk_bytes + return message.SerializeToString() + + +def encode_stored_file_end_message(seq: int) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_STORED_FILE, + ws_pb2.MESSAGE_TYPE_END, + seq, + ) + message.stored_file_end.SetInParent() + return message.SerializeToString() + + def encode_state_command_message(seq: int, state_id: int) -> bytes: message = _new_message( ws_pb2.MESSAGE_KIND_STATE_CMD, @@ -181,6 +223,9 @@ def encode_servo_command_message(seq: int, commands: Sequence[ServoCommand]) -> "encode_audio_wav_start_message", 
"encode_server_metadata_message", "encode_servo_command_message", + "encode_stored_file_data_message", + "encode_stored_file_end_message", + "encode_stored_file_start_message", "encode_state_command_message", "parse_websocket_message", "ws_pb2", diff --git a/stackchan_server/wakeword_sound.py b/stackchan_server/wakeword_sound.py new file mode 100644 index 0000000..e746f0c --- /dev/null +++ b/stackchan_server/wakeword_sound.py @@ -0,0 +1,201 @@ +from __future__ import annotations + +import os +import sys +import wave +from array import array +from dataclasses import dataclass +from logging import getLogger +from pathlib import Path + +DEFAULT_WAKE_WORD_SOUND_FILE_ID = "wakeword-detected-sound" +DEFAULT_WAKE_WORD_SOUND_CONTENT_TYPE = "audio/pcm" +WAKEWORD_SOUND_PATH_ENV_VAR = "STACKCHAN_WAKEWORD_SOUND_PATH" +WAKEWORD_SOUND_TARGET_SAMPLE_RATE = 24000 +WAKEWORD_SOUND_TARGET_CHANNELS = 1 +WAKEWORD_SOUND_PREROLL_MS = 40 +WAKEWORD_SOUND_POSTROLL_MS = 180 +WAKEWORD_SOUND_MIN_DURATION_MS = 700 + +logger = getLogger(__name__) + + +@dataclass(frozen=True) +class WakeWordSound: + file_id: str + content_type: str + payload: bytes + sample_rate: int + channels: int + + +def _decode_pcm16le(payload: bytes) -> array[int]: + samples = array("h") + samples.frombytes(payload) + if sys.byteorder != "little": + samples.byteswap() + return samples + + +def _encode_pcm16le(samples: array[int]) -> bytes: + encoded = array("h", samples) + if sys.byteorder != "little": + encoded.byteswap() + return encoded.tobytes() + + +def _clamp_pcm16(value: float) -> int: + return max(-32768, min(32767, int(round(value)))) + + +def _pcm16_silence(sample_count: int) -> array[int]: + if sample_count <= 0: + return array("h") + return array("h", [0]) * sample_count + + +def _mix_to_mono(samples: array[int], channels: int) -> array[int]: + if channels <= 1: + return array("h", samples) + + mono_samples = array("h") + frame_count = len(samples) // channels + for frame_index in range(frame_count): + base = 
frame_index * channels + mixed = sum(samples[base + channel_index] for channel_index in range(channels)) / channels + mono_samples.append(_clamp_pcm16(mixed)) + return mono_samples + + +def _resample_mono_pcm16(samples: array[int], src_rate: int, dst_rate: int) -> array[int]: + if src_rate == dst_rate or len(samples) <= 1: + return array("h", samples) + + dst_length = max(1, round(len(samples) * dst_rate / src_rate)) + resampled = array("h") + for dst_index in range(dst_length): + src_position = dst_index * src_rate / dst_rate + left_index = int(src_position) + if left_index >= len(samples) - 1: + resampled.append(samples[-1]) + continue + + right_index = left_index + 1 + fraction = src_position - left_index + interpolated = samples[left_index] + (samples[right_index] - samples[left_index]) * fraction + resampled.append(_clamp_pcm16(interpolated)) + return resampled + + +def _normalize_pcm16_payload(payload: bytes, sample_rate: int, channels: int) -> tuple[bytes, int, int]: + samples = _decode_pcm16le(payload) + mono_samples = _mix_to_mono(samples, channels) + normalized_samples = _resample_mono_pcm16( + mono_samples, + sample_rate, + WAKEWORD_SOUND_TARGET_SAMPLE_RATE, + ) + return ( + _encode_pcm16le(normalized_samples), + WAKEWORD_SOUND_TARGET_SAMPLE_RATE, + WAKEWORD_SOUND_TARGET_CHANNELS, + ) + + +def _pad_pcm16_for_short_playback(payload: bytes, sample_rate: int) -> bytes: + samples = _decode_pcm16le(payload) + preroll_samples = round(sample_rate * WAKEWORD_SOUND_PREROLL_MS / 1000) + postroll_samples = round(sample_rate * WAKEWORD_SOUND_POSTROLL_MS / 1000) + min_duration_samples = round(sample_rate * WAKEWORD_SOUND_MIN_DURATION_MS / 1000) + + padded = _pcm16_silence(preroll_samples) + padded.extend(samples) + padded.extend(_pcm16_silence(postroll_samples)) + + if len(padded) < min_duration_samples: + padded.extend(_pcm16_silence(min_duration_samples - len(padded))) + + return _encode_pcm16le(padded) + + +def load_wake_word_detected_sound_from_env() -> 
WakeWordSound | None: + raw_path = os.getenv(WAKEWORD_SOUND_PATH_ENV_VAR, "").strip() + if not raw_path: + logger.info( + "Wake-word sound WAV path is not configured: env=%s", + WAKEWORD_SOUND_PATH_ENV_VAR, + ) + return None + + wav_path = Path(raw_path).expanduser() + if not wav_path.is_absolute(): + wav_path = Path.cwd() / wav_path + + resolved_path = wav_path.resolve() + logger.info("Loading wake-word sound WAV from %s", resolved_path) + if not resolved_path.is_file(): + raise FileNotFoundError( + f"wake word sound wav file not found: {resolved_path}" + ) + + with wave.open(str(resolved_path), "rb") as wav_fp: + channels = wav_fp.getnchannels() + sample_width = wav_fp.getsampwidth() + sample_rate = wav_fp.getframerate() + payload = wav_fp.readframes(wav_fp.getnframes()) + + if sample_width != 2: + raise ValueError( + "wake word notification sound wav must be 16-bit PCM" + ) + if sample_rate <= 0: + raise ValueError("wake word notification sound wav has invalid sample rate") + if channels <= 0: + raise ValueError("wake word notification sound wav has invalid channels") + if not payload: + raise ValueError("wake word notification sound wav is empty") + + normalized_payload, normalized_sample_rate, normalized_channels = _normalize_pcm16_payload( + payload, + sample_rate, + channels, + ) + playback_ready_payload = _pad_pcm16_for_short_playback( + normalized_payload, + normalized_sample_rate, + ) + + logger.info( + "Loaded wake-word sound WAV path=%s source_sample_rate=%d source_channels=%d source_bytes=%d normalized_sample_rate=%d normalized_channels=%d normalized_bytes=%d playback_ready_bytes=%d preroll_ms=%d postroll_ms=%d min_duration_ms=%d", + resolved_path, + sample_rate, + channels, + len(payload), + normalized_sample_rate, + normalized_channels, + len(normalized_payload), + len(playback_ready_payload), + WAKEWORD_SOUND_PREROLL_MS, + WAKEWORD_SOUND_POSTROLL_MS, + WAKEWORD_SOUND_MIN_DURATION_MS, + ) + + return WakeWordSound( + 
file_id=DEFAULT_WAKE_WORD_SOUND_FILE_ID, + content_type=DEFAULT_WAKE_WORD_SOUND_CONTENT_TYPE, + payload=playback_ready_payload, + sample_rate=normalized_sample_rate, + channels=normalized_channels, + ) + + +__all__ = [ + "DEFAULT_WAKE_WORD_SOUND_CONTENT_TYPE", + "DEFAULT_WAKE_WORD_SOUND_FILE_ID", + "WAKEWORD_SOUND_PATH_ENV_VAR", + "WAKEWORD_SOUND_MIN_DURATION_MS", + "WAKEWORD_SOUND_POSTROLL_MS", + "WAKEWORD_SOUND_PREROLL_MS", + "WakeWordSound", + "load_wake_word_detected_sound_from_env", +] diff --git a/stackchan_server/ws_proxy.py b/stackchan_server/ws_proxy.py index 1c45236..39dcbab 100644 --- a/stackchan_server/ws_proxy.py +++ b/stackchan_server/ws_proxy.py @@ -21,11 +21,18 @@ encode_server_metadata_message, encode_servo_command_message, encode_state_command_message, + encode_stored_file_data_message, + encode_stored_file_end_message, + encode_stored_file_start_message, parse_websocket_message, ) from .speak import SpeakHandler from .static import LISTEN_AUDIO_FORMAT from .types import SpeechRecognizer, SpeechSynthesizer +from .wakeword_sound import ( + WAKEWORD_SOUND_PATH_ENV_VAR, + load_wake_word_detected_sound_from_env, +) logger = getLogger(__name__) @@ -35,6 +42,7 @@ _RECORDINGS_DIR = _BASE_DIR / "recordings" _DOWN_WAV_CHUNK = 4096 # bytes per WebSocket frame for synthesized audio (raw PCM) +_DOWN_FILE_CHUNK = 4096 # bytes per WebSocket frame for stored-file transfer _DOWN_SEGMENT_MILLIS = ( 2000 # duration of a single START-DATA-END segment in milliseconds ) @@ -191,6 +199,81 @@ async def speak(self, text: str) -> None: async def send_state_command(self, state_id: int | FirmwareState) -> None: await self._send_state_command(state_id) + async def send_file( + self, + *, + file_id: str, + content_type: str, + payload: bytes, + sample_rate: int = 0, + channels: int = 0, + ) -> None: + if not file_id: + raise ValueError("file_id must not be empty") + if not content_type: + raise ValueError("content_type must not be empty") + + logger.info( + "Sending stored 
file id=%s type=%s bytes=%d sample_rate=%d channels=%d", + file_id, + content_type, + len(payload), + sample_rate, + channels, + ) + await self.ws.send_bytes( + encode_stored_file_start_message( + self._next_down_seq(), + file_id=file_id, + content_type=content_type, + total_size=len(payload), + sample_rate=sample_rate, + channels=channels, + ) + ) + offset = 0 + chunk_count = 0 + while offset < len(payload): + chunk = payload[offset : offset + _DOWN_FILE_CHUNK] + await self.ws.send_bytes( + encode_stored_file_data_message(self._next_down_seq(), chunk) + ) + offset += len(chunk) + chunk_count += 1 + logger.info( + "Stored file payload sent id=%s chunks=%d bytes=%d", + file_id, + chunk_count, + len(payload), + ) + await self.ws.send_bytes(encode_stored_file_end_message(self._next_down_seq())) + logger.info("Stored file transfer completed id=%s", file_id) + + async def send_default_wake_word_sound(self) -> None: + sound = load_wake_word_detected_sound_from_env() + if sound is None: + logger.info( + "Wake-word sound upload skipped because %s is not set", + WAKEWORD_SOUND_PATH_ENV_VAR, + ) + return + + logger.info( + "Uploading wake-word sound id=%s sample_rate=%d channels=%d bytes=%d", + sound.file_id, + sound.sample_rate, + sound.channels, + len(sound.payload), + ) + + await self.send_file( + file_id=sound.file_id, + content_type=sound.content_type, + payload=sound.payload, + sample_rate=sound.sample_rate, + channels=sound.channels, + ) + async def reset_state(self) -> None: await self.send_state_command(FirmwareState.IDLE)