Skip to content

Commit 9167b6e

Browse files
author
LittleMouse
committed
[update] update llm_vlm encoder. update audio cache.
1 parent 73c4a49 commit 9167b6e

16 files changed

Lines changed: 1725 additions & 3223 deletions

File tree

projects/llm_framework/main_asr/src/main.cpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ class llm_task {
5858
std::atomic_bool audio_flage_;
5959
std::atomic_bool awake_flage_;
6060
int awake_delay_ = 50;
61-
int delay_audio_frame_ = 10;
61+
int delay_audio_frame_ = 11;
6262
buffer_t *pcmdata;
6363

6464
std::function<void(void)> pause;
@@ -187,18 +187,20 @@ class llm_task {
187187
count++;
188188
return;
189189
}
190-
buffer_write_char(pcmdata, raw.data(), raw.length());
191190
buffer_position_set(pcmdata, 0);
192-
count = 0;
191+
193192
std::vector<float> floatSamples;
194193
{
195194
int16_t audio_val;
196-
while (buffer_read_u16(pcmdata, (unsigned short *)&audio_val, 1)) {
197-
float normalizedSample = (float)audio_val / INT16_MAX;
195+
while (buffer_read_i16(pcmdata, &audio_val, 1)) {
196+
float normalizedSample = static_cast<float>(audio_val) / INT16_MAX;
198197
floatSamples.push_back(normalizedSample);
199198
}
200199
}
201-
buffer_position_set(pcmdata, 0);
200+
201+
buffer_resize(pcmdata, 0);
202+
count = 0;
203+
202204
if (awake_flage_ && recognizer_stream_) {
203205
recognizer_stream_.reset();
204206
awake_flage_ = false;

projects/llm_framework/main_kws/src/main.cpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ class llm_task {
5959
bool enwake_audio_;
6060
std::atomic_bool audio_flage_;
6161
task_callback_t out_callback_;
62-
int delay_audio_frame_ = 10;
62+
int delay_audio_frame_ = 11;
6363
buffer_t *pcmdata;
6464
std::string wake_wav_file_;
6565

@@ -229,22 +229,24 @@ class llm_task {
229229
{
230230
static int count = 0;
231231
if (count < delay_audio_frame_) {
232-
buffer_write_char(pcmdata, raw.c_str(), raw.length());
232+
buffer_write_char(pcmdata, raw.data(), raw.length());
233233
count++;
234234
return;
235235
}
236-
buffer_write_char(pcmdata, raw.data(), raw.length());
237236
buffer_position_set(pcmdata, 0);
238-
count = 0;
237+
239238
std::vector<float> floatSamples;
240239
{
241240
int16_t audio_val;
242-
while (buffer_read_u16(pcmdata, (unsigned short *)&audio_val, 1)) {
243-
float normalizedSample = (float)audio_val / INT16_MAX;
241+
while (buffer_read_i16(pcmdata, &audio_val, 1)) {
242+
float normalizedSample = static_cast<float>(audio_val) / INT16_MAX;
244243
floatSamples.push_back(normalizedSample);
245244
}
246245
}
247-
buffer_position_set(pcmdata, 0);
246+
247+
buffer_resize(pcmdata, 0);
248+
count = 0;
249+
248250
spotter_stream_->AcceptWaveform(mode_config_.feat_config.sampling_rate, floatSamples.data(),
249251
floatSamples.size());
250252
while (spotter_->IsReady(spotter_stream_.get())) {

projects/llm_framework/main_llm/src/main.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,7 @@ class llm_task {
130130
std::string base_model = base_model_path_ + model_ + "/";
131131
SLOGI("base_model %s", base_model.c_str());
132132

133+
CONFIG_AUTO_SET(file_body["mode_param"], system_prompt);
133134
CONFIG_AUTO_SET(file_body["mode_param"], tokenizer_type);
134135
CONFIG_AUTO_SET(file_body["mode_param"], filename_tokenizer_model);
135136
CONFIG_AUTO_SET(file_body["mode_param"], url_tokenizer_model);

projects/llm_framework/main_vad/src/main.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ class llm_task {
6060
std::string superior_id_;
6161
task_callback_t out_callback_;
6262
int awake_delay_ = 50;
63-
int delay_audio_frame_ = 3;
63+
int delay_audio_frame_ = 4;
6464
buffer_t *pcmdata;
6565
std::string wake_wav_file_;
6666

@@ -158,18 +158,19 @@ class llm_task {
158158
count++;
159159
return;
160160
}
161-
buffer_write_char(pcmdata, raw.data(), raw.length());
162161
buffer_position_set(pcmdata, 0);
163-
count = 0;
162+
164163
std::vector<float> floatSamples;
165164
{
166165
int16_t audio_val;
167-
while (buffer_read_u16(pcmdata, (unsigned short *)&audio_val, 1)) {
168-
float normalizedSample = (float)audio_val / INT16_MAX;
166+
while (buffer_read_i16(pcmdata, &audio_val, 1)) {
167+
float normalizedSample = static_cast<float>(audio_val) / INT16_MAX;
169168
floatSamples.push_back(normalizedSample);
170169
}
171170
}
172-
buffer_position_set(pcmdata, 0);
171+
buffer_resize(pcmdata, 0);
172+
count = 0;
173+
173174
vad_->AcceptWaveform(floatSamples.data(), floatSamples.size());
174175

175176
if (vad_->IsSpeechDetected() && !printed) {

0 commit comments

Comments
 (0)