Skip to content

Commit 0ed9518

Browse files
committed
[update] update whisper
1 parent c8ed5ec commit 0ed9518

1 file changed

Lines changed: 18 additions & 12 deletions

File tree

  • projects/llm_framework/main_whisper/src

projects/llm_framework/main_whisper/src/main.cpp

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
using namespace StackFlows;
3030

3131
int main_exit_flage = 0;
32+
3233
static void __sigint(int iSigNo)
3334
{
3435
SLOGW("llm_sys will be exit!");
@@ -85,13 +86,14 @@ class llm_task {
8586
std::string model_;
8687
std::string response_format_;
8788
std::vector<std::string> inputs_;
89+
std::string language_;
8890
bool enoutput_;
8991
bool enstream_;
9092
bool ensleep_;
9193
std::atomic_bool superior_flage_;
9294
std::atomic_bool audio_flage_;
9395
std::atomic_bool awake_flage_;
94-
std::atomic_bool vad_endpoint_;
96+
std::atomic_bool endpoint_flage_;
9597
std::string superior_id_;
9698
static int ax_init_flage_;
9799
task_callback_t out_callback_;
@@ -107,6 +109,7 @@ class llm_task {
107109
model_ = config_body.at("model");
108110
response_format_ = config_body.at("response_format");
109111
enoutput_ = config_body.at("enoutput");
112+
language_ = config_body.at("language");
110113
if (config_body.contains("input")) {
111114
if (config_body["input"].is_string()) {
112115
inputs_.push_back(config_body["input"].get<std::string>());
@@ -199,7 +202,8 @@ class llm_task {
199202
// Compatible operation
200203
if (model_ == "whisper-tiny")
201204
config_file_paths = get_config_file_paths(base_model_path_, base_model_config_path_, "whisper-tiny");
202-
205+
else if (model_ == "whisper-base")
206+
config_file_paths = get_config_file_paths(base_model_path_, base_model_config_path_, "whisper-base");
203207
try {
204208
for (auto file_name : config_file_paths) {
205209
std::ifstream config_file(file_name);
@@ -302,7 +306,10 @@ class llm_task {
302306
void sys_pcm_on_data(const std::string &raw)
303307
{
304308
static int count = 0;
305-
if (count < delay_audio_frame_) {
309+
double start, end;
310+
double start_all, end_all;
311+
312+
if (count < delay_audio_frame_ || endpoint_flage_) {
306313
buffer_write_char(pcmdata, raw.c_str(), raw.length());
307314
count++;
308315
return;
@@ -373,8 +380,6 @@ class llm_task {
373380
memcpy(continous_mel.data() + i * n_len, mel[i].data(), sizeof(float) * n_len);
374381
}
375382

376-
double start, end;
377-
double start_all, end_all;
378383
start = get_current_time();
379384
start_all = get_current_time();
380385
encoder_->SetInput(continous_mel.data(), 0);
@@ -387,7 +392,7 @@ class llm_task {
387392
printf("Encoder run take %.2f ms\n", (end - start));
388393

389394
// detect language
390-
SOT_SEQUENCE[1] = detect_language(mode_config_.language);
395+
SOT_SEQUENCE[1] = detect_language(language_);
391396

392397
// decoder_main
393398
start = get_current_time();
@@ -483,7 +488,6 @@ class llm_task {
483488
if (ensleep_) {
484489
if (pause) pause();
485490
}
486-
// }
487491
}
488492

489493
void kws_awake()
@@ -534,6 +538,7 @@ class llm_task {
534538
buffer_destroy(pcmdata);
535539
}
536540
};
541+
537542
int llm_task::ax_init_flage_ = 0;
538543
#undef CONFIG_AUTO_SET
539544

@@ -545,6 +550,7 @@ class llm_whisper : public StackFlow {
545550

546551
public:
547552
enum { EVENT_LOAD_CONFIG = EVENT_EXPORT + 1, EVENT_TASK_PAUSE };
553+
548554
llm_whisper() : StackFlow("whisper")
549555
{
550556
task_count_ = 1;
@@ -726,7 +732,7 @@ class llm_whisper : public StackFlow {
726732
return;
727733
}
728734
if (data == "true" || data == "false") {
729-
llm_task_obj->vad_endpoint_ = (data == "true");
735+
llm_task_obj->endpoint_flage_ = (data == "true");
730736
}
731737
}
732738

@@ -816,7 +822,7 @@ class llm_whisper : public StackFlow {
816822
std::weak_ptr<llm_channel_obj>(llm_channel), std::placeholders::_1,
817823
std::placeholders::_2));
818824
} else if (input.find("vad") != std::string::npos) {
819-
llm_task_obj->vad_endpoint_ = true;
825+
llm_task_obj->endpoint_flage_ = false;
820826
// task_pause(work_id, "");
821827
llm_channel->subscriber_work_id(
822828
input, std::bind(&llm_whisper::vad_endpoint, this, std::weak_ptr<llm_task>(llm_task_obj),
@@ -867,11 +873,11 @@ class llm_whisper : public StackFlow {
867873
std::weak_ptr<llm_channel_obj>(llm_channel), std::placeholders::_1, std::placeholders::_2));
868874
llm_task_obj->inputs_.push_back(data);
869875
} else if (data.find("vad") != std::string::npos) {
870-
llm_task_obj->vad_endpoint_ = true;
871-
ret = llm_channel->subscriber_work_id(
876+
llm_task_obj->endpoint_flage_ = false;
877+
ret = llm_channel->subscriber_work_id(
872878
data,
873879
std::bind(&llm_whisper::vad_endpoint, this, std::weak_ptr<llm_task>(llm_task_obj),
874-
std::weak_ptr<llm_channel_obj>(llm_channel), std::placeholders::_1, std::placeholders::_2));
880+
std::weak_ptr<llm_channel_obj>(llm_channel), std::placeholders::_1, std::placeholders::_2));
875881
}
876882
if (ret) {
877883
error_body["code"] = -20;

0 commit comments

Comments
 (0)