Commit fb854d6

Merge branch 'dev' of github.com:m5stack/StackFlow into dev
2 parents: 433d77a + 8d4f0af

8 files changed: 667 additions & 255 deletions

projects/llm_framework/main_llm/src/main.cpp

Lines changed: 29 additions & 0 deletions
```diff
@@ -119,6 +119,8 @@ class llm_task {
         CONFIG_AUTO_SET(file_body["mode_param"], b_use_mmap_load_embed);
         CONFIG_AUTO_SET(file_body["mode_param"], b_dynamic_load_axmodel_layer);
         CONFIG_AUTO_SET(file_body["mode_param"], max_token_len);
+        CONFIG_AUTO_SET(file_body["mode_param"], temperature);
+        CONFIG_AUTO_SET(file_body["mode_param"], top_p);
 
         if (mode_config_.filename_tokenizer_model.find("http:") != std::string::npos) {
             std::string tokenizer_file;
@@ -279,6 +281,33 @@ class llm_llm : public StackFlow {
         }
     }
 
+    void task_pause(const std::weak_ptr<llm_task> llm_task_obj_weak,
+                    const std::weak_ptr<llm_channel_obj> llm_channel_weak)
+    {
+        auto llm_task_obj = llm_task_obj_weak.lock();
+        auto llm_channel  = llm_channel_weak.lock();
+        if (!(llm_task_obj && llm_channel)) {
+            return;
+        }
+        llm_task_obj->lLaMa_->Stop();
+    }
+
+    void pause(const std::string &work_id, const std::string &object, const std::string &data) override
+    {
+        SLOGI("llm_llm::pause:%s", data.c_str());
+
+        nlohmann::json error_body;
+        int work_id_num = sample_get_work_id_num(work_id);
+        if (llm_task_.find(work_id_num) == llm_task_.end()) {
+            error_body["code"]    = -6;
+            error_body["message"] = "Unit Does Not Exist";
+            send("None", "None", error_body, work_id);
+            return;
+        }
+        task_pause(llm_task_[work_id_num], get_channel(work_id_num));
+        send("None", "None", LLM_NO_ERROR, work_id);
+    }
+
     void task_user_data(const std::weak_ptr<llm_task> llm_task_obj_weak,
                         const std::weak_ptr<llm_channel_obj> llm_channel_weak, const std::string &object,
                         const std::string &data)
```
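Note: the new `pause` handler stops in-flight generation via `lLaMa_->Stop()` and replies with `LLM_NO_ERROR` on success, or code -6 when the unit does not exist. A minimal client-side sketch of what a pause request could look like, assuming the `request_id`/`work_id`/`action` field names used by StackFlow's JSON protocol elsewhere; the exact wire contract is not shown in this commit:

```cpp
// Hypothetical client-side sketch of a pause request. Field names mirror the
// request/response shape visible in this diff (request_id, work_id, object,
// data); "action" and the work_id value are assumptions, not confirmed here.
#include <nlohmann/json.hpp>
#include <iostream>

int main()
{
    nlohmann::json req;
    req["request_id"] = "pause_001";
    req["work_id"]    = "llm.1003";  // unit id returned by setup (assumed)
    req["action"]     = "pause";     // would dispatch to llm_llm::pause()
    req["object"]     = "None";
    req["data"]       = "None";
    std::cout << req.dump() << std::endl;  // payload a client would put on the channel
    return 0;
}
```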

projects/llm_framework/main_llm/src/runner/LLM.hpp

Lines changed: 5 additions & 3 deletions
```diff
@@ -48,6 +48,8 @@ struct LLMAttrType
     int kv_cache_num = 1024;  // auto calc
     int kv_cache_size = 256;  // auto calc
 
+    float temperature = 0.7f;
+    float top_p = 0.9f;
     bool b_use_mmap_load_embed = false;
     bool b_dynamic_load_axmodel_layer = false;
 
@@ -86,7 +88,7 @@ class LLM
 
     bool b_stop = false;
 
-    static int post_process(unsigned short *p, int n, std::vector<int> &history, float *val = 0)
+    int post_process(unsigned short *p, int n, std::vector<int> &history, float *val = 0)
     {
         std::vector<float> logits(n);
         for (int i = 0; i < n; i++)
@@ -95,10 +97,10 @@ class LLM
             logits[i] = *reinterpret_cast<float *>(&proc);
         }
         LLMPostprocess postprocess;
-        postprocess.set_temperature(true, 0.8f);
+        postprocess.set_temperature(true, _attr.temperature);
         postprocess.set_repetition_penalty(true, 1.2f);
         // postprocess.set_top_k_sampling(true, 40);
-        postprocess.set_top_p_sampling(true, 0.9f);
+        postprocess.set_top_p_sampling(true, _attr.top_p);
 
         return postprocess.apply(logits, history);
```
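Note: `post_process` drops its `static` qualifier because it now reads the per-instance `_attr.temperature` and `_attr.top_p` instead of hard-coded constants. For reference, a standalone sketch of what these two knobs conventionally mean in sampling; `LLMPostprocess`'s actual implementation is not shown in this commit, so this is the textbook algorithm, not the library's code:

```cpp
// Textbook temperature + top-p (nucleus) sampling over a logits vector.
// Assumes a non-empty logits vector; not taken from LLMPostprocess.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <random>
#include <vector>

int sample_top_p(std::vector<float> logits, float temperature, float top_p, std::mt19937 &rng)
{
    // 1) Temperature: divide logits before softmax. <1.0 sharpens, >1.0 flattens.
    for (auto &l : logits) l /= temperature;

    // 2) Softmax (subtract max for numerical stability).
    float mx = *std::max_element(logits.begin(), logits.end());
    std::vector<float> probs(logits.size());
    float sum = 0.f;
    for (std::size_t i = 0; i < logits.size(); ++i) {
        probs[i] = std::exp(logits[i] - mx);
        sum += probs[i];
    }
    for (auto &p : probs) p /= sum;

    // 3) Top-p: keep the smallest set of tokens whose cumulative mass >= top_p.
    std::vector<int> idx(probs.size());
    std::iota(idx.begin(), idx.end(), 0);
    std::sort(idx.begin(), idx.end(), [&](int a, int b) { return probs[a] > probs[b]; });
    float cum = 0.f;
    std::size_t keep = 0;
    while (keep < idx.size() && cum < top_p) cum += probs[idx[keep++]];

    // 4) Draw one token from the kept set (renormalized by sampling in [0, cum)).
    std::uniform_real_distribution<float> dist(0.f, cum);
    float r = dist(rng), acc = 0.f;
    for (std::size_t i = 0; i < keep; ++i) {
        acc += probs[idx[i]];
        if (r <= acc) return idx[i];
    }
    return idx[keep - 1];
}
```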

projects/llm_framework/main_melotts/src/main.cpp

Lines changed: 25 additions & 8 deletions
```diff
@@ -233,9 +233,20 @@ class llm_task {
         src_delete(src_state);
     }
 
-    bool TTS(const std::string &msg_str)
+    bool TTS(const std::string &msg_str, bool finish)
     {
         try {
+            std::vector<int16_t> wav_pcm_data;
+            if (msg_str.empty()) {
+                SLOGI("empty");
+                if (out_callback_) {
+                    std::string output = wav_pcm_data.empty() ?
+                                             std::string() :
+                                             std::string((char *)wav_pcm_data.data(), wav_pcm_data.size() * sizeof(int16_t));
+                    out_callback_(output, finish);
+                }
+                return false;
+            }
             std::vector<int> phones_bef, tones_bef;
             lexicon_->convert(msg_str, phones_bef, tones_bef);
             // Add blank between words
@@ -284,11 +295,10 @@ class llm_task {
             std::vector<float> tmp_pcm((pcmlist.size() * src_ratio + 1));
             int len;
             resample_audio(pcmlist.data(), pcmlist.size(), tmp_pcm.data(), &len, src_ratio);
-            std::vector<int16_t> wav_pcm_data;
             std::transform(tmp_pcm.begin(), tmp_pcm.begin() + len, std::back_inserter(wav_pcm_data),
                            [](const auto val) { return (int16_t)(val * INT16_MAX); });
             if (out_callback_)
-                out_callback_(std::string((char *)wav_pcm_data.data(), wav_pcm_data.size() * sizeof(int16_t)), true);
+                out_callback_(std::string((char *)wav_pcm_data.data(), wav_pcm_data.size() * sizeof(int16_t)), finish);
         } catch (...) {
             return true;
         }
@@ -342,6 +352,9 @@ class llm_task {
 
     ~llm_task()
     {
+        if (decoder_) {
+            decoder_->Release();
+        }
         _ax_deinit();
     }
 };
@@ -368,15 +381,17 @@ class llm_tts : public StackFlow {
             return;
         }
         std::string base64_data;
-        int len = encode_base64(data, base64_data);
+        if (!data.empty()) {
+            int len = encode_base64(data, base64_data);
+        }
         if (llm_channel->enstream_) {
             static int count = 0;
             nlohmann::json data_body;
             data_body["index"] = count++;
-            if (!finish)
+            if (!data.empty())
                 data_body["delta"] = base64_data;
             else
-                data_body["delta"] = std::string("");
+                data_body["delta"] = "";
             data_body["finish"] = finish;
             if (finish) count = 0;
             llm_channel->send(llm_task_obj->response_format_, data_body, LLM_NO_ERROR);
@@ -433,7 +448,7 @@ class llm_tts : public StackFlow {
             for (auto cutf8 : tmp_data) {
                 if (is_breakpoint(cutf8)) {
                     llm_task_obj->tts_string_stream_buff += cutf8;
-                    ret = llm_task_obj->TTS(llm_task_obj->tts_string_stream_buff);
+                    ret = llm_task_obj->TTS(llm_task_obj->tts_string_stream_buff, false);
                     llm_task_obj->tts_string_stream_buff.clear();
                     if (ret) {
                         error_body["code"] = -11;
@@ -447,13 +462,15 @@ class llm_tts : public StackFlow {
             if (finish_flage) {
                 if (!llm_task_obj->tts_string_stream_buff.empty()) {
                     llm_task_obj->tts_string_stream_buff.push_back('.');
-                    ret = llm_task_obj->TTS(llm_task_obj->tts_string_stream_buff);
+                    ret = llm_task_obj->TTS(llm_task_obj->tts_string_stream_buff, true);
                     llm_task_obj->tts_string_stream_buff.clear();
                     if (ret) {
                         error_body["code"] = -11;
                         error_body["message"] = "Model run failed.";
                         llm_channel->send("None", "None", error_body, llm_channel->work_id_);
                     }
+                } else {
+                    llm_task_obj->TTS("", true);
                 }
             }
         }
```

projects/llm_framework/main_sys/src/event_loop.cpp

Lines changed: 65 additions & 1 deletion
```diff
@@ -1,4 +1,3 @@
-
 /*
  * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
  *
@@ -227,6 +226,70 @@ int sys_unit_call(int com_id, const nlohmann::json &json_obj)
     return out;
 }
 
+void get_mem_cmm_info(unsigned long *total_size, unsigned long *used, unsigned long *remain)
+{
+    std::ifstream file("/proc/ax_proc/mem_cmm_info");
+    std::vector<std::string> lines;
+    std::string line;
+
+    while (std::getline(file, line)) {
+        lines.push_back(line);
+    }
+
+    if (!lines.empty()) {
+        std::string last_line = lines.back();
+
+        size_t pos = last_line.find("total size=");
+        if (pos != std::string::npos) {
+            pos += 11;
+            size_t end = last_line.find('K', pos);
+            *total_size = std::stoul(last_line.substr(pos, end - pos));
+        }
+
+        pos = last_line.find("used=");
+        if (pos != std::string::npos) {
+            pos += 5;
+            size_t end = last_line.find('K', pos);
+            *used = std::stoul(last_line.substr(pos, end - pos));
+        }
+
+        pos = last_line.find("remain=");
+        if (pos != std::string::npos) {
+            pos += 7;
+            size_t end = last_line.find('K', pos);
+            *remain = std::stoul(last_line.substr(pos, end - pos));
+        }
+    }
+}
+
+void _sys_cmminfo(int com_id, const nlohmann::json &json_obj)
+{
+    unsigned long total_size, used, remain;
+    get_mem_cmm_info(&total_size, &used, &remain);
+
+    nlohmann::json out_body;
+    nlohmann::json data_body;
+    out_body["request_id"] = json_obj["request_id"];
+    out_body["work_id"] = std::string("sys");
+    out_body["created"] = time(NULL);
+    out_body["error"] = nlohmann::json::parse("{\"code\":0, \"message\":\"\"}");
+    out_body["object"] = std::string("sys.cmminfo");
+    data_body["total"] = total_size;
+    data_body["used"] = used;
+    data_body["remain"] = remain;
+    out_body["data"] = data_body;
+    std::string out = out_body.dump();
+    zmq_com_send(com_id, out);
+}
+
+int sys_cmminfo(int com_id, const nlohmann::json &json_obj)
+{
+    int out = 0;
+    std::thread t(_sys_cmminfo, com_id, json_obj);
+    t.detach();
+    return out;
+}
+
 int sys_lsmode(int com_id, const nlohmann::json &json_obj)
 {
     int out;
@@ -675,6 +738,7 @@ void server_work()
     key_sql["sys.version"] = sys_version;
     key_sql["sys.rmmode"] = sys_rmmode;
     key_sql["sys.unit_call"] = sys_unit_call;
+    key_sql["sys.cmminfo"] = sys_cmminfo;
 }
 
 void server_stop_work()
```

projects/llm_framework/main_vlm/src/main.cpp

Lines changed: 52 additions & 12 deletions
```diff
@@ -44,8 +44,8 @@ class llm_task {
     std::string response_format_;
     std::vector<std::string> inputs_;
     std::vector<unsigned short> prompt_data_;
-    std::vector<unsigned char> image_data_;
-    std::vector<unsigned short> img_embed;
+    std::vector<std::vector<unsigned char>> image_datas_;
+    std::vector<std::vector<unsigned short>> img_embeds;
     std::string prompt_;
     task_callback_t out_callback_;
     bool enoutput_;
@@ -125,6 +125,8 @@ class llm_task {
         CONFIG_AUTO_SET(file_body["mode_param"], b_use_mmap_load_embed);
         CONFIG_AUTO_SET(file_body["mode_param"], b_dynamic_load_axmodel_layer);
         CONFIG_AUTO_SET(file_body["mode_param"], max_token_len);
+        CONFIG_AUTO_SET(file_body["mode_param"], temperature);
+        CONFIG_AUTO_SET(file_body["mode_param"], top_p);
 
         if (mode_config_.filename_tokenizer_model.find("http:") != std::string::npos) {
             std::string tokenizer_file;
@@ -171,7 +173,11 @@ class llm_task {
             }
         };
         lLaMa_ = std::make_unique<LLM>();
-        if (!lLaMa_->Init(mode_config_)) return -2;
+        if (!lLaMa_->Init(mode_config_)) {
+            lLaMa_->Deinit();
+            lLaMa_.reset();
+            return -2;
+        }
 
     } catch (...) {
         SLOGE("config false");
@@ -209,18 +215,25 @@ class llm_task {
     void inference(const std::string &msg)
    {
         try {
-            if (image_data_.empty()) {
+            if (image_datas_.empty()) {
                 lLaMa_->Encode(prompt_data_, prompt_complete(msg));
                 std::string out = lLaMa_->Run(prompt_data_);
                 if (out_callback_) out_callback_(out, true);
             } else {
-                cv::Mat src = cv::imdecode(image_data_, cv::IMREAD_COLOR);
-                if (src.empty()) return;
-                image_data_.clear();
-                lLaMa_->Encode(src, img_embed);
-                lLaMa_->Encode(img_embed, prompt_data_, prompt_complete(msg));
-                std::string out = lLaMa_->Run(prompt_data_);
-                if (out_callback_) out_callback_(out, true);
+                img_embeds.clear();
+                for (auto &img_data : image_datas_) {
+                    cv::Mat src = cv::imdecode(img_data, cv::IMREAD_COLOR);
+                    if (src.empty()) continue;
+                    std::vector<unsigned short> embed;
+                    lLaMa_->Encode(src, embed);
+                    img_embeds.push_back(embed);
+                }
+                image_datas_.clear();
+                if (!img_embeds.empty()) {
+                    lLaMa_->Encode(img_embeds, prompt_data_, prompt_complete(msg));
+                    std::string out = lLaMa_->Run(prompt_data_);
+                    if (out_callback_) out_callback_(out, true);
+                }
             }
         } catch (...) {
             SLOGW("lLaMa_->Run have error!");
@@ -293,6 +306,33 @@ class llm_llm : public StackFlow {
         }
     }
 
+    void task_pause(const std::weak_ptr<llm_task> llm_task_obj_weak,
+                    const std::weak_ptr<llm_channel_obj> llm_channel_weak)
+    {
+        auto llm_task_obj = llm_task_obj_weak.lock();
+        auto llm_channel  = llm_channel_weak.lock();
+        if (!(llm_task_obj && llm_channel)) {
+            return;
+        }
+        llm_task_obj->lLaMa_->Stop();
+    }
+
+    void pause(const std::string &work_id, const std::string &object, const std::string &data) override
+    {
+        SLOGI("llm_llm::pause:%s", data.c_str());
+
+        nlohmann::json error_body;
+        int work_id_num = sample_get_work_id_num(work_id);
+        if (llm_task_.find(work_id_num) == llm_task_.end()) {
+            error_body["code"]    = -6;
+            error_body["message"] = "Unit Does Not Exist";
+            send("None", "None", error_body, work_id);
+            return;
+        }
+        task_pause(llm_task_[work_id_num], get_channel(work_id_num));
+        send("None", "None", LLM_NO_ERROR, work_id);
+    }
+
     void task_user_data(const std::weak_ptr<llm_task> llm_task_obj_weak,
                         const std::weak_ptr<llm_channel_obj> llm_channel_weak, const std::string &object,
                         const std::string &data)
@@ -336,7 +376,7 @@ class llm_llm : public StackFlow {
             next_data = &tmp_msg2;
         }
         if (object.find("jpeg") != std::string::npos) {
-            llm_task_obj->image_data_.assign(next_data->begin(), next_data->end());
+            llm_task_obj->image_datas_.emplace_back(next_data->begin(), next_data->end());
             return;
         }
         llm_task_obj->inference((*next_data));
```
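Note: with `image_data_` widened to `image_datas_`, each incoming JPEG payload is queued rather than overwritten, and `inference()` embeds the entire batch before running the model. A hypothetical caller sequence, with `send_to_unit` standing in for whatever channel API the client actually uses; the object strings are assumptions, the handler only checks for the substring "jpeg":

```cpp
// Sketch of the multi-image flow this change enables: push N frames, then a
// text prompt. send_to_unit and the object names are placeholders, not the
// repo's real client API.
#include <iostream>
#include <string>
#include <vector>

static void send_to_unit(const std::string &object, const std::string &payload)
{
    std::cout << "send object=" << object << " bytes=" << payload.size() << std::endl;
}

static void ask_about_frames(const std::vector<std::string> &jpeg_frames, const std::string &question)
{
    for (const auto &jpg : jpeg_frames)
        send_to_unit("vlm.jpeg.base64", jpg);  // each frame lands in image_datas_
    send_to_unit("vlm.utf-8", question);       // non-jpeg payload triggers inference() over the batch
}

int main()
{
    ask_about_frames({"<jpeg-bytes-1>", "<jpeg-bytes-2>"}, "Compare the two frames.");
    return 0;
}
```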
