@@ -44,8 +44,8 @@ class llm_task {
4444 std::string response_format_;
4545 std::vector<std::string> inputs_;
4646 std::vector<unsigned short > prompt_data_;
47- std::vector<unsigned char > image_data_ ;
48- std::vector<unsigned short > img_embed ;
47+ std::vector<std::vector< unsigned char >> image_datas_ ;
48+ std::vector<std::vector< unsigned short >> img_embeds ;
4949 std::string prompt_;
5050 task_callback_t out_callback_;
5151 bool enoutput_;
@@ -215,18 +215,25 @@ class llm_task {
215215 void inference (const std::string &msg)
216216 {
217217 try {
218- if (image_data_ .empty ()) {
218+ if (image_datas_ .empty ()) {
219219 lLaMa_->Encode (prompt_data_, prompt_complete (msg));
220220 std::string out = lLaMa_->Run (prompt_data_);
221221 if (out_callback_) out_callback_ (out, true );
222222 } else {
223- cv::Mat src = cv::imdecode (image_data_, cv::IMREAD_COLOR);
224- if (src.empty ()) return ;
225- image_data_.clear ();
226- lLaMa_->Encode (src, img_embed);
227- lLaMa_->Encode (img_embed, prompt_data_, prompt_complete (msg));
228- std::string out = lLaMa_->Run (prompt_data_);
229- if (out_callback_) out_callback_ (out, true );
223+ img_embeds.clear ();
224+ for (auto &img_data : image_datas_) {
225+ cv::Mat src = cv::imdecode (img_data, cv::IMREAD_COLOR);
226+ if (src.empty ()) continue ;
227+ std::vector<unsigned short > embed;
228+ lLaMa_->Encode (src, embed);
229+ img_embeds.push_back (embed);
230+ }
231+ image_datas_.clear ();
232+ if (!img_embeds.empty ()) {
233+ lLaMa_->Encode (img_embeds, prompt_data_, prompt_complete (msg));
234+ std::string out = lLaMa_->Run (prompt_data_);
235+ if (out_callback_) out_callback_ (out, true );
236+ }
230237 }
231238 } catch (...) {
232239 SLOGW (" lLaMa_->Run have error!" );
@@ -300,7 +307,7 @@ class llm_llm : public StackFlow {
300307 }
301308
302309 void task_pause (const std::weak_ptr<llm_task> llm_task_obj_weak,
303- const std::weak_ptr<llm_channel_obj> llm_channel_weak)
310+ const std::weak_ptr<llm_channel_obj> llm_channel_weak)
304311 {
305312 auto llm_task_obj = llm_task_obj_weak.lock ();
306313 auto llm_channel = llm_channel_weak.lock ();
@@ -369,7 +376,7 @@ class llm_llm : public StackFlow {
369376 next_data = &tmp_msg2;
370377 }
371378 if (object.find (" jpeg" ) != std::string::npos) {
372- llm_task_obj->image_data_ . assign (next_data->begin (), next_data->end ());
379+ llm_task_obj->image_datas_ . emplace_back (next_data->begin (), next_data->end ());
373380 return ;
374381 }
375382 llm_task_obj->inference ((*next_data));
0 commit comments