File tree Expand file tree Collapse file tree
projects/llm_framework/main_whisper/src Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -192,6 +192,34 @@ class llm_task {
192192 return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0 ;
193193 }
194194
// Returns the length in bytes of the longest prefix of `str` that is
// well-formed UTF-8 and ends on a character boundary.
//
// Scanning stops at the first byte that cannot start or continue a
// well-formed sequence, or at a multi-byte sequence that is cut off by the
// end of the string. Well-formedness follows RFC 3629 / Unicode Table 3-7:
// overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and code points
// above U+10FFFF are rejected, not just bad bit patterns.
//
// Declared `static` because it reads no object state.
static std::string::size_type valid_utf8_prefix_length(const std::string &str)
{
    using size_type = std::string::size_type;

    const size_type size = str.size();
    size_type pos        = 0;  // always sits on a character boundary

    while (pos < size) {
        const unsigned char lead = static_cast<unsigned char>(str[pos]);

        // Single-byte (ASCII) character.
        if (lead < 0x80) {
            ++pos;
            continue;
        }

        // Number of continuation bytes and the allowed range of the FIRST
        // continuation byte; later continuation bytes are always 0x80..0xBF.
        size_type trailing = 0;
        unsigned char lo   = 0x80;
        unsigned char hi   = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            // 0xC0 / 0xC1 would be overlong encodings of ASCII.
            trailing = 1;
        } else if ((lead >> 4) == 0b1110) {
            trailing = 2;
            if (lead == 0xE0)
                lo = 0xA0;  // below this is an overlong 3-byte form
            else if (lead == 0xED)
                hi = 0x9F;  // above this encodes a UTF-16 surrogate
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0)
                lo = 0x90;  // below this is an overlong 4-byte form
            else if (lead == 0xF4)
                hi = 0x8F;  // above this exceeds U+10FFFF
        } else {
            // Stray continuation byte, overlong lead, or 0xF5..0xFF.
            break;
        }

        // The sequence is cut off by the end of the string.
        if (size - pos <= trailing) break;

        bool well_formed = true;
        for (size_type k = 1; k <= trailing; ++k) {
            const unsigned char c = static_cast<unsigned char>(str[pos + k]);
            if (c < lo || c > hi) {
                well_formed = false;
                break;
            }
            lo = 0x80;
            hi = 0xBF;
        }
        if (!well_formed) break;

        pos += trailing + 1;
    }
    return pos;
}

// Returns true when the whole of `str` is well-formed UTF-8 (the empty
// string counts as valid), false otherwise.
bool is_valid_utf8(const std::string &str)
{
    return valid_utf8_prefix_length(str) == str.size();
}

// Truncates `s` in place to its longest well-formed UTF-8 prefix.
//
// This yields the same result as repeatedly dropping the last byte until the
// string validates, but needs only one forward pass instead of re-validating
// the whole string after every removed byte. A string that is already valid
// (including the empty string) is left untouched.
void fix_utf8_string(std::string &s)
{
    const std::string::size_type keep = valid_utf8_prefix_length(s);
    if (keep != s.size()) {
        s.resize(keep);
    }
}
222+
195223 int load_model (const nlohmann::json &config_body)
196224 {
197225 if (parse_config (config_body)) {
@@ -475,7 +503,7 @@ class llm_task {
475503 (uint32)mode_config_.token_tables [i].size (), str);
476504 s += str;
477505 }
478-
506+ fix_utf8_string (s);
479507 if (mode_config_.language == " en" || mode_config_.language == " ja" ) {
480508 if (out_callback_) out_callback_ (s, true );
481509 } else {
You can’t perform that action at this time.
0 commit comments