File tree Expand file tree Collapse file tree
projects/llm_framework/main_melotts/src/runner Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -92,9 +92,45 @@ class Lexicon {
9292 return words;
9393 }
9494
// Reports whether `s` is a single ASCII letter.
//
// @param s  One text element to classify.
// @return   true only when `s` is exactly one byte long and that byte lies in
//           'A'..'Z' or 'a'..'z'; false for the empty string, for any string
//           longer than one byte (which includes every multi-byte UTF-8
//           character), and for digits or punctuation.
//
// The ranges are compared explicitly rather than through std::isalpha so the
// result does not depend on the current C locale.
bool is_english(const std::string& s) {
    if (s.size() != 1) {
        return false;
    }
    const char c = s[0];
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
101+
102+ std::vector<std::string> merge_english (const std::vector<std::string>& splitted_text) {
103+ std::vector<std::string> words;
104+ int i = 0 ;
105+ while (i < splitted_text.size ()) {
106+ std::string s;
107+ if (is_english (splitted_text[i])) {
108+ while (i < splitted_text.size ()) {
109+ if (!is_english (splitted_text[i])) {
110+ break ;
111+ }
112+ s += splitted_text[i];
113+ i++;
114+ }
115+ // to lowercase
116+ std::transform (s.begin (), s.end (), s.begin (),
117+ [](unsigned char c){ return std::tolower (c); });
118+ words.push_back (s);
119+ if (i >= splitted_text.size ())
120+ break ;
121+ }
122+ else {
123+ words.push_back (splitted_text[i]);
124+ i++;
125+ }
126+ }
127+ return words;
128+ }
129+
95130 void convert (const std::string& text, std::vector<int >& phones, std::vector<int >& tones) {
96131 auto splitted_text = splitEachChar (text);
97- for (auto c : splitted_text) {
132+ auto zh_mix_en = merge_english (splitted_text);
133+ for (auto c : zh_mix_en) {
98134 std::string s{c};
99135 if (s == " ," )
100136 s = " ," ;
You can’t perform that action at this time.
0 commit comments