Skip to content

Commit 71fe7f2

Browse files
committed
[update] fix melotts english generate.
1 parent fb8feab commit 71fe7f2

1 file changed

Lines changed: 37 additions & 1 deletion

File tree

  • projects/llm_framework/main_melotts/src/runner

projects/llm_framework/main_melotts/src/runner/Lexicon.hpp

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,45 @@ class Lexicon {
9292
return words;
9393
}
9494

95+
/// Reports whether `s` is exactly one ASCII letter (A-Z or a-z).
///
/// Any string whose byte length is not 1 -- the empty string, a multi-letter
/// string, a multi-byte encoded character -- yields false.
///
/// @param s Text unit to classify; taken by const reference so that calling
///          this once per unit does not copy the string.
/// @return true when `s` is a single byte in 'A'..'Z' or 'a'..'z'.
bool is_english(const std::string& s) {
    if (s.size() != 1) {
        return false;
    }
    // Plain range comparisons: no locale lookup is involved in the result.
    const char c = s[0];
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
101+
102+
std::vector<std::string> merge_english(const std::vector<std::string>& splitted_text) {
103+
std::vector<std::string> words;
104+
int i = 0;
105+
while (i < splitted_text.size()) {
106+
std::string s;
107+
if (is_english(splitted_text[i])) {
108+
while (i < splitted_text.size()) {
109+
if (!is_english(splitted_text[i])) {
110+
break;
111+
}
112+
s += splitted_text[i];
113+
i++;
114+
}
115+
// to lowercase
116+
std::transform(s.begin(), s.end(), s.begin(),
117+
[](unsigned char c){ return std::tolower(c); });
118+
words.push_back(s);
119+
if (i >= splitted_text.size())
120+
break;
121+
}
122+
else {
123+
words.push_back(splitted_text[i]);
124+
i++;
125+
}
126+
}
127+
return words;
128+
}
129+
95130
void convert(const std::string& text, std::vector<int>& phones, std::vector<int>& tones) {
96131
auto splitted_text = splitEachChar(text);
97-
for (auto c : splitted_text) {
132+
auto zh_mix_en = merge_english(splitted_text);
133+
for (auto c : zh_mix_en) {
98134
std::string s{c};
99135
if (s == "")
100136
s = ",";

0 commit comments

Comments
 (0)