Skip to content

Commit 9237dee

Browse files
committed
Core/Utils: Transform char toUpper/toLower functions into callable function objects to better integrate with std algorithms (not calling through a function pointer allows inlining)
(cherry picked from commit 054dd3596fa349805a353836952f96691e67912d)
1 parent 1f25a0f commit 9237dee

1 file changed

Lines changed: 58 additions & 42 deletions

File tree

src/common/Utilities/Util.h

Lines changed: 58 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -229,59 +229,75 @@ inline bool isEastAsianString(std::wstring_view wstr, bool numericOrSpace)
229229
return true;
230230
}
231231

232-
inline wchar_t wcharToUpper(wchar_t wchar)
232+
struct WcharToUpper
233233
{
234-
if (wchar >= L'a' && wchar <= L'z') // LATIN SMALL LETTER A - LATIN SMALL LETTER Z
235-
return wchar_t(uint16(wchar)-0x0020);
236-
if (wchar == 0x00DF) // LATIN SMALL LETTER SHARP S
237-
return wchar_t(0x1E9E);
238-
if (wchar >= 0x00E0 && wchar <= 0x00F6) // LATIN SMALL LETTER A WITH GRAVE - LATIN SMALL LETTER O WITH DIAERESIS
239-
return wchar_t(uint16(wchar)-0x0020);
240-
if (wchar >= 0x00F8 && wchar <= 0x00FE) // LATIN SMALL LETTER O WITH STROKE - LATIN SMALL LETTER THORN
241-
return wchar_t(uint16(wchar)-0x0020);
242-
if (wchar >= 0x0101 && wchar <= 0x012F) // LATIN SMALL LETTER A WITH MACRON - LATIN SMALL LETTER I WITH OGONEK (only %2=1)
234+
wchar_t operator()(wchar_t wchar) const
243235
{
244-
if (wchar % 2 == 1)
245-
return wchar_t(uint16(wchar)-0x0001);
246-
}
247-
if (wchar >= 0x0430 && wchar <= 0x044F) // CYRILLIC SMALL LETTER A - CYRILLIC SMALL LETTER YA
248-
return wchar_t(uint16(wchar)-0x0020);
249-
if (wchar == 0x0451) // CYRILLIC SMALL LETTER IO
250-
return wchar_t(0x0401);
236+
if (wchar >= L'a' && wchar <= L'z') // LATIN SMALL LETTER A - LATIN SMALL LETTER Z
237+
return wchar_t(uint16(wchar) - 0x0020);
238+
if (wchar == 0x00DF) // LATIN SMALL LETTER SHARP S
239+
return wchar_t(0x1E9E);
240+
if (wchar >= 0x00E0 && wchar <= 0x00F6) // LATIN SMALL LETTER A WITH GRAVE - LATIN SMALL LETTER O WITH DIAERESIS
241+
return wchar_t(uint16(wchar) - 0x0020);
242+
if (wchar >= 0x00F8 && wchar <= 0x00FE) // LATIN SMALL LETTER O WITH STROKE - LATIN SMALL LETTER THORN
243+
return wchar_t(uint16(wchar) - 0x0020);
244+
if (wchar >= 0x0101 && wchar <= 0x012F) // LATIN SMALL LETTER A WITH MACRON - LATIN SMALL LETTER I WITH OGONEK (only %2=1)
245+
{
246+
if (wchar % 2 == 1)
247+
return wchar_t(uint16(wchar) - 0x0001);
248+
}
249+
if (wchar >= 0x0430 && wchar <= 0x044F) // CYRILLIC SMALL LETTER A - CYRILLIC SMALL LETTER YA
250+
return wchar_t(uint16(wchar) - 0x0020);
251+
if (wchar == 0x0451) // CYRILLIC SMALL LETTER IO
252+
return wchar_t(0x0401);
251253

252-
return wchar;
253-
}
254+
return wchar;
255+
}
256+
} inline constexpr wcharToUpper;
254257

255-
inline wchar_t wcharToUpperOnlyLatin(wchar_t wchar)
258+
struct WcharToUpperOnlyLatin
256259
{
257-
return isBasicLatinCharacter(wchar) ? wcharToUpper(wchar) : wchar;
258-
}
260+
wchar_t operator()(wchar_t wchar) const
261+
{
262+
return isBasicLatinCharacter(wchar) ? wcharToUpper(wchar) : wchar;
263+
}
264+
} inline constexpr wcharToUpperOnlyLatin;
259265

260-
inline wchar_t wcharToLower(wchar_t wchar)
266+
struct WcharToLower
261267
{
262-
if (wchar >= L'A' && wchar <= L'Z') // LATIN CAPITAL LETTER A - LATIN CAPITAL LETTER Z
263-
return wchar_t(uint16(wchar)+0x0020);
264-
if (wchar >= 0x00C0 && wchar <= 0x00D6) // LATIN CAPITAL LETTER A WITH GRAVE - LATIN CAPITAL LETTER O WITH DIAERESIS
265-
return wchar_t(uint16(wchar)+0x0020);
266-
if (wchar >= 0x00D8 && wchar <= 0x00DE) // LATIN CAPITAL LETTER O WITH STROKE - LATIN CAPITAL LETTER THORN
267-
return wchar_t(uint16(wchar)+0x0020);
268-
if (wchar >= 0x0100 && wchar <= 0x012E) // LATIN CAPITAL LETTER A WITH MACRON - LATIN CAPITAL LETTER I WITH OGONEK (only %2=0)
268+
wchar_t operator()(wchar_t wchar) const
269269
{
270-
if (wchar % 2 == 0)
271-
return wchar_t(uint16(wchar)+0x0001);
270+
if (wchar >= L'A' && wchar <= L'Z') // LATIN CAPITAL LETTER A - LATIN CAPITAL LETTER Z
271+
return wchar_t(uint16(wchar)+0x0020);
272+
if (wchar >= 0x00C0 && wchar <= 0x00D6) // LATIN CAPITAL LETTER A WITH GRAVE - LATIN CAPITAL LETTER O WITH DIAERESIS
273+
return wchar_t(uint16(wchar)+0x0020);
274+
if (wchar >= 0x00D8 && wchar <= 0x00DE) // LATIN CAPITAL LETTER O WITH STROKE - LATIN CAPITAL LETTER THORN
275+
return wchar_t(uint16(wchar)+0x0020);
276+
if (wchar >= 0x0100 && wchar <= 0x012E) // LATIN CAPITAL LETTER A WITH MACRON - LATIN CAPITAL LETTER I WITH OGONEK (only %2=0)
277+
{
278+
if (wchar % 2 == 0)
279+
return wchar_t(uint16(wchar)+0x0001);
280+
}
281+
if (wchar == 0x1E9E) // LATIN CAPITAL LETTER SHARP S
282+
return wchar_t(0x00DF);
283+
if (wchar == 0x0401) // CYRILLIC CAPITAL LETTER IO
284+
return wchar_t(0x0451);
285+
if (wchar >= 0x0410 && wchar <= 0x042F) // CYRILLIC CAPITAL LETTER A - CYRILLIC CAPITAL LETTER YA
286+
return wchar_t(uint16(wchar)+0x0020);
287+
288+
return wchar;
272289
}
273-
if (wchar == 0x1E9E) // LATIN CAPITAL LETTER SHARP S
274-
return wchar_t(0x00DF);
275-
if (wchar == 0x0401) // CYRILLIC CAPITAL LETTER IO
276-
return wchar_t(0x0451);
277-
if (wchar >= 0x0410 && wchar <= 0x042F) // CYRILLIC CAPITAL LETTER A - CYRILLIC CAPITAL LETTER YA
278-
return wchar_t(uint16(wchar)+0x0020);
290+
} inline constexpr wcharToLower;
279291

280-
return wchar;
281-
}
292+
struct CharToUpper
293+
{
294+
char operator()(char c) const { return std::toupper(static_cast<unsigned char>(c)); }
295+
} inline constexpr charToUpper;
282296

283-
inline char charToUpper(char c) { return std::toupper(c); }
284-
inline char charToLower(char c) { return std::tolower(c); }
297+
struct CharToLower
298+
{
299+
char operator()(char c) const { return std::tolower(static_cast<unsigned char>(c)); }
300+
} inline constexpr charToLower;
285301

286302
TC_COMMON_API void wstrToUpper(std::wstring& str);
287303
TC_COMMON_API void wstrToLower(std::wstring& str);

0 commit comments

Comments
 (0)