Skip to content

Commit 09401a5

Browse files
committed
Support a mode for typing escape codes such as \u1234 and &#x1234;. This also lets you easily inspect the codes of pasted content if you switch to escape mode.
1 parent 6b8af9a commit 09401a5

11 files changed

Lines changed: 435 additions & 132 deletions

Attributes.ixx

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -205,10 +205,10 @@ HRESULT Attribute::ParseString(
205205
{
206206
hr = S_OK;
207207
size_t sourceCount = 0;
208-
UnicodeCharacterReader reader = { stringValue, stringValue + 2 };
208+
UnicodeCharacterReader reader(stringValue, stringValue + 2);
209209
data.ch32 = reader.ReadNext();
210-
if (*reader.current != '\0')
211-
stringValue = reader.current;
210+
if (reader.front() != '\0')
211+
stringValue = reader.data();
212212
}
213213
break;
214214

Common.String.h

Lines changed: 27 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,11 @@ inline char16_t GetTrailingSurrogate(char32_t ch)
4545
return char16_t(0xDC00 + (ch & 0x3FF));
4646
}
4747

48+
// Returns true when ch is an ASCII hexadecimal digit: '0'-'9', 'A'-'F', or 'a'-'f'.
inline bool IsHexDigit(char32_t ch) throw()
49+
{
50+
// The decimal-digit test runs first; only if it fails is bit 5 (value 32) cleared,
// which folds 'a'-'f' onto 'A'-'F' so one range comparison handles both cases.
// ch is a by-value parameter, so the in-place mask does not affect the caller.
return (ch >= '0' && ch <= '9') || (ch &= ~32, ch >= 'A' && ch <= 'F');
51+
}
52+
4853
enum UnicodeCodePoint
4954
{
5055
UnicodeSpace = 0x000020,
@@ -104,9 +109,14 @@ void AppendFormattedString(_Inout_ std::u16string& returnString, _In_z_ const ch
104109
void TrimSpaces(_Inout_ std::u16string& text);
105110
void UnquoteString(_Inout_ std::u16string& path);
106111
void ToUpperCase(_Inout_ array_ref<char16_t> s);
107-
void UnescapeString(_In_z_ const char16_t* escapedText, OUT std::u16string& expandedText);
112+
void UnescapeCppUniversalCharacterNames(array_ref<char16_t const> escapedText, OUT std::u16string& expandedText);
113+
void UnescapeHtmlNamedCharacterReferences(array_ref<char16_t const> escapedText, OUT std::u16string& expandedText);
114+
void EscapeCppUniversalCharacterNames(array_ref<char16_t const> text, OUT std::u16string& escapedText);
115+
void EscapeHtmlNamedCharacterReferences(array_ref<char16_t const> text, OUT std::u16string& escapedText);
108116
void RemoveTrailingZeroes(_Inout_ std::u16string& text) throw();
109-
array_ref<wchar_t> to_wstring(int32_t value, OUT array_ref<wchar_t> s);
117+
void WriteZeroPaddedHexNum(uint32_t value, OUT array_ref<char16_t> buffer);
118+
uint32_t ReadUnsignedNumericValue(_Inout_ array_ref<char16_t const>& text, uint32_t base); // Unlike wcstoul, respects length limit, and doesn't throw exception!
119+
array_ref<wchar_t> ToWString(int32_t value, OUT array_ref<wchar_t> s);
110120

111121
static_assert(sizeof(wchar_t) == sizeof(char16_t), "These casts only work on platforms where wchar_t is 16 bits.");
112122
inline wchar_t* ToWChar(char16_t* p) { return reinterpret_cast<wchar_t*>(p); }
@@ -121,35 +131,40 @@ uint32_t IntLen(_In_z_ char16_t const* text);
121131

122132
array_ref<char16_t const> ToChar16ArrayRef(_In_z_ char16_t const* text);
123133

124-
struct UnicodeCharacterReader
134+
struct UnicodeCharacterReader : public array_ref<char16_t const>
125135
{
126-
const char16_t* current;
127-
const char16_t* end;
136+
UnicodeCharacterReader() = default;
137+
138+
UnicodeCharacterReader(array_ref::pointer begin, array_ref::pointer end) : array_ref(begin, end)
139+
{}
140+
141+
// todo: Inheriting constructors causes Visual Studio 15.4.1 compiler to crash when calling base class size().
142+
//using array_ref::array_ref;
128143

129144
bool IsAtEnd()
130145
{
131-
return current >= end;
146+
return begin_ >= end_;
132147
}
133148

134149
char32_t ReadNext()
135150
{
136-
if (current >= end)
151+
if (begin_ >= end_)
137152
return 0;
138153

139-
char32_t ch = *current;
140-
++current;
154+
char32_t ch = *begin_;
155+
++begin_;
141156

142157
// Just use the character if not a surrogate code point.
143158
// For unpaired surrogates, pass the isolated surrogate
144159
// through (rather than remap to U+FFFD).
145-
if (IsLeadingSurrogate(ch) && current < end)
160+
if (IsLeadingSurrogate(ch) && begin_ < end_)
146161
{
147162
char32_t leading = ch;
148-
char32_t trailing = *current;
163+
char32_t trailing = *begin_;
149164
if (IsTrailingSurrogate(trailing))
150165
{
151166
ch = MakeUnicodeCodePoint(leading, trailing);
152-
++current;
167+
++begin_;
153168
}
154169
}
155170

0 commit comments

Comments
 (0)