@@ -45,6 +45,11 @@ inline char16_t GetTrailingSurrogate(char32_t ch)
4545 return char16_t (0xDC00 + (ch & 0x3FF ));
4646}
4747
/// Reports whether a code point is an ASCII hexadecimal digit (0-9, A-F, a-f).
///
/// @param ch  Code point to classify.
/// @return    true for U+0030..U+0039, U+0041..U+0046 and U+0061..U+0066;
///            false for every other value.
inline bool IsHexDigit(char32_t ch) noexcept
{
    if (ch >= U'0' && ch <= U'9')
        return true;

    // Clearing bit 5 (0x20) folds ASCII 'a'..'f' onto 'A'..'F', so a single
    // range check covers both letter cases. A local is used instead of
    // modifying the parameter in the middle of the expression.
    const char32_t folded = ch & ~static_cast<char32_t>(0x20);
    return folded >= U'A' && folded <= U'F';
}
52+
4853enum UnicodeCodePoint
4954{
5055 UnicodeSpace = 0x000020 ,
@@ -104,9 +109,14 @@ void AppendFormattedString(_Inout_ std::u16string& returnString, _In_z_ const ch
// String helper declarations; their definitions are not part of this view.
// Lines marked '-' are declarations this change removes, lines marked '+' are
// declarations it adds, and unmarked lines are unchanged context.
104109void TrimSpaces (_Inout_ std::u16string& text);
105110void UnquoteString (_Inout_ std::u16string& path);
106111void ToUpperCase (_Inout_ array_ref<char16_t > s);
// Removed: the single unescape entry point that took a null-terminated (_In_z_) string.
107- void UnescapeString (_In_z_ const char16_t * escapedText, OUT std::u16string& expandedText);
// Added: four escape/unescape declarations. Each takes its input as an
// array_ref<char16_t const> and writes its result to an OUT std::u16string.
112+ void UnescapeCppUniversalCharacterNames (array_ref<char16_t const > escapedText, OUT std::u16string& expandedText);
113+ void UnescapeHtmlNamedCharacterReferences (array_ref<char16_t const > escapedText, OUT std::u16string& expandedText);
114+ void EscapeCppUniversalCharacterNames (array_ref<char16_t const > text, OUT std::u16string& escapedText);
115+ void EscapeHtmlNamedCharacterReferences (array_ref<char16_t const > text, OUT std::u16string& escapedText);
108116void RemoveTrailingZeroes (_Inout_ std::u16string& text) throw();
// Removed under this name; a ToWString declaration with the same parameter
// and return types is added at the end of this group.
109- array_ref<wchar_t > to_wstring (int32_t value, OUT array_ref<wchar_t > s);
117+ void WriteZeroPaddedHexNum (uint32_t value, OUT array_ref<char16_t > buffer);
// `text` is passed by non-const reference and annotated _Inout_.
// NOTE(review): presumably it is advanced past the characters consumed — confirm against the definition.
118+ uint32_t ReadUnsignedNumericValue (_Inout_ array_ref<char16_t const >& text, uint32_t base); // Unlike wcstoul, respects length limit, and doesn't throw exception!
119+ array_ref<wchar_t > ToWString (int32_t value, OUT array_ref<wchar_t > s);
110120
// Compile-time guard: the build fails unless wchar_t and char16_t have the
// same size, which the pointer cast below relies on.
111121static_assert (sizeof (wchar_t ) == sizeof (char16_t ), " These casts only work on platforms where wchar_t is 16 bits." );
// Reinterprets a char16_t pointer as a wchar_t pointer. Only the pointer type
// changes; no characters are copied or converted.
112122inline wchar_t * ToWChar (char16_t * p) { return reinterpret_cast <wchar_t *>(p); }
@@ -121,35 +131,40 @@ uint32_t IntLen(_In_z_ char16_t const* text);
121131
// Declaration only: takes a null-terminated (_In_z_) char16_t string and
// returns an array_ref<char16_t const>.
// NOTE(review): presumably the returned range covers the characters up to the
// terminator — confirm against the definition.
122132array_ref<char16_t const > ToChar16ArrayRef (_In_z_ char16_t const * text);
123133
124- struct UnicodeCharacterReader
134+ struct UnicodeCharacterReader : public array_ref < char16_t const >
125135{
126- const char16_t * current;
127- const char16_t * end;
136+ UnicodeCharacterReader () = default ;
137+
138+ UnicodeCharacterReader (array_ref::pointer begin, array_ref::pointer end) : array_ref(begin, end)
139+ {}
140+
141+ // todo: Inheriting constructors causes Visual Studio 15.4.1 compiler to crash when calling base class size().
142+ // using array_ref::array_ref;
128143
129144 bool IsAtEnd ()
130145 {
131- return current >= end ;
146+ return begin_ >= end_ ;
132147 }
133148
134149 char32_t ReadNext ()
135150 {
136- if (current >= end )
151+ if (begin_ >= end_ )
137152 return 0 ;
138153
139- char32_t ch = *current ;
140- ++current ;
154+ char32_t ch = *begin_ ;
155+ ++begin_ ;
141156
142157 // Just use the character if not a surrogate code point.
143158 // For unpaired surrogates, pass the isolated surrogate
144159 // through (rather than remap to U+FFFD).
145- if (IsLeadingSurrogate (ch) && current < end )
160+ if (IsLeadingSurrogate (ch) && begin_ < end_ )
146161 {
147162 char32_t leading = ch;
148- char32_t trailing = *current ;
163+ char32_t trailing = *begin_ ;
149164 if (IsTrailingSurrogate (trailing))
150165 {
151166 ch = MakeUnicodeCodePoint (leading, trailing);
152- ++current ;
167+ ++begin_ ;
153168 }
154169 }
155170
0 commit comments