Skip to content
This repository was archived by the owner on Apr 14, 2022. It is now read-only.

Commit 39a1aed

Browse files
author
Mikhail Arkhipov
authored
Merge pull request #102 from MikhailArkhipov/enc
Remove exceptions on ASCII read
2 parents b98996e + b8328b3 commit 39a1aed

3 files changed

Lines changed: 5 additions & 194 deletions

File tree

src/Analysis/Engine/Impl/Parsing/Parser.cs

Lines changed: 3 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -133,11 +133,9 @@ public static Parser CreateParser(Stream stream, PythonLanguageVersion version)
133133
/// </summary>
134134
public static Parser CreateParser(Stream stream, PythonLanguageVersion version, ParserOptions parserOptions = null) {
135135
var options = parserOptions ?? ParserOptions.Default;
136-
137-
var defaultEncoding = version.Is2x() ? PythonAsciiEncoding.Instance : Encoding.UTF8;
136+
var defaultEncoding = version.Is2x() ? Encoding.ASCII : Encoding.UTF8;
138137

139138
var reader = GetStreamReaderWithEncoding(stream, defaultEncoding, options.ErrorSink);
140-
141139
return CreateParser(reader, version, options);
142140
}
143141

@@ -5325,9 +5323,7 @@ public AsciiEncodingInfoWrapper()
53255323
: base((EncodingInfo)null) {
53265324
}
53275325

5328-
public override Encoding GetEncoding() {
5329-
return PythonAsciiEncoding.Instance;
5330-
}
5326+
public override Encoding GetEncoding() => Encoding.ASCII;
53315327
}
53325328

53335329
class EncodingWrapper : Encoding {
@@ -5478,23 +5474,7 @@ private static string ReadOneLine(List<byte> previewedBytes, ref int curIndex, S
54785474
return noNewlineRes;
54795475
}
54805476

5481-
/// <summary>
5482-
/// Returns an Encoding object which raises a BadSourceException when invalid characters are encountered.
5483-
/// </summary>
5484-
public static Encoding DefaultEncoding {
5485-
get {
5486-
return PythonAsciiEncoding.SourceEncoding;
5487-
}
5488-
}
5489-
5490-
/// <summary>
5491-
/// Returns an Encoding object which will not provide any fallback for invalid characters.
5492-
/// </summary>
5493-
public static Encoding DefaultEncodingNoFallback {
5494-
get {
5495-
return PythonAsciiEncoding.SourceEncodingNoFallback;
5496-
}
5497-
}
5477+
public static Encoding DefaultEncoding => Encoding.ASCII;
54985478

54995479
#endregion
55005480

src/Analysis/Engine/Impl/Parsing/PythonAsciiEncoding.cs

Lines changed: 0 additions & 138 deletions
Original file line number | Diff line number | Diff line change
@@ -22,144 +22,6 @@
2222
using Microsoft.PythonTools.Analysis.Infrastructure;
2323

2424
namespace Microsoft.PythonTools.Parsing {
25-
/// <summary>
26-
/// Simple implementation of ASCII encoding/decoding. The default instance (PythonAsciiEncoding.Instance) is
27-
/// setup to always convert even values outside of the ASCII range. The EncoderFallback/DecoderFallbacks can
28-
/// be replaced with versions that will throw exceptions instead though.
29-
/// </summary>
30-
[Serializable]
31-
sealed class PythonAsciiEncoding : Encoding {
32-
internal static readonly Encoding Instance = MakeNonThrowing();
33-
internal static readonly Encoding SourceEncoding = MakeSourceEncoding();
34-
internal static readonly Encoding SourceEncodingNoFallback = MakeSourceEncodingNoFallback();
35-
36-
internal PythonAsciiEncoding()
37-
: base() {
38-
}
39-
40-
internal static Encoding MakeNonThrowing() {
41-
// we need to Clone the new instance here so that the base class marks us as non-readonly
42-
Encoding enc = (Encoding)new PythonAsciiEncoding().Clone();
43-
enc.DecoderFallback = new NonStrictDecoderFallback();
44-
enc.EncoderFallback = new NonStrictEncoderFallback();
45-
return enc;
46-
}
47-
48-
private static Encoding MakeSourceEncoding() {
49-
// we need to Clone the new instance here so that the base class marks us as non-readonly
50-
Encoding enc = (Encoding)new PythonAsciiEncoding().Clone();
51-
enc.DecoderFallback = new SourceNonStrictDecoderFallback();
52-
return enc;
53-
}
54-
55-
private static Encoding MakeSourceEncodingNoFallback() {
56-
// we need to Clone the new instance here so that the base class marks us as non-readonly
57-
Encoding enc = (Encoding)new PythonAsciiEncoding().Clone();
58-
enc.DecoderFallback = new SourceNonStrictDecoderFallbackNoFallback();
59-
return enc;
60-
}
61-
62-
public override int GetByteCount(char[] chars, int index, int count) {
63-
int byteCount = 0;
64-
int charEnd = index + count;
65-
while (index < charEnd) {
66-
char c = chars[index];
67-
if (c > 0x7f) {
68-
EncoderFallbackBuffer efb = EncoderFallback.CreateFallbackBuffer();
69-
if (efb.Fallback(c, index)) {
70-
byteCount += efb.Remaining;
71-
}
72-
} else {
73-
byteCount++;
74-
}
75-
index++;
76-
}
77-
return byteCount;
78-
}
79-
80-
public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) {
81-
int charEnd = charIndex + charCount;
82-
int outputBytes = 0;
83-
while (charIndex < charEnd) {
84-
char c = chars[charIndex];
85-
if (c > 0x7f) {
86-
EncoderFallbackBuffer efb = EncoderFallback.CreateFallbackBuffer();
87-
if (efb.Fallback(c, charIndex)) {
88-
while (efb.Remaining != 0) {
89-
bytes[byteIndex++] = (byte)efb.GetNextChar();
90-
outputBytes++;
91-
}
92-
}
93-
} else {
94-
bytes[byteIndex++] = (byte)c;
95-
outputBytes++;
96-
}
97-
charIndex++;
98-
}
99-
return outputBytes;
100-
}
101-
102-
public override int GetCharCount(byte[] bytes, int index, int count) {
103-
int byteEnd = index + count;
104-
int outputChars = 0;
105-
while (index < byteEnd) {
106-
byte b = bytes[index];
107-
if (b > 0x7f) {
108-
DecoderFallbackBuffer dfb = DecoderFallback.CreateFallbackBuffer();
109-
if (dfb.Fallback(new byte[] { b }, index)) {
110-
outputChars += dfb.Remaining;
111-
}
112-
} else {
113-
outputChars++;
114-
}
115-
index++;
116-
}
117-
return outputChars;
118-
}
119-
120-
public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
121-
int byteEnd = byteIndex + byteCount;
122-
int outputChars = 0;
123-
while (byteIndex < byteEnd) {
124-
byte b = bytes[byteIndex];
125-
if (b > 0x7f) {
126-
DecoderFallbackBuffer dfb = DecoderFallback.CreateFallbackBuffer();
127-
if (dfb.Fallback(new byte[] { b }, byteIndex)) {
128-
while (dfb.Remaining != 0) {
129-
chars[charIndex++] = dfb.GetNextChar();
130-
outputChars++;
131-
}
132-
}
133-
} else {
134-
chars[charIndex++] = (char)b;
135-
outputChars++;
136-
}
137-
byteIndex++;
138-
}
139-
return outputChars;
140-
}
141-
142-
public override int GetMaxByteCount(int charCount) {
143-
return charCount * 4;
144-
}
145-
146-
public override int GetMaxCharCount(int byteCount) {
147-
return byteCount;
148-
}
149-
150-
public override string WebName {
151-
get {
152-
return "ascii";
153-
}
154-
}
155-
156-
public override string EncodingName {
157-
get {
158-
return "ascii";
159-
}
160-
}
161-
}
162-
16325
class NonStrictEncoderFallback : EncoderFallback {
16426
public override EncoderFallbackBuffer CreateFallbackBuffer() {
16527
return new NonStrictEncoderFallbackBuffer();

src/Analysis/Engine/Impl/Parsing/Tokenizer.cs

Lines changed: 2 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2631,39 +2631,8 @@ private void RefillBuffer() {
26312631
}
26322632

26332633
// make the buffer full:
2634-
try {
2635-
int count = _reader.Read(_buffer, _end, _buffer.Length - _end);
2636-
_end += count;
2637-
} catch (BadSourceException bse) {
2638-
StreamReader streamReader = _reader as StreamReader;
2639-
if (streamReader != null && streamReader.CurrentEncoding != PythonAsciiEncoding.SourceEncoding) {
2640-
_errors.Add(
2641-
"(unicode error) '{0}' codec can't decode byte 0x{1:x} in position {2}".FormatUI(
2642-
Parser.NormalizeEncodingName(streamReader.CurrentEncoding.WebName),
2643-
bse.BadByte,
2644-
bse.Index + CurrentIndex
2645-
),
2646-
null,
2647-
CurrentIndex + bse.Index,
2648-
CurrentIndex + bse.Index + 1,
2649-
ErrorCodes.SyntaxError,
2650-
Severity.FatalError
2651-
);
2652-
} else {
2653-
_errors.Add(
2654-
"Non-ASCII character '\\x{0:x}' at position {1}, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details".FormatUI(
2655-
bse.BadByte,
2656-
bse.Index + CurrentIndex
2657-
),
2658-
null,
2659-
CurrentIndex + bse.Index,
2660-
CurrentIndex + bse.Index + 1,
2661-
ErrorCodes.SyntaxError,
2662-
Severity.FatalError
2663-
);
2664-
}
2665-
throw;
2666-
}
2634+
int count = _reader.Read(_buffer, _end, _buffer.Length - _end);
2635+
_end += count;
26672636

26682637
ClearInvalidChars();
26692638
}

0 commit comments

Comments
 (0)