@@ -1682,12 +1682,27 @@ private static bool IsSign(char ch) {
16821682 return ch == '+' || ch == '-' ;
16831683 }
16841684
1685- internal static string GetEncodingName ( Encoding encoding , bool normalize = true ) {
1685+ internal static string GetEncodingName ( Encoding encoding , bool normalize = true , string defaultName = "unknown" ) {
16861686 string ? name = null ;
16871687
16881688 // if we have a valid code page try and get a reasonable name. The
16891689 // web names / mail displays tend to match CPython's terse names
16901690 if ( encoding . CodePage != 0 ) {
1691+ switch ( encoding . CodePage ) {
1692+
1693+ // recognize a few common cases
1694+ case 1200 : name = ( defaultName == "utf-16" && BitConverter . IsLittleEndian ) ? defaultName : "utf-16-le" ; break ;
1695+ case 1201 : name = ( defaultName == "utf-16" && ! BitConverter . IsLittleEndian ) ? defaultName : "utf-16-be" ; break ;
1696+
1697+ case 12000 : name = ( defaultName == "utf-32" && BitConverter . IsLittleEndian ) ? defaultName : "utf-32-le" ; break ;
1698+ case 12001 : name = ( defaultName == "utf-32" && ! BitConverter . IsLittleEndian ) ? defaultName : "utf-32-be" ; break ;
1699+
1700+ case 20127 : name = "ascii" ; break ;
1701+ case 28591 : name = "latin-1" ; break ;
1702+
1703+ case 65000 : name = "utf-7" ; break ;
1704+ case 65001 : name = "utf-8" ; break ;
1705+ }
16911706#if ! NETCOREAPP && ! NETSTANDARD
16921707 if ( encoding . IsBrowserDisplay ) {
16931708 name = encoding . WebName ;
@@ -1699,30 +1714,18 @@ internal static string GetEncodingName(Encoding encoding, bool normalize = true)
16991714#endif
17001715
17011716 if ( name == null ) {
1702- switch ( encoding . CodePage ) {
1703-
1704- // recognize a few common cases
1705- case 1200 : name = "utf-16LE" ; break ;
1706- case 1201 : name = "utf-16BE" ; break ;
1707-
1708- case 12000 : name = "utf-32LE" ; break ;
1709- case 12001 : name = "utf-32BE" ; break ;
1710-
1711- case 20127 : name = "us-ascii" ; break ;
1712- case 28591 : name = "iso-8859-1" ; break ;
1713-
1714- case 65000 : name = "utf-7" ; break ;
1715- case 65001 : name = "utf-8" ; break ;
1716-
1717- // otherwise use a code page number which also matches CPython
1718- default : name = "cp" + encoding . CodePage ; break ;
1719- }
1717+ // otherwise use a code page number which also matches CPython
1718+ name = "cp" + encoding . CodePage ;
17201719 }
17211720 }
17221721
17231722 if ( name == null ) {
17241723 // otherwise just finally fall back to the human readable name
1725- name = encoding . EncodingName ;
1724+ try {
1725+ name = encoding . EncodingName ; // may throw on .NET Core for some encodings
1726+ } catch ( NotSupportedException ) {
1727+ name = defaultName ;
1728+ }
17261729 }
17271730
17281731 return normalize ? NormalizeEncodingName ( name ) : name ;
@@ -1802,9 +1805,9 @@ Encoding setFallback(Encoding enc, DecoderFallback fb) {
18021805 case "strict" : e = setFallback ( e , new ExceptionFallback ( e is UTF8Encoding ) ) ; break ;
18031806 case "replace" : e = setFallback ( e , ReplacementFallback ) ; break ;
18041807 case "ignore" : e = setFallback ( e , new DecoderReplacementFallback ( string . Empty ) ) ; break ;
1805- case "surrogateescape" : e = pe = new PythonSurrogateEscapeEncoding ( e , encoding ) ; break ;
1806- case "surrogatepass" : e = pe = new PythonSurrogatePassEncoding ( e , encoding ) ; break ;
1807- default : e = pe = new PythonErrorHandlerEncoding ( context , e , encoding , errors ) ; break ;
1808+ case "surrogateescape" : e = pe = new PythonSurrogateEscapeEncoding ( e ) ; break ;
1809+ case "surrogatepass" : e = pe = new PythonSurrogatePassEncoding ( e ) ; break ;
1810+ default : e = pe = new PythonErrorHandlerEncoding ( context , e , errors ) ; break ;
18081811 }
18091812
18101813 string decoded = string . Empty ;
@@ -1821,7 +1824,7 @@ Encoding setFallback(Encoding enc, DecoderFallback fb) {
18211824 }
18221825 } catch ( DecoderFallbackException ex ) {
18231826 // augmenting the caught exception instead of creating UnicodeDecodeError to preserve the stack trace
1824- if ( ! ex . Data . Contains ( "encoding" ) ) ex . Data [ "encoding" ] = encoding ;
1827+ if ( ! ex . Data . Contains ( "encoding" ) ) ex . Data [ "encoding" ] = GetEncodingName ( e , normalize : false , defaultName : encoding ) ;
18251828 if ( ! ex . Data . Contains ( "object" ) ) ex . Data [ "object" ] = Bytes . Make ( span . Slice ( start , length ) . ToArray ( ) ) ; ;
18261829 throw ;
18271830 }
@@ -1878,9 +1881,9 @@ static Encoding setFallback(Encoding enc, EncoderFallback fb) {
18781881 case "backslashreplace" : e = setFallback ( e , new BackslashEncoderReplaceFallback ( ) ) ; break ;
18791882 case "xmlcharrefreplace" : e = setFallback ( e , new XmlCharRefEncoderReplaceFallback ( ) ) ; break ;
18801883 case "ignore" : e = setFallback ( e , new EncoderReplacementFallback ( string . Empty ) ) ; break ;
1881- case "surrogateescape" : e = new PythonSurrogateEscapeEncoding ( e , encoding ) ; break ;
1882- case "surrogatepass" : e = new PythonSurrogatePassEncoding ( e , encoding ) ; break ;
1883- default : e = new PythonErrorHandlerEncoding ( context , e , encoding , errors ) ; break ;
1884+ case "surrogateescape" : e = new PythonSurrogateEscapeEncoding ( e ) ; break ;
1885+ case "surrogatepass" : e = new PythonSurrogatePassEncoding ( e ) ; break ;
1886+ default : e = new PythonErrorHandlerEncoding ( context , e , errors ) ; break ;
18841887 }
18851888
18861889 byte [ ] ? preamble = includePreamble ? e . GetPreamble ( ) : null ;
@@ -1893,7 +1896,7 @@ static Encoding setFallback(Encoding enc, EncoderFallback fb) {
18931896 }
18941897 e . GetBytes ( s , 0 , s . Length , bytes , preambleLen ) ;
18951898 } catch ( EncoderFallbackException ex ) {
1896- if ( ! ex . Data . Contains ( "encoding" ) ) ex . Data [ "encoding" ] = encoding ;
1899+ if ( ! ex . Data . Contains ( "encoding" ) ) ex . Data [ "encoding" ] = GetEncodingName ( e , normalize : false , defaultName : encoding ) ;
18971900 if ( ! ex . Data . Contains ( "object" ) ) ex . Data [ "object" ] = s ;
18981901 throw ;
18991902 }
@@ -1951,7 +1954,7 @@ static CodecsInfo() {
19511954 d [ "iso_8859_1" ] = d [ "iso8859_1" ] = d [ "8859" ] = d [ "iso8859" ]
19521955 = d [ "cp28591" ] = d [ "28591" ] = d [ "cp819" ] = d [ "819" ]
19531956 = d [ "latin_1" ] = d [ "latin1" ] = d [ "latin" ] = d [ "l1" ] = makeEncodingProxy ( ( ) => Latin1Encoding ) ;
1954- d [ "cp20127" ] = d [ "us_ascii" ] = d [ "us" ] = d [ "ascii" ] = d [ "646" ] = makeEncodingProxy ( ( ) => PythonAsciiEncoding . Instance ) ;
1957+ d [ "cp20127" ] = d [ "us_ascii" ] = d [ "us" ] = d [ "ascii" ] = d [ "646" ] = makeEncodingProxy ( ( ) => Encoding . ASCII ) ;
19551958 d [ "cp65000" ] = d [ "utf_7" ] = d [ "u7" ] = d [ "unicode_1_1_utf_7" ] = makeEncodingProxy ( ( ) => new UTF7Encoding ( allowOptionals : true ) ) ;
19561959 d [ "cp65001" ] = d [ "utf_8" ] = d [ "utf8" ] = d [ "u8" ] = makeEncodingProxy ( ( ) => new UTF8Encoding ( encoderShouldEmitUTF8Identifier : false ) ) ;
19571960 d [ "utf_8_sig" ] = makeEncodingProxy ( ( ) => new UTF8Encoding ( encoderShouldEmitUTF8Identifier : true ) ) ;
0 commit comments