Skip to content

Commit a22647e

Browse files
committed
Align multibyte codec aliases with actual JDK support
Map shift_jis_2004 to the JDK charset name that actually exists, x-SJIS_0213, and stop advertising the euc_jis_2004 and euc_jisx0213 aliases in CharsetMapping, since those codecs still have no backing Java charset. This keeps encodings.aliases and CharsetMapping consistent with real runtime support, restores shift_jis_2004 (together with its shiftjis2004, sjis_2004 and s_jis_2004 aliases) for callers like charset-normalizer and requests, and preserves ImportError behavior for the EUC and ISO-2022 Japanese codecs that remain unsupported.
1 parent 65482f4 commit a22647e

3 files changed

Lines changed: 8 additions & 10 deletions

File tree

graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#
44
# Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
55
import importlib
6+
import importlib.util
67
import sys
78
from pathlib import Path
89

@@ -909,7 +910,6 @@ def test_unsupported_multibyte_codec_modules_raise_import_error_on_graalpy(self)
909910
'encodings.iso2022_jp_2004',
910911
'encodings.iso2022_jp_3',
911912
'encodings.iso2022_jp_ext',
912-
'encodings.shift_jis_2004',
913913
):
914914
with self.subTest(module_name=module_name):
915915
module_path = encodings_dir / f'{module_name.rsplit(".", 1)[1]}.py'
@@ -920,6 +920,9 @@ def test_unsupported_multibyte_codec_modules_raise_import_error_on_graalpy(self)
920920
else:
921921
spec.loader.exec_module(module)
922922

923+
def test_shift_jis_2004_codec_module_imports(self):
924+
import encodings.shift_jis_2004
925+
923926
# just a smoke test
924927
def test_encode(self):
925928
import _codecs_tw

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ private static void addAlias(String alias, String pythonName) {
306306
addMapping("mac_turkish", "x-MacTurkish");
307307
addMapping("palmos", null);
308308
addMapping("ptcp154", null);
309-
addMapping("shift_jis_2004", "Shift_JISX0213");
309+
addMapping("shift_jis_2004", "x-SJIS_0213");
310310
addMapping("shift_jis", "Shift_JIS");
311311
addMapping("shift_jisx0213", "x-SJIS_0213");
312312
addMapping("utf_16_be", "UTF-16BE");
@@ -438,10 +438,6 @@ private static void addAlias(String alias, String pythonName) {
438438
addAlias("uhc", "cp949");
439439
addAlias("950", "cp950");
440440
addAlias("ms950", "cp950");
441-
addAlias("jisx0213", "euc_jis_2004");
442-
addAlias("eucjis2004", "euc_jis_2004");
443-
addAlias("euc_jis2004", "euc_jis_2004");
444-
addAlias("eucjisx0213", "euc_jisx0213");
445441
addAlias("eucjp", "euc_jp");
446442
addAlias("ujis", "euc_jp");
447443
addAlias("u_jis", "euc_jp");

graalpython/lib-python/3/encodings/aliases.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -491,10 +491,9 @@
491491
's_jis' : 'shift_jis',
492492

493493
# shift_jis_2004 codec
494-
# GraalPy change: Java doesn't have this codec
495-
# 'shiftjis2004' : 'shift_jis_2004',
496-
# 'sjis_2004' : 'shift_jis_2004',
497-
# 's_jis_2004' : 'shift_jis_2004',
494+
'shiftjis2004' : 'shift_jis_2004',
495+
'sjis_2004' : 'shift_jis_2004',
496+
's_jis_2004' : 'shift_jis_2004',
498497

499498
# shift_jisx0213 codec
500499
'shiftjisx0213' : 'shift_jisx0213',

0 commit comments

Comments
 (0)