|
| 1 | +"""Tests for the Big5-BBS hybrid codec.""" |
| 2 | + |
| 3 | +# std imports |
| 4 | +import codecs |
| 5 | + |
| 6 | +# 3rd party |
| 7 | +import pytest |
| 8 | + |
| 9 | +# local |
| 10 | +import telnetlib3 # noqa: F401 |
| 11 | + |
| 12 | + |
def test_codec_lookup():
    """The name ``big5bbs`` resolves to a codec that reports that same name."""
    assert codecs.lookup("big5bbs").name == "big5bbs"
| 16 | + |
| 17 | + |
def test_codec_alias_hyphen():
    """The hyphenated spelling ``big5-bbs`` resolves to the ``big5bbs`` codec."""
    # Canonical lookup first; its result is intentionally discarded.
    # NOTE(review): presumably primes codec registration before the alias
    # lookup -- confirm against the codec's search function.
    codecs.lookup("big5bbs")
    assert codecs.lookup("big5-bbs").name == "big5bbs"
| 22 | + |
| 23 | + |
def test_codec_alias_underscore():
    """The underscored spelling ``big5_bbs`` resolves to the ``big5bbs`` codec."""
    # Canonical lookup first; its result is intentionally discarded.
    # NOTE(review): presumably primes codec registration before the alias
    # lookup -- confirm against the codec's search function.
    codecs.lookup("big5bbs")
    assert codecs.lookup("big5_bbs").name == "big5bbs"
| 28 | + |
| 29 | + |
def test_ascii_passthrough():
    """Pure ASCII bytes decode to the identical text."""
    raw = b"Hello, World!\n"
    decoded = raw.decode("big5bbs")
    assert decoded == "Hello, World!\n"
| 33 | + |
| 34 | + |
def test_valid_big5_pair():
    """A CJK character encoded with stock ``big5`` decodes back through ``big5bbs``."""
    expected = "夢"
    encoded = expected.encode("big5")
    assert encoded.decode("big5bbs") == expected
| 40 | + |
| 41 | + |
def test_valid_big5_pair_in_context():
    """A Big5 pair embedded between ASCII runs decodes in place."""
    glyph = "夢"
    payload = b"test" + glyph.encode("big5") + b"end"
    decoded = payload.decode("big5bbs")
    assert decoded == "test" + glyph + "end"
| 48 | + |
| 49 | + |
def test_lone_lead_0xa1_before_esc():
    """Lead byte 0xA1 directly before ESC falls back to CP437; the escape sequence survives."""
    # ESC is not a valid Big5 second byte, so 0xA1 cannot form a pair here.
    stream = bytes([0xA1]) + b"\x1b[32m"  # 0xA1 ESC[32m
    decoded = stream.decode("big5bbs")
    first, rest = decoded[0], decoded[1:]
    assert first == "\u00ed"  # CP437 0xA1 = í (LATIN SMALL LETTER I WITH ACUTE)
    assert rest == "\x1b[32m"
| 57 | + |
| 58 | + |
def test_lone_lead_0xb0_before_esc():
    """Lead byte 0xB0 directly before ESC decodes as CP437 LIGHT SHADE followed by ESC."""
    decoded = b"\xb0\x1b".decode("big5bbs")
    # CP437 0xB0 = ░ (LIGHT SHADE, U+2591)
    assert decoded == "\u2591\x1b"
| 64 | + |
| 65 | + |
def test_lone_lead_0xb6_before_esc():
    """Lead byte 0xB6 directly before ESC decodes as its CP437 box-drawing character."""
    decoded = b"\xb6\x1b".decode("big5bbs")
    # CP437 0xB6 = ╢ (BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE, U+2562)
    assert decoded == "\u2562\x1b"
| 71 | + |
| 72 | + |
@pytest.mark.parametrize(
    "byte_val,expected_unicode",
    [
        (0xA1, "\u00ed"),  # í
        (0xA2, "\u00f3"),  # ó
        (0xA8, "\u00bf"),  # ¿
        (0xA9, "\u2310"),  # ⌐
        (0xAA, "\u00ac"),  # ¬
        (0xAB, "\u00bd"),  # ½
        (0xB0, "\u2591"),  # ░
        (0xB6, "\u2562"),  # ╢
        (0xBF, "\u2510"),  # ┐
        (0xC3, "\u251c"),  # ├
        (0xC6, "\u255e"),  # ╞
        (0xC7, "\u255f"),  # ╟
        (0xCA, "\u2569"),  # ╩
        (0xD1, "\u2564"),  # ╤
        (0xEE, "\u03b5"),  # ε
        (0xEF, "\u2229"),  # ∩
    ],
)
def test_lone_lead_bytes_cp437_fallback(byte_val, expected_unicode):
    """A lone lead byte followed by ESC decodes to its CP437 character.

    :param byte_val: byte value placed directly before ESC (0x1B).
    :param expected_unicode: CP437 character expected for ``byte_val``.
    """
    # ESC is an invalid second byte, so the lead byte falls back to CP437.
    data = bytes([byte_val, 0x1B])
    result = data.decode("big5bbs")
    # Compare the whole string rather than indices 0 and 1 only, so that any
    # extra or missing output from the decoder also fails the test.
    assert result == expected_unicode + "\x1b"
| 100 | + |
| 101 | + |
def test_split_across_chunks_big5_pair():
    """A Big5 pair split over two incremental ``decode`` calls is reassembled."""
    glyph = "夢"
    encoded = glyph.encode("big5")
    assert len(encoded) == 2
    lead, trail = encoded[:1], encoded[1:]
    incremental = codecs.getincrementaldecoder("big5bbs")()
    # The lead byte alone yields no output: it is held until the next chunk.
    assert incremental.decode(lead, final=False) == ""
    # The second byte completes the pair.
    assert incremental.decode(trail, final=False) == glyph
| 112 | + |
| 113 | + |
def test_split_lead_byte_final_true():
    """A lead byte at end of stream (``final=True``) is emitted via the CP437 fallback."""
    decoder_cls = codecs.getincrementaldecoder("big5bbs")
    flushed = decoder_cls().decode(b"\xb0", final=True)
    assert flushed == "\u2591"  # ░
| 119 | + |
| 120 | + |
def test_split_lead_byte_not_final():
    """A lead byte with ``final=False`` is buffered and produces an empty string."""
    decoder_cls = codecs.getincrementaldecoder("big5bbs")
    emitted = decoder_cls().decode(b"\xb0", final=False)
    assert emitted == ""
| 126 | + |
| 127 | + |
def test_round_trip_big5_text():
    """Text encoded with ``big5bbs`` decodes back to the same text."""
    original = "夢想台灣"
    round_tripped = original.encode("big5bbs").decode("big5bbs")
    assert round_tripped == original
| 133 | + |
| 134 | + |
def test_getstate_setstate_preserves_pending_byte():
    """A pending lead byte survives a ``getstate``/``setstate`` hand-off between decoders."""
    make_decoder = codecs.getincrementaldecoder("big5bbs")
    glyph = "夢"
    encoded = glyph.encode("big5")
    lead, trail = encoded[:1], encoded[1:]

    # Feed only the lead byte, then snapshot the decoder state.
    source = make_decoder()
    source.decode(lead, final=False)
    snapshot = source.getstate()
    assert snapshot[0] == lead

    # A fresh decoder restored from the snapshot completes the pair.
    restored = make_decoder()
    restored.setstate(snapshot)
    assert restored.decode(trail, final=True) == glyph
| 147 | + |
| 148 | + |
def test_reset_clears_pending_byte():
    """``reset()`` discards a buffered lead byte, leaving empty pending bytes in the state."""
    lead = "夢".encode("big5")[:1]
    incremental = codecs.getincrementaldecoder("big5bbs")()
    incremental.decode(lead, final=False)
    incremental.reset()
    assert incremental.getstate()[0] == b""
| 157 | + |
| 158 | + |
def test_mixed_stream():
    """Chinese text, a lone art byte, an ANSI escape and ASCII decode together in one stream."""
    glyph = "夢"
    art_and_ansi = b"\xb0\x1b[32m"  # lone 0xB0 followed by ESC[32m
    stream = glyph.encode("big5") + art_and_ansi + b"text"
    decoded = stream.decode("big5bbs")
    assert decoded == glyph + "\u2591\x1b[32mtext"
| 166 | + |
| 167 | + |
def test_incremental_encoder_ascii():
    """The incremental encoder emits ASCII text unchanged."""
    encoder_cls = codecs.getincrementalencoder("big5bbs")
    encoded = encoder_cls().encode("Hello")
    assert encoded == b"Hello"
| 171 | + |
| 172 | + |
def test_incremental_encoder_big5():
    """The incremental encoder produces the same bytes as stock ``big5`` for a CJK character."""
    glyph = "夢"
    expected = glyph.encode("big5")
    encoder_cls = codecs.getincrementalencoder("big5bbs")
    assert encoder_cls().encode(glyph) == expected
| 177 | + |
| 178 | + |
def test_incremental_encoder_getstate():
    """A freshly created incremental encoder reports state ``0``."""
    fresh_encoder = codecs.getincrementalencoder("big5bbs")()
    state = fresh_encoder.getstate()
    assert state == 0
0 commit comments