Skip to content

Commit 0ece7a0

Browse files
committed
Remove byte sniffing from Ext::Parser
1 parent 4b843b5 commit 0ece7a0

2 files changed

Lines changed: 57 additions & 127 deletions

File tree

ext/json/ext/parser/parser.c

Lines changed: 53 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -68,9 +68,9 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
6868
}
6969

7070
#ifdef HAVE_RUBY_ENCODING_H
71-
static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
72-
CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
73-
static ID i_encoding, i_encode;
71+
static VALUE CEncoding_UTF_8;
72+
73+
static ID i_encode;
7474
#else
7575
static ID i_iconv;
7676
#endif
@@ -89,11 +89,11 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
8989

9090

9191
#line 92 "parser.c"
92-
enum {JSON_object_start = 1};
93-
enum {JSON_object_first_final = 27};
94-
enum {JSON_object_error = 0};
92+
static const int JSON_object_start = 1;
93+
static const int JSON_object_first_final = 27;
94+
static const int JSON_object_error = 0;
9595

96-
enum {JSON_object_en_main = 1};
96+
static const int JSON_object_en_main = 1;
9797

9898

9999
#line 151 "parser.rl"
@@ -467,11 +467,11 @@ case 26:
467467

468468

469469
#line 470 "parser.c"
470-
enum {JSON_value_start = 1};
471-
enum {JSON_value_first_final = 21};
472-
enum {JSON_value_error = 0};
470+
static const int JSON_value_start = 1;
471+
static const int JSON_value_first_final = 21;
472+
static const int JSON_value_error = 0;
473473

474-
enum {JSON_value_en_main = 1};
474+
static const int JSON_value_en_main = 1;
475475

476476

477477
#line 271 "parser.rl"
@@ -776,11 +776,11 @@ case 20:
776776

777777

778778
#line 779 "parser.c"
779-
enum {JSON_integer_start = 1};
780-
enum {JSON_integer_first_final = 3};
781-
enum {JSON_integer_error = 0};
779+
static const int JSON_integer_start = 1;
780+
static const int JSON_integer_first_final = 3;
781+
static const int JSON_integer_error = 0;
782782

783-
enum {JSON_integer_en_main = 1};
783+
static const int JSON_integer_en_main = 1;
784784

785785

786786
#line 295 "parser.rl"
@@ -875,11 +875,11 @@ case 5:
875875

876876

877877
#line 878 "parser.c"
878-
enum {JSON_float_start = 1};
879-
enum {JSON_float_first_final = 8};
880-
enum {JSON_float_error = 0};
878+
static const int JSON_float_start = 1;
879+
static const int JSON_float_first_final = 8;
880+
static const int JSON_float_error = 0;
881881

882-
enum {JSON_float_en_main = 1};
882+
static const int JSON_float_en_main = 1;
883883

884884

885885
#line 329 "parser.rl"
@@ -1041,11 +1041,11 @@ case 7:
10411041

10421042

10431043
#line 1044 "parser.c"
1044-
enum {JSON_array_start = 1};
1045-
enum {JSON_array_first_final = 17};
1046-
enum {JSON_array_error = 0};
1044+
static const int JSON_array_start = 1;
1045+
static const int JSON_array_first_final = 17;
1046+
static const int JSON_array_error = 0;
10471047

1048-
enum {JSON_array_en_main = 1};
1048+
static const int JSON_array_en_main = 1;
10491049

10501050

10511051
#line 381 "parser.rl"
@@ -1373,11 +1373,11 @@ static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
13731373

13741374

13751375
#line 1376 "parser.c"
1376-
enum {JSON_string_start = 1};
1377-
enum {JSON_string_first_final = 8};
1378-
enum {JSON_string_error = 0};
1376+
static const int JSON_string_start = 1;
1377+
static const int JSON_string_first_final = 8;
1378+
static const int JSON_string_error = 0;
13791379

1380-
enum {JSON_string_en_main = 1};
1380+
static const int JSON_string_en_main = 1;
13811381

13821382

13831383
#line 494 "parser.rl"
@@ -1568,38 +1568,9 @@ static VALUE convert_encoding(VALUE source)
15681568
{
15691569
char *ptr = RSTRING_PTR(source);
15701570
long len = RSTRING_LEN(source);
1571-
if (len < 2) {
1572-
rb_raise(eParserError, "A JSON text must at least contain two octets!");
1573-
}
15741571
#ifdef HAVE_RUBY_ENCODING_H
15751572
{
1576-
VALUE encoding = rb_funcall(source, i_encoding, 0);
1577-
if (encoding == CEncoding_ASCII_8BIT) {
1578-
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
1579-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32BE);
1580-
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
1581-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16BE);
1582-
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
1583-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32LE);
1584-
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
1585-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16LE);
1586-
} else {
1587-
source = rb_str_dup(source);
1588-
FORCE_UTF8(source);
1589-
}
1590-
} else {
1591-
source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
1592-
}
1593-
}
1594-
#else
1595-
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
1596-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
1597-
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
1598-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
1599-
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
1600-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
1601-
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
1602-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
1573+
source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
16031574
}
16041575
#endif
16051576
return source;
@@ -1729,15 +1700,15 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
17291700
}
17301701

17311702

1732-
#line 1733 "parser.c"
1733-
enum {JSON_start = 1};
1734-
enum {JSON_first_final = 10};
1735-
enum {JSON_error = 0};
1703+
#line 1704 "parser.c"
1704+
static const int JSON_start = 1;
1705+
static const int JSON_first_final = 10;
1706+
static const int JSON_error = 0;
17361707

1737-
enum {JSON_en_main = 1};
1708+
static const int JSON_en_main = 1;
17381709

17391710

1740-
#line 740 "parser.rl"
1711+
#line 711 "parser.rl"
17411712

17421713

17431714
static VALUE cParser_parse_strict(VALUE self)
@@ -1748,16 +1719,16 @@ static VALUE cParser_parse_strict(VALUE self)
17481719
GET_PARSER;
17491720

17501721

1751-
#line 1752 "parser.c"
1722+
#line 1723 "parser.c"
17521723
{
17531724
cs = JSON_start;
17541725
}
17551726

1756-
#line 750 "parser.rl"
1727+
#line 721 "parser.rl"
17571728
p = json->source;
17581729
pe = p + json->len;
17591730

1760-
#line 1761 "parser.c"
1731+
#line 1732 "parser.c"
17611732
{
17621733
if ( p == pe )
17631734
goto _test_eof;
@@ -1813,7 +1784,7 @@ case 5:
18131784
goto st1;
18141785
goto st5;
18151786
tr3:
1816-
#line 729 "parser.rl"
1787+
#line 700 "parser.rl"
18171788
{
18181789
char *np;
18191790
json->current_nesting = 1;
@@ -1822,7 +1793,7 @@ case 5:
18221793
}
18231794
goto st10;
18241795
tr4:
1825-
#line 722 "parser.rl"
1796+
#line 693 "parser.rl"
18261797
{
18271798
char *np;
18281799
json->current_nesting = 1;
@@ -1834,7 +1805,7 @@ case 5:
18341805
if ( ++p == pe )
18351806
goto _test_eof10;
18361807
case 10:
1837-
#line 1838 "parser.c"
1808+
#line 1809 "parser.c"
18381809
switch( (*p) ) {
18391810
case 13: goto st10;
18401811
case 32: goto st10;
@@ -1891,7 +1862,7 @@ case 9:
18911862
_out: {}
18921863
}
18931864

1894-
#line 753 "parser.rl"
1865+
#line 724 "parser.rl"
18951866

18961867
if (cs >= JSON_first_final && p == pe) {
18971868
return result;
@@ -1903,15 +1874,15 @@ case 9:
19031874

19041875

19051876

1906-
#line 1907 "parser.c"
1907-
enum {JSON_quirks_mode_start = 1};
1908-
enum {JSON_quirks_mode_first_final = 10};
1909-
enum {JSON_quirks_mode_error = 0};
1877+
#line 1878 "parser.c"
1878+
static const int JSON_quirks_mode_start = 1;
1879+
static const int JSON_quirks_mode_first_final = 10;
1880+
static const int JSON_quirks_mode_error = 0;
19101881

1911-
enum {JSON_quirks_mode_en_main = 1};
1882+
static const int JSON_quirks_mode_en_main = 1;
19121883

19131884

1914-
#line 778 "parser.rl"
1885+
#line 749 "parser.rl"
19151886

19161887

19171888
static VALUE cParser_parse_quirks_mode(VALUE self)
@@ -1922,16 +1893,16 @@ static VALUE cParser_parse_quirks_mode(VALUE self)
19221893
GET_PARSER;
19231894

19241895

1925-
#line 1926 "parser.c"
1896+
#line 1897 "parser.c"
19261897
{
19271898
cs = JSON_quirks_mode_start;
19281899
}
19291900

1930-
#line 788 "parser.rl"
1901+
#line 759 "parser.rl"
19311902
p = json->source;
19321903
pe = p + json->len;
19331904

1934-
#line 1935 "parser.c"
1905+
#line 1906 "parser.c"
19351906
{
19361907
if ( p == pe )
19371908
goto _test_eof;
@@ -1965,7 +1936,7 @@ case 1:
19651936
cs = 0;
19661937
goto _out;
19671938
tr2:
1968-
#line 770 "parser.rl"
1939+
#line 741 "parser.rl"
19691940
{
19701941
char *np = JSON_parse_value(json, p, pe, &result);
19711942
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -1975,7 +1946,7 @@ cs = 0;
19751946
if ( ++p == pe )
19761947
goto _test_eof10;
19771948
case 10:
1978-
#line 1979 "parser.c"
1949+
#line 1950 "parser.c"
19791950
switch( (*p) ) {
19801951
case 13: goto st10;
19811952
case 32: goto st10;
@@ -2064,7 +2035,7 @@ case 9:
20642035
_out: {}
20652036
}
20662037

2067-
#line 791 "parser.rl"
2038+
#line 762 "parser.rl"
20682039

20692040
if (cs >= JSON_quirks_mode_first_final && p == pe) {
20702041
return result;
@@ -2195,12 +2166,6 @@ void Init_parser(void)
21952166
i_leftshift = rb_intern("<<");
21962167
#ifdef HAVE_RUBY_ENCODING_H
21972168
CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
2198-
CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
2199-
CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
2200-
CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
2201-
CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
2202-
CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
2203-
i_encoding = rb_intern("encoding");
22042169
i_encode = rb_intern("encode");
22052170
#else
22062171
i_iconv = rb_intern("iconv");

ext/json/ext/parser/parser.rl

Lines changed: 4 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -66,9 +66,9 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
6666
}
6767

6868
#ifdef HAVE_RUBY_ENCODING_H
69-
static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
70-
CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
71-
static ID i_encoding, i_encode;
69+
static VALUE CEncoding_UTF_8;
70+
71+
static ID i_encode;
7272
#else
7373
static ID i_iconv;
7474
#endif
@@ -552,38 +552,9 @@ static VALUE convert_encoding(VALUE source)
552552
{
553553
char *ptr = RSTRING_PTR(source);
554554
long len = RSTRING_LEN(source);
555-
if (len < 2) {
556-
rb_raise(eParserError, "A JSON text must at least contain two octets!");
557-
}
558555
#ifdef HAVE_RUBY_ENCODING_H
559556
{
560-
VALUE encoding = rb_funcall(source, i_encoding, 0);
561-
if (encoding == CEncoding_ASCII_8BIT) {
562-
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
563-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32BE);
564-
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
565-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16BE);
566-
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
567-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32LE);
568-
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
569-
source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16LE);
570-
} else {
571-
source = rb_str_dup(source);
572-
FORCE_UTF8(source);
573-
}
574-
} else {
575-
source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
576-
}
577-
}
578-
#else
579-
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
580-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
581-
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
582-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
583-
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
584-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
585-
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
586-
source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
557+
source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
587558
}
588559
#endif
589560
return source;
@@ -918,12 +889,6 @@ void Init_parser(void)
918889
i_leftshift = rb_intern("<<");
919890
#ifdef HAVE_RUBY_ENCODING_H
920891
CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
921-
CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
922-
CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
923-
CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
924-
CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
925-
CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
926-
i_encoding = rb_intern("encoding");
927892
i_encode = rb_intern("encode");
928893
#else
929894
i_iconv = rb_intern("iconv");

0 commit comments

Comments
 (0)