ruby-changes:66238
From: Jean <ko1@a...>
Date: Mon, 17 May 2021 20:16:08 +0900 (JST)
Subject: [ruby-changes:66238] 1d2b4ccaf2 (master): [flori/json] Refactor json_string_unescape
https://git.ruby-lang.org/ruby.git/commit/?id=1d2b4ccaf2 From 1d2b4ccaf28596efee65c59dc69ea489a4237079 Mon Sep 17 00:00:00 2001 From: Jean Boussier <jean.boussier@g...> Date: Wed, 18 Nov 2020 11:33:42 +0100 Subject: [flori/json] Refactor json_string_unescape https://github.com/flori/json/commit/f398769332 --- ext/json/parser/parser.c | 4779 ++++++++++++++++++++++++++++----------------- ext/json/parser/parser.h | 2 +- ext/json/parser/parser.rl | 48 +- 3 files changed, 3007 insertions(+), 1822 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 29b5674..241ec0d 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -9,14 +9,14 @@ https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L9 static void enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) { - va_list args; - VALUE mesg; + va_list args; + VALUE mesg; - va_start(args, fmt); - mesg = rb_enc_vsprintf(enc, fmt, args); - va_end(args); + va_start(args, fmt); + mesg = rb_enc_vsprintf(enc, fmt, args); + va_end(args); - rb_exc_raise(rb_exc_new3(exc, mesg)); + rb_exc_raise(rb_exc_new3(exc, mesg)); } # define rb_enc_raise enc_raise # endif @@ -28,2152 +28,3305 @@ enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L28 /* unicode */ static const signed char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 }; static UTF32 unescape_unicode(const unsigned char *p) { - signed char b; - UTF32 result = 0; - b = digit_values[p[0]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - return result; + signed char b; + UTF32 result = 0; + b = digit_values[p[0]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | (unsigned char)b; + b = digit_values[p[1]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | (unsigned char)b; + b = digit_values[p[2]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | (unsigned char)b; + b = digit_values[p[3]]; + if (b < 0) return UNI_REPLACEMENT_CHAR; + result = (result << 4) | (unsigned char)b; + return result; } static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) { - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; + int len = 1; + if (ch <= 0x7F) { + buf[0] = (char) ch; + } else if (ch <= 0x07FF) { + buf[0] = (char) ((ch >> 6) | 0xC0); + buf[1] = (char) ((ch & 0x3F) | 0x80); + len++; + } else if (ch <= 0xFFFF) { + buf[0] = (char) ((ch >> 12) | 0xE0); + buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); + buf[2] = (char) ((ch & 0x3F) | 0x80); + len += 2; + } else if (ch <= 0x1fffff) { + buf[0] =(char) ((ch >> 18) | 0xF0); + buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); + buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); + buf[3] =(char) ((ch & 0x3F) | 0x80); + len += 3; + } else { + buf[0] = '?'; + } + return len; } static VALUE mJSON, mExt, cParser, eParserError, eNestingError; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, i_key_p, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus; +i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, +i_object_class, i_array_class, i_decimal_class, i_key_p, +i_deep_const_get, i_match, i_match_string, i_aset, i_aref, +i_leftshift, i_new, i_try_convert, i_freeze, i_uminus; -#line 126 "parser.rl" +#line 125 "parser.rl" -#line 108 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 27}; enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; +static const char _JSON_object_nfa_targs[] = { + 0, 0 +}; -#line 168 "parser.rl" +static const char _JSON_object_nfa_offsets[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +}; + +static const char _JSON_object_nfa_push_actions[] = { + 0, 0 +}; + +static const char _JSON_object_nfa_pop_trans[] = { + 0, 0 +}; + + +#line 167 "parser.rl" static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { - int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; + int cs = EVIL; + VALUE last_name = Qnil; + VALUE object_class = json->object_class; - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } + if (json->max_nesting && current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); + } - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); + *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); -#line 132 "parser.c" { - cs = JSON_object_start; + cs = (int)JSON_object_start; } -#line 183 "parser.rl" + #line 182 "parser.rl" + -#line 139 "parser.c" { - if ( p == pe ) + if ( p == pe ) goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 123 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) + switch ( cs ) + { + case 1: + goto st_case_1; + case 0: + goto st_case_0; + case 2: + goto st_case_2; + case 3: + goto st_case_3; + case 4: + goto st_case_4; + case 5: + goto st_case_5; + case 6: + goto st_case_6; + case 7: + goto st_case_7; + case 8: + goto st_case_8; + case 9: + goto st_case_9; + case 10: + goto st_case_10; + case 11: + goto st_case_11; + case 12: + goto st_case_12; + case 13: + goto st_case_13; + case 14: + goto st_case_14; + case 15: + goto st_case_15; + case 16: + goto st_case_16; + case 17: + goto st_case_17; + case 18: + goto st_case_18; + case 27: + goto st_case_27; + case 19: + goto st_case_19; + case 20: + goto st_case_20; + case 21: + goto st_case_21; + case 22: + goto st_case_22; + case 23: + goto st_case_23; + case 24: + goto st_case_24; + case 25: + goto st_case_25; + case 26: + goto st_case_26; + } + goto st_out; + st_case_1: + if ( ( (*( p))) == 123 ) { + goto st2; + } + { + goto st0; + } + st_case_0: + st0: + cs = 0; + goto _out; + st2: + p+= 1; + if ( p == pe ) goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 47: goto st23; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st2; - goto st0; -tr2: -#line 150 "parser.rl" - { - char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, p, pe, &last_name); - json->parsing_na (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/