ruby-changes:42812
From: naruse <ko1@a...>
Date: Mon, 2 May 2016 20:07:28 +0900 (JST)
Subject: [ruby-changes:42812] naruse:r54886 (trunk): * re.c (str_coderange): to avoid function call when the string already
naruse 2016-05-02 21:04:04 +0900 (Mon, 02 May 2016) New Revision: 54886 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=54886 Log: * re.c (str_coderange): avoid a function call when the string already has coderange information. * re.c (rb_reg_prepare_enc): add shortcut path when the regexp has the same encoding as the given string. * re.c (rb_reg_prepare_re): avoid duplicated allocation of onig_errmsg_buffer. Modified files: trunk/ChangeLog trunk/re.c Index: ChangeLog =================================================================== --- ChangeLog (revision 54885) +++ ChangeLog (revision 54886) @@ -1,3 +1,15 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Mon May 2 20:59:21 2016 NARUSE, Yui <naruse@r...> + + * re.c (str_coderange): to avoid function call when the string already + has coderange information. + + * re.c (rb_reg_prepare_enc): add shortcut path when the regexp has + the same encoding of given string. + + * re.c (rb_reg_prepare_re): avoid duplicated allocation of + onig_errmsg_buffer. 
+ + Mon May 2 12:34:52 2016 Tanaka Akira <akr@f...> * test/ruby/test_refinement.rb (test_inspect): Use Integer instead of Index: re.c =================================================================== --- re.c (revision 54885) +++ re.c (revision 54886) @@ -1370,11 +1370,21 @@ reg_enc_error(VALUE re, VALUE str) https://github.com/ruby/ruby/blob/trunk/re.c#L1370 rb_enc_name(rb_enc_get(str))); } +static inline int +str_coderange(VALUE str) +{ + int cr = ENC_CODERANGE(str); + if (cr == ENC_CODERANGE_UNKNOWN) { + cr = rb_enc_str_coderange(str); + } + return cr; +} + static rb_encoding* rb_reg_prepare_enc(VALUE re, VALUE str, int warn) { rb_encoding *enc = 0; - int cr = rb_enc_str_coderange(str); + int cr = str_coderange(str); if (cr == ENC_CODERANGE_BROKEN) { rb_raise(rb_eArgError, @@ -1384,25 +1394,23 @@ rb_reg_prepare_enc(VALUE re, VALUE str, https://github.com/ruby/ruby/blob/trunk/re.c#L1394 rb_reg_check(re); enc = rb_enc_get(str); - if (!rb_enc_asciicompat(enc)) { - if (RREGEXP_PTR(re)->enc != enc) { - reg_enc_error(re, str); - } + if (RREGEXP_PTR(re)->enc == enc) { + } + else if (cr == ENC_CODERANGE_7BIT && + RREGEXP_PTR(re)->enc == rb_usascii_encoding()) { + enc = RREGEXP_PTR(re)->enc; + } + else if (!rb_enc_asciicompat(enc)) { + reg_enc_error(re, str); } else if (rb_reg_fixed_encoding_p(re)) { - if (RREGEXP_PTR(re)->enc != enc && - (!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) || + if ((!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) || cr != ENC_CODERANGE_7BIT)) { reg_enc_error(re, str); } enc = RREGEXP_PTR(re)->enc; } - else if (cr == ENC_CODERANGE_7BIT && - (RREGEXP_PTR(re)->enc == rb_usascii_encoding() - )) { - enc = RREGEXP_PTR(re)->enc; - } - if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && + else if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && enc != rb_ascii8bit_encoding() && cr != ENC_CODERANGE_7BIT) { rb_warn("regexp match /.../n against to %s string", @@ -1412,10 +1420,9 @@ rb_reg_prepare_enc(VALUE re, VALUE str, 
https://github.com/ruby/ruby/blob/trunk/re.c#L1420 } regex_t * -rb_reg_prepare_re(VALUE re, VALUE str) +rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) { regex_t *reg = RREGEXP_PTR(re); - onig_errmsg_buffer err = ""; int r; OnigErrorInfo einfo; const char *pattern; @@ -1450,6 +1457,13 @@ rb_reg_prepare_re(VALUE re, VALUE str) https://github.com/ruby/ruby/blob/trunk/re.c#L1457 return reg; } +regex_t * +rb_reg_prepare_re(VALUE re, VALUE str) +{ + onig_errmsg_buffer err = ""; + return rb_reg_prepare_re0(re, str, err); +} + long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse) { @@ -1491,13 +1505,14 @@ rb_reg_search0(VALUE re, VALUE str, long https://github.com/ruby/ruby/blob/trunk/re.c#L1505 char *range = RSTRING_PTR(str); regex_t *reg; int tmpreg; + onig_errmsg_buffer err = ""; if (pos > RSTRING_LEN(str) || pos < 0) { rb_backref_set(Qnil); return -1; } - reg = rb_reg_prepare_re(re, str); + reg = rb_reg_prepare_re0(re, str, err); tmpreg = reg != RREGEXP_PTR(re); if (!tmpreg) RREGEXP(re)->usecnt++; @@ -1540,7 +1555,6 @@ rb_reg_search0(VALUE re, VALUE str, long https://github.com/ruby/ruby/blob/trunk/re.c#L1555 return result; } else { - onig_errmsg_buffer err = ""; onig_error_code_to_str((UChar*)err, (int)result); rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); } @@ -2558,7 +2572,7 @@ rb_reg_preprocess_dregexp(VALUE ary, int https://github.com/ruby/ruby/blob/trunk/re.c#L2572 src_enc = rb_enc_get(str); if (options & ARG_ENCODING_NONE && src_enc != ascii8bit) { - if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) + if (str_coderange(str) != ENC_CODERANGE_7BIT) rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); else src_enc = ascii8bit; @@ -2669,7 +2683,7 @@ rb_reg_initialize_str(VALUE obj, VALUE s https://github.com/ruby/ruby/blob/trunk/re.c#L2683 if (options & ARG_ENCODING_NONE) { rb_encoding *ascii8bit = rb_ascii8bit_encoding(); if (enc != ascii8bit) { - if 
(rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + if (str_coderange(str) != ENC_CODERANGE_7BIT) { errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); return -1; } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/