[前][次][番号順一覧][スレッド一覧]

ruby-changes:42812

From: naruse <ko1@a...>
Date: Mon, 2 May 2016 20:07:28 +0900 (JST)
Subject: [ruby-changes:42812] naruse:r54886 (trunk): * re.c (str_coderange): to avoid function call when the string already

naruse	2016-05-02 21:04:04 +0900 (Mon, 02 May 2016)

  New Revision: 54886

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=54886

  Log:
    * re.c (str_coderange): avoid a function call when the string already
      has coderange information.
    
    * re.c (rb_reg_prepare_enc): add a shortcut path when the regexp has
      the same encoding as the given string.
    
    * re.c (rb_reg_prepare_re): avoid duplicated allocation of
      onig_errmsg_buffer.

  Modified files:
    trunk/ChangeLog
    trunk/re.c
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 54885)
+++ ChangeLog	(revision 54886)
@@ -1,3 +1,15 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Mon May  2 20:59:21 2016  NARUSE, Yui  <naruse@r...>
+
+	* re.c (str_coderange): to avoid function call when the string already
+	  has coderange information.
+
+	* re.c (rb_reg_prepare_enc): add shortcut path when the regexp has
+	  the same encoding of given string.
+
+	* re.c (rb_reg_prepare_re): avoid duplicated allocation of
+	  onig_errmsg_buffer.
+
+
 Mon May  2 12:34:52 2016  Tanaka Akira  <akr@f...>
 
 	* test/ruby/test_refinement.rb (test_inspect): Use Integer instead of
Index: re.c
===================================================================
--- re.c	(revision 54885)
+++ re.c	(revision 54886)
@@ -1370,11 +1370,21 @@ reg_enc_error(VALUE re, VALUE str) https://github.com/ruby/ruby/blob/trunk/re.c#L1370
 	     rb_enc_name(rb_enc_get(str)));
 }
 
+static inline int
+str_coderange(VALUE str)
+{
+    int cr = ENC_CODERANGE(str);
+    if (cr == ENC_CODERANGE_UNKNOWN) {
+	cr = rb_enc_str_coderange(str);
+    }
+    return cr;
+}
+
 static rb_encoding*
 rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
 {
     rb_encoding *enc = 0;
-    int cr = rb_enc_str_coderange(str);
+    int cr = str_coderange(str);
 
     if (cr == ENC_CODERANGE_BROKEN) {
         rb_raise(rb_eArgError,
@@ -1384,25 +1394,23 @@ rb_reg_prepare_enc(VALUE re, VALUE str, https://github.com/ruby/ruby/blob/trunk/re.c#L1394
 
     rb_reg_check(re);
     enc = rb_enc_get(str);
-    if (!rb_enc_asciicompat(enc)) {
-        if (RREGEXP_PTR(re)->enc != enc) {
-	    reg_enc_error(re, str);
-	}
+    if (RREGEXP_PTR(re)->enc == enc) {
+    }
+    else if (cr == ENC_CODERANGE_7BIT &&
+	    RREGEXP_PTR(re)->enc == rb_usascii_encoding()) {
+	enc = RREGEXP_PTR(re)->enc;
+    }
+    else if (!rb_enc_asciicompat(enc)) {
+	reg_enc_error(re, str);
     }
     else if (rb_reg_fixed_encoding_p(re)) {
-        if (RREGEXP_PTR(re)->enc != enc &&
-	    (!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) ||
+        if ((!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) ||
 	     cr != ENC_CODERANGE_7BIT)) {
 	    reg_enc_error(re, str);
 	}
 	enc = RREGEXP_PTR(re)->enc;
     }
-    else if (cr == ENC_CODERANGE_7BIT &&
-	    (RREGEXP_PTR(re)->enc == rb_usascii_encoding()
-	     )) {
-	enc = RREGEXP_PTR(re)->enc;
-    }
-    if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
+    else if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
 	enc != rb_ascii8bit_encoding() &&
 	cr != ENC_CODERANGE_7BIT) {
 	rb_warn("regexp match /.../n against to %s string",
@@ -1412,10 +1420,9 @@ rb_reg_prepare_enc(VALUE re, VALUE str, https://github.com/ruby/ruby/blob/trunk/re.c#L1420
 }
 
 regex_t *
-rb_reg_prepare_re(VALUE re, VALUE str)
+rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
 {
     regex_t *reg = RREGEXP_PTR(re);
-    onig_errmsg_buffer err = "";
     int r;
     OnigErrorInfo einfo;
     const char *pattern;
@@ -1450,6 +1457,13 @@ rb_reg_prepare_re(VALUE re, VALUE str) https://github.com/ruby/ruby/blob/trunk/re.c#L1457
     return reg;
 }
 
+regex_t *
+rb_reg_prepare_re(VALUE re, VALUE str)
+{
+    onig_errmsg_buffer err = "";
+    return rb_reg_prepare_re0(re, str, err);
+}
+
 long
 rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
 {
@@ -1491,13 +1505,14 @@ rb_reg_search0(VALUE re, VALUE str, long https://github.com/ruby/ruby/blob/trunk/re.c#L1505
     char *range = RSTRING_PTR(str);
     regex_t *reg;
     int tmpreg;
+    onig_errmsg_buffer err = "";
 
     if (pos > RSTRING_LEN(str) || pos < 0) {
 	rb_backref_set(Qnil);
 	return -1;
     }
 
-    reg = rb_reg_prepare_re(re, str);
+    reg = rb_reg_prepare_re0(re, str, err);
     tmpreg = reg != RREGEXP_PTR(re);
     if (!tmpreg) RREGEXP(re)->usecnt++;
 
@@ -1540,7 +1555,6 @@ rb_reg_search0(VALUE re, VALUE str, long https://github.com/ruby/ruby/blob/trunk/re.c#L1555
 	    return result;
 	}
 	else {
-	    onig_errmsg_buffer err = "";
 	    onig_error_code_to_str((UChar*)err, (int)result);
 	    rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
 	}
@@ -2558,7 +2572,7 @@ rb_reg_preprocess_dregexp(VALUE ary, int https://github.com/ruby/ruby/blob/trunk/re.c#L2572
 	src_enc = rb_enc_get(str);
 	if (options & ARG_ENCODING_NONE &&
 		src_enc != ascii8bit) {
-	    if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT)
+	    if (str_coderange(str) != ENC_CODERANGE_7BIT)
 		rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
 	    else
 		src_enc = ascii8bit;
@@ -2669,7 +2683,7 @@ rb_reg_initialize_str(VALUE obj, VALUE s https://github.com/ruby/ruby/blob/trunk/re.c#L2683
     if (options & ARG_ENCODING_NONE) {
         rb_encoding *ascii8bit = rb_ascii8bit_encoding();
         if (enc != ascii8bit) {
-            if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
+            if (str_coderange(str) != ENC_CODERANGE_7BIT) {
                 errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
                 return -1;
             }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]