[前][次][番号順一覧][スレッド一覧]

ruby-changes:6123

From: akr <ko1@a...>
Date: Sat, 28 Jun 2008 21:26:10 +0900 (JST)
Subject: [ruby-changes:6123] Ruby:r17635 (trunk): * include/ruby/ruby.h (struct RRegexp): new field usecnt. replace

akr	2008-06-28 21:25:45 +0900 (Sat, 28 Jun 2008)

  New Revision: 17635

  Modified files:
    trunk/ChangeLog
    trunk/gc.c
    trunk/include/ruby/ruby.h
    trunk/marshal.c
    trunk/re.c
    trunk/string.c

  Log:
    * include/ruby/ruby.h (struct RRegexp): new field usecnt.  replace
      str and len by src.
    
    * gc.c (gc_mark_children): mark src field of regexp.
      (obj_free): don't free str field.
    
    * re.c (REG_BUSY): removed.
      (rb_reg_initialize): prohibit re-initializing a regexp.
      (rb_reg_search): use usecnt to prevent freeing a regexp that is
      currently in use.  this prevents the SEGV triggered by:
        r = /\A((a.)*(a.)*)*b/
        r =~ "ab" + "\xc2\xa1".force_encoding("euc-jp")
        t = Thread.new { r =~ "ab"*8 + "\xc2\xa1".force_encoding("utf-8")}
        sleep 0.2
        r =~ "ab"*8 + "\xc2\xa1".force_encoding("euc-jp")
    


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/ruby.h?r1=17635&r2=17634&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=17635&r2=17634&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=17635&r2=17634&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/gc.c?r1=17635&r2=17634&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/marshal.c?r1=17635&r2=17634&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=17635&r2=17634&diff_format=u

Index: include/ruby/ruby.h
===================================================================
--- include/ruby/ruby.h	(revision 17634)
+++ include/ruby/ruby.h	(revision 17635)
@@ -522,9 +522,12 @@
 struct RRegexp {
     struct RBasic basic;
     struct re_pattern_buffer *ptr;
-    long len;
-    char *str;
+    VALUE src;
+    unsigned long usecnt;
 };
+#define RREGEXP_SRC(r) RREGEXP(r)->src
+#define RREGEXP_SRC_PTR(r) RSTRING_PTR(RREGEXP(r)->src)
+#define RREGEXP_SRC_LEN(r) RSTRING_LEN(RREGEXP(r)->src)
 
 struct RHash {
     struct RBasic basic;
Index: re.c
===================================================================
--- re.c	(revision 17634)
+++ re.c	(revision 17635)
@@ -238,7 +238,6 @@
 
 #define REG_LITERAL FL_USER5
 #define REG_ENCODING_NONE FL_USER6
-#define REG_BUSY FL_USER7
 
 #define KCODE_FIXED FL_USER4
 
@@ -309,7 +308,7 @@
 static void
 rb_reg_check(VALUE re)
 {
-    if (!RREGEXP(re)->ptr || !RREGEXP(re)->str) {
+    if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
 	rb_raise(rb_eTypeError, "uninitialized Regexp");
     }
 }
@@ -416,7 +415,7 @@
     VALUE str;
 
     rb_reg_check(re);
-    str = rb_enc_str_new(RREGEXP(re)->str,RREGEXP(re)->len, rb_enc_get(re));
+    str = rb_enc_str_new(RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), rb_enc_get(re));
     if (OBJ_TAINTED(re)) OBJ_TAINT(str);
     return str;
 }
@@ -437,7 +436,7 @@
 rb_reg_inspect(VALUE re)
 {
     rb_reg_check(re);
-    return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
+    return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
 }
 
 
@@ -475,8 +474,8 @@
 
     rb_enc_copy(str, re);
     options = RREGEXP(re)->ptr->options;
-    ptr = (UChar*)RREGEXP(re)->str;
-    len = RREGEXP(re)->len;
+    ptr = (UChar*)RREGEXP_SRC_PTR(re);
+    len = RREGEXP_SRC_LEN(re);
   again:
     if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
 	int err = 1;
@@ -528,8 +527,8 @@
 	}
 	if (err) {
 	    options = RREGEXP(re)->ptr->options;
-	    ptr = (UChar*)RREGEXP(re)->str;
-	    len = RREGEXP(re)->len;
+	    ptr = (UChar*)RREGEXP_SRC_PTR(re);
+	    len = RREGEXP_SRC_LEN(re);
 	}
     }
 
@@ -1220,10 +1219,10 @@
 
     rb_reg_check(re);
     reg = RREGEXP(re)->ptr;
-    pattern = RREGEXP(re)->str;
+    pattern = RREGEXP_SRC_PTR(re);
 
     unescaped = rb_reg_preprocess(
-	pattern, pattern + RREGEXP(re)->len, enc,
+	pattern, pattern + RREGEXP_SRC_LEN(re), enc,
 	&fixed_enc, err);
 
     if (unescaped == Qnil) {
@@ -1236,7 +1235,7 @@
 		 OnigDefaultSyntax, &einfo);
     if (r) {
 	onig_error_code_to_str((UChar*)err, r, &einfo);
-	rb_reg_raise(pattern, RREGEXP(re)->len, err, re);
+	rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
     }
 
     RB_GC_GUARD(unescaped);
@@ -1281,8 +1280,8 @@
     VALUE match;
     struct re_registers regi, *regs = &regi;
     char *range = RSTRING_PTR(str);
-    regex_t *reg0 = RREGEXP(re)->ptr, *reg;
-    int busy = FL_TEST(re, REG_BUSY);
+    regex_t *reg;
+    int tmpreg;
 
     if (pos > RSTRING_LEN(str) || pos < 0) {
 	rb_backref_set(Qnil);
@@ -1290,6 +1289,8 @@
     }
 
     reg = rb_reg_prepare_re(re, str);
+    tmpreg = reg != RREGEXP(re)->ptr;
+    if (!tmpreg) RREGEXP(re)->usecnt++;
 
     match = rb_backref_get();
     if (!NIL_P(match)) {
@@ -1303,7 +1304,6 @@
     if (NIL_P(match)) {
 	MEMZERO(regs, struct re_registers, 1);
     }
-    FL_SET(re, REG_BUSY);
     if (!reverse) {
 	range += RSTRING_LEN(str);
     }
@@ -1313,17 +1313,16 @@
 			 ((UChar*)(RSTRING_PTR(str)) + pos),
 			 ((UChar*)range),
 			 regs, ONIG_OPTION_NONE);
-
-    if (RREGEXP(re)->ptr != reg) {
-	if (busy) {
+    if (!tmpreg) RREGEXP(re)->usecnt--;
+    if (tmpreg) {
+	if (RREGEXP(re)->usecnt) {
 	    onig_free(reg);
 	}
 	else {
-	    onig_free(reg0);
+	    onig_free(RREGEXP(re)->ptr);
 	    RREGEXP(re)->ptr = reg;
 	}
     }
-    if (!busy) FL_UNSET(re, REG_BUSY);
     if (result < 0) {
 	if (regs == &regi)
 	    onig_region_free(regs, 0);
@@ -1334,7 +1333,7 @@
 	else {
 	    onig_errmsg_buffer err = "";
 	    onig_error_code_to_str((UChar*)err, result);
-	    rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0);
+	    rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, 0);
 	}
     }
 
@@ -2295,10 +2294,9 @@
     rb_check_frozen(obj);
     if (FL_TEST(obj, REG_LITERAL))
 	rb_raise(rb_eSecurityError, "can't modify literal regexp");
-    if (re->ptr) onig_free(re->ptr);
-    if (re->str) xfree(re->str);
+    if (re->ptr)
+        rb_raise(rb_eTypeError, "already initialized regexp");
     re->ptr = 0;
-    re->str = 0;
 
     unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
     if (unescaped == Qnil)
@@ -2330,10 +2328,8 @@
     re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
 			  options & ARG_REG_OPTION_MASK, err);
     if (!re->ptr) return -1;
-    re->str = ALLOC_N(char, len+1);
-    memcpy(re->str, s, len);
-    re->str[len] = '\0';
-    re->len = len;
+    re->src = rb_enc_str_new(s, len, enc);
+    OBJ_FREEZE(re->src);
     RB_GC_GUARD(unescaped);
     return 0;
 }
@@ -2366,8 +2362,8 @@
     OBJSETUP(re, klass, T_REGEXP);
 
     re->ptr = 0;
-    re->len = 0;
-    re->str = 0;
+    re->src = 0;
+    re->usecnt = 0;
 
     return (VALUE)re;
 }
@@ -2431,9 +2427,9 @@
 rb_reg_regcomp(VALUE str)
 {
     volatile VALUE save_str = str;
-    if (reg_cache && RREGEXP(reg_cache)->len == RSTRING_LEN(str)
+    if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
 	&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
-	&& memcmp(RREGEXP(reg_cache)->str, RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
+	&& memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
 	return reg_cache;
 
     return reg_cache = rb_reg_new_str(save_str, 0);
@@ -2454,8 +2450,8 @@
 
     rb_reg_check(re);
     hashval = RREGEXP(re)->ptr->options;
-    len = RREGEXP(re)->len;
-    p  = RREGEXP(re)->str;
+    len = RREGEXP_SRC_LEN(re);
+    p  = RREGEXP_SRC_PTR(re);
     while (len--) {
 	hashval = hashval * 33 + *p++;
     }
@@ -2488,9 +2484,9 @@
     rb_reg_check(re1); rb_reg_check(re2);
     if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
     if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
-    if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
+    if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
     if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
-    if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0) {
+    if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) {
 	return Qtrue;
     }
     return Qfalse;
@@ -2756,8 +2752,8 @@
 	}
 	rb_reg_check(re);
 	flags = rb_reg_options(re);
-	ptr = RREGEXP(re)->str;
-	len = RREGEXP(re)->len;
+	ptr = RREGEXP_SRC_PTR(re);
+	len = RREGEXP_SRC_LEN(re);
 	enc = rb_enc_get(re);
 	if (rb_reg_initialize(self, ptr, len, enc, flags, err)) {
 	    str = rb_enc_str_new(ptr, len, enc);
@@ -3107,8 +3103,8 @@
 	rb_raise(rb_eTypeError, "wrong argument type");
     }
     rb_reg_check(re);
-    s = RREGEXP(re)->str;
-    len = RREGEXP(re)->len;
+    s = RREGEXP_SRC_PTR(re);
+    len = RREGEXP_SRC_LEN(re);
     if (rb_reg_initialize(copy, s, len, rb_enc_get(re), rb_reg_options(re), err) != 0) {
 	rb_reg_raise(s, len, err, re);
     }
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 17634)
+++ ChangeLog	(revision 17635)
@@ -1,3 +1,21 @@
+Sat Jun 28 21:25:08 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/ruby.h (struct RRegexp): new field usecnt.  replace
+	  str and len by src.
+
+	* gc.c (gc_mark_children): mark src field of regexp.
+	  (obj_free): don't free str field.
+
+	* re.c (REG_BUSY): removed.
+	  (rb_reg_initialize): prohibit re-initialize regexp.
+	  (rb_reg_search): use usecnt to prevent freeing regexp currently
+	  using.  this prevents SEGV by:
+	    r = /\A((a.)*(a.)*)*b/
+	    r =~ "ab" + "\xc2\xa1".force_encoding("euc-jp")
+	    t = Thread.new { r =~ "ab"*8 + "\xc2\xa1".force_encoding("utf-8")}
+	    sleep 0.2
+	    r =~ "ab"*8 + "\xc2\xa1".force_encoding("euc-jp")
+
 Sat Jun 28 21:15:43 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* include/ruby/intern.h (rb_str_new2, rb_tainted_str_new2,
Index: string.c
===================================================================
--- string.c	(revision 17634)
+++ string.c	(revision 17635)
@@ -2247,7 +2247,7 @@
     switch (TYPE(sub)) {
       case T_REGEXP:
 	/* enc = rb_get_check(str, sub); */
-	if (RREGEXP(sub)->len) {
+	if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
 	    pos = rb_reg_adjust_startpos(sub, str, pos, 1);
 	    pos = rb_reg_search(sub, str, pos, 1);
 	    pos = rb_str_sublen(str, pos);
Index: gc.c
===================================================================
--- gc.c	(revision 17634)
+++ gc.c	(revision 17635)
@@ -1267,6 +1267,9 @@
         break;
 
       case T_REGEXP:
+        gc_mark(objspace, obj->as.regexp.src, lev);
+        break;
+
       case T_FLOAT:
       case T_BIGNUM:
 	break;
@@ -1505,9 +1508,6 @@
 	if (RANY(obj)->as.regexp.ptr) {
 	    onig_free(RANY(obj)->as.regexp.ptr);
 	}
-	if (RANY(obj)->as.regexp.str) {
-	    xfree(RANY(obj)->as.regexp.str);
-	}
 	break;
       case T_DATA:
 	if (DATA_PTR(obj)) {
Index: marshal.c
===================================================================
--- marshal.c	(revision 17634)
+++ marshal.c	(revision 17635)
@@ -693,10 +693,13 @@
 	    break;
 
 	  case T_REGEXP:
-	    w_uclass(obj, rb_cRegexp, arg);
-	    w_byte(TYPE_REGEXP, arg);
-	    w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
-	    w_byte((char)rb_reg_options(obj), arg);
+            w_uclass(obj, rb_cRegexp, arg);
+            w_byte(TYPE_REGEXP, arg);
+            {
+                int opts = rb_reg_options(obj);
+                w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
+                w_byte((char)opts, arg);
+            }
 	    break;
 
 	  case T_ARRAY:

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]