[前][次][番号順一覧][スレッド一覧]

ruby-changes:2680

From: ko1@a...
Date: 10 Dec 2007 14:13:28 +0900
Subject: [ruby-changes:2680] nobu - Ruby:r14171 (trunk): * re.c (rb_reg_search): return byte offset.

nobu	2007-12-10 13:50:35 +0900 (Mon, 10 Dec 2007)

  New Revision: 14171

  Modified files:
    trunk/ChangeLog
    trunk/re.c
    trunk/string.c

  Log:
    * re.c (rb_reg_search): return byte offset.  [ruby-dev:32452]
    
    * re.c (rb_reg_match, rb_reg_match2, rb_reg_match_m): convert byte
      offset to char index.
    
    * string.c (rb_str_index): return byte offset.  [ruby-dev:32472]
    
    * string.c (rb_str_split_m): calculate in byte offset.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14171&r2=14170
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14171&r2=14170
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=14171&r2=14170

Index: re.c
===================================================================
--- re.c	(revision 14170)
+++ re.c	(revision 14171)
@@ -1070,7 +1070,7 @@
     OBJ_INFECT(match, re);
     OBJ_INFECT(match, str);
 
-    return rb_str_sublen(RMATCH(match)->str, result);
+    return result;
 }
 
 VALUE
@@ -2123,28 +2123,24 @@
     }
 }
 
-static VALUE
+static long
 rb_reg_match_pos(VALUE re, VALUE str, long pos)
 {
     if (NIL_P(str)) {
 	rb_backref_set(Qnil);
-	return Qnil;
+	return -1;
     }
     str = reg_operand(str, Qtrue);
     if (pos != 0) {
 	if (pos < 0) {
 	    pos += RSTRING_LEN(str);
 	    if (pos < 0) {
-		return Qnil;
+		return pos;
 	    }
 	}
 	pos = rb_reg_adjust_startpos(re, str, pos, 0);
     }
-    pos = rb_reg_search(re, str, pos, 0);
-    if (pos < 0) {
-	return Qnil;
-    }
-    return LONG2FIX(pos);
+    return rb_reg_search(re, str, pos, 0);
 }
 
 /*
@@ -2160,7 +2156,10 @@
 VALUE
 rb_reg_match(VALUE re, VALUE str)
 {
-    return rb_reg_match_pos(re, str, 0);
+    long pos = rb_reg_match_pos(re, str, 0);
+    if (pos < 0) return Qnil;
+    pos = rb_str_sublen(str, pos);
+    return LONG2FIX(pos);
 }
 
 /*
@@ -2225,6 +2224,7 @@
     if (start < 0) {
 	return Qnil;
     }
+    start = rb_str_sublen(line, start);
     return LONG2FIX(start);
 }
 
@@ -2270,8 +2270,8 @@
 	pos = 0;
     }
 
-    result = rb_reg_match_pos(re, str, pos);
-    if (NIL_P(result)) {
+    pos = rb_reg_match_pos(re, str, pos);
+    if (pos < 0) {
 	rb_backref_set(Qnil);
 	return Qnil;
     }
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 14170)
+++ ChangeLog	(revision 14171)
@@ -1,3 +1,14 @@
+Mon Dec 10 13:50:33 2007  Nobuyoshi Nakada  <nobu@r...>
+
+	* re.c (rb_reg_search): return byte offset.  [ruby-dev:32452]
+
+	* re.c (rb_reg_match, rb_reg_match2, rb_reg_match_m): convert byte
+	  offset to char index.
+
+	* string.c (rb_str_index): return byte offset.  [ruby-dev:32472]
+
+	* string.c (rb_str_split_m): calculate in byte offset.
+
 Mon Dec 10 09:56:29 2007  Koichi Sasada  <ko1@a...>
 
 	* benchmark/bm_vm1_neq.rb, bm_vm1_not.rb: added.
Index: string.c
===================================================================
--- string.c	(revision 14170)
+++ string.c	(revision 14171)
@@ -1412,7 +1412,7 @@
 rb_str_index(VALUE str, VALUE sub, long offset)
 {
     long pos;
-    char *s;
+    char *s, *sptr;
     long len, slen;
     rb_encoding *enc;
 
@@ -1424,12 +1424,27 @@
 	if (offset < 0) return -1;
     }
     if (len - offset < slen) return -1;
+    s = RSTRING_PTR(str);
+    if (offset) {
+	s = str_nth(s, RSTRING_END(str), offset, enc);
+	offset = s - RSTRING_PTR(str);
+    }
     if (slen == 0) return offset;
-    s = offset ? str_nth(RSTRING_PTR(str), RSTRING_END(str), offset, enc) : RSTRING_PTR(str);
     /* need proceed one character at a time */
-    pos = rb_memsearch(RSTRING_PTR(sub), RSTRING_LEN(sub),
-		       s, RSTRING_LEN(str)-(s - RSTRING_PTR(str)));
-    if (pos < 0) return pos;
+    sptr = RSTRING_PTR(sub);
+    slen = RSTRING_LEN(sub);
+    len = RSTRING_LEN(str) - offset;
+    for (;;) {
+	char *t;
+	pos = rb_memsearch(sptr, slen, s, len);
+	if (pos < 0) return pos;
+	t = (char *)onigenc_get_right_adjust_char_head(enc, (const UChar *)s,
+						       (const UChar *)s + pos);
+	if (t == s) break;
+	if ((len -= t - s) <= 0) return -1;
+	offset += t - s;
+	s = t;
+    }
     return pos + offset;
 }
 
@@ -4024,34 +4039,35 @@
     if (awk_split) {
 	char *ptr = RSTRING_PTR(str);
 	char *eptr = RSTRING_END(str);
+	char *bptr = ptr;
 	int skip = 1;
 	int c;
 
 	end = beg;
 	while (ptr < eptr) {
 	    c = rb_enc_codepoint(ptr, eptr, enc);
+	    ptr += rb_enc_mbclen(ptr, eptr, enc);
 	    if (skip) {
 		if (rb_enc_isspace(c, enc)) {
-		    beg++;
+		    beg = ptr - bptr;
 		}
 		else {
-		    end = beg+1;
+		    end = ptr - bptr;
 		    skip = 0;
 		    if (!NIL_P(limit) && lim <= i) break;
 		}
 	    }
 	    else {
 		if (rb_enc_isspace(c, enc)) {
-		    rb_ary_push(result, rb_str_substr(str, beg, end-beg));
+		    rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
 		    skip = 1;
-		    beg = end + 1;
+		    beg = ptr - bptr;
 		    if (!NIL_P(limit)) ++i;
 		}
 		else {
-		    end++;
+		    end = ptr - bptr;
 		}
 	    }
-	    ptr += rb_enc_codelen(c, enc);
 	}
     }
     else {
@@ -5114,14 +5130,13 @@
       failed:
 	return rb_ary_new3(3, str, rb_str_new(0,0),rb_str_new(0,0));
     }
-    pos = rb_str_sublen(str, pos);
     if (regex) {
 	sep = rb_str_subpat(str, sep, 0);
 	if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
     }
-    return rb_ary_new3(3, rb_str_substr(str, 0, pos),
+    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
 		          sep,
-		          rb_str_substr(str, pos+RSTRING_LEN(sep),
+		          rb_str_subseq(str, pos+RSTRING_LEN(sep),
 					     RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
 }
 
@@ -5162,13 +5177,12 @@
     if (pos < 0) {
 	return rb_ary_new3(3, rb_str_new(0,0),rb_str_new(0,0), str);
     }
-    pos = rb_str_sublen(str, pos);
     if (regex) {
 	sep = rb_reg_nth_match(0, rb_backref_get());
     }
-    return rb_ary_new3(3, rb_str_substr(str, 0, pos),
+    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
 		          sep,
-		          rb_str_substr(str, pos+RSTRING_LEN(sep),
+		          rb_str_subseq(str, pos+RSTRING_LEN(sep),
 					     RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
 }
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]