[前][次][番号順一覧][スレッド一覧]

ruby-changes:1804

From: ko1@a...
Date: 28 Aug 2007 15:45:44 +0900
Subject: [ruby-changes:1804] matz - Ruby:r13295 (trunk): * string.c (rb_str_subseq): retrieve substring based on byte offset.

matz	2007-08-28 15:45:32 +0900 (Tue, 28 Aug 2007)

  New Revision: 13295

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/intern.h
    trunk/re.c
    trunk/string.c

  Log:
    * string.c (rb_str_subseq): retrieve substring based on byte offset.
    
    * string.c (rb_str_rindex_m): was confusing character offset and
      byte offset.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13295&r2=13294
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13295&r2=13294
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=13295&r2=13294
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/intern.h?r1=13295&r2=13294

Index: include/ruby/intern.h
===================================================================
--- include/ruby/intern.h	(revision 13294)
+++ include/ruby/intern.h	(revision 13295)
@@ -500,6 +500,7 @@
 VALUE rb_str_times(VALUE, VALUE);
 int rb_str_sublen(VALUE, int);
 VALUE rb_str_substr(VALUE, long, long);
+VALUE rb_str_subseq(VALUE, long, long);
 void rb_str_modify(VALUE);
 VALUE rb_str_freeze(VALUE);
 void rb_str_set_len(VALUE, long);
Index: re.c
===================================================================
--- re.c	(revision 13294)
+++ re.c	(revision 13295)
@@ -1114,7 +1114,7 @@
     if (start == -1) return Qnil;
     end = RMATCH(match)->END(nth);
     len = end - start;
-    str = rb_str_substr(RMATCH(match)->str, start, len);
+    str = rb_str_subseq(RMATCH(match)->str, start, len);
     OBJ_INFECT(str, match);
     return str;
 }
@@ -1144,7 +1144,7 @@
 
     if (NIL_P(match)) return Qnil;
     if (RMATCH(match)->BEG(0) == -1) return Qnil;
-    str = rb_str_substr(RMATCH(match)->str, 0, RMATCH(match)->BEG(0));
+    str = rb_str_subseq(RMATCH(match)->str, 0, RMATCH(match)->BEG(0));
     if (OBJ_TAINTED(match)) OBJ_TAINT(str);
     return str;
 }
@@ -1171,7 +1171,7 @@
     if (RMATCH(match)->BEG(0) == -1) return Qnil;
     str = RMATCH(match)->str;
     pos = RMATCH(match)->END(0);
-    str = rb_str_substr(str, pos, RSTRING_LEN(str) - pos);
+    str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
     if (OBJ_TAINTED(match)) OBJ_TAINT(str);
     return str;
 }
@@ -1228,7 +1228,7 @@
 	    rb_ary_push(ary, Qnil);
 	}
 	else {
-	    VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]);
+	    VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
 	    if (taint) OBJ_TAINT(str);
 	    rb_ary_push(ary, str);
 	}
@@ -1423,7 +1423,7 @@
 	int taint = OBJ_TAINTED(match);
 
 	for (i=0; i<regs->num_regs; i++) {
-	    VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]);
+	    VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
 	    if (taint) OBJ_TAINT(str);
 	    if (RTEST(rb_yield(str))) {
 		rb_ary_push(result, str);
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 13294)
+++ ChangeLog	(revision 13295)
@@ -1,3 +1,10 @@
+Tue Aug 28 15:42:09 2007  Yukihiro Matsumoto  <matz@r...>
+
+	* string.c (rb_str_subseq): retrieve substring based on byte offset.
+
+	* string.c (rb_str_rindex_m): was confusing character offset and
+	  byte offset.
+
 Tue Aug 28 14:23:31 2007  Nobuyoshi Nakada  <nobu@r...>
 
 	* string.c (rb_str_splice_0): should check to modify.  [ruby-dev:31665]
Index: string.c
===================================================================
--- string.c	(revision 13294)
+++ string.c	(revision 13295)
@@ -400,10 +400,10 @@
     return str;
 }
 
-static int
+static long
 str_strlen(VALUE str, rb_encoding *enc)
 {
-    int len;
+    long len;
 
     if (!enc) enc = rb_enc_get(str);
     len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc);
@@ -696,14 +696,14 @@
     return pp - p;
 }
 
-static int
-str_sublen(VALUE str, int pos, rb_encoding *enc)
+static long
+str_sublen(VALUE str, long pos, rb_encoding *enc)
 {
     if (rb_enc_mbmaxlen(enc) == 1 || pos < 0) return pos;
     else {
 	char *p = RSTRING_PTR(str);
 	char *e = p + pos;
-	int i;
+	long i;
 
 	i = 0;
 	while (p < e) {
@@ -721,6 +721,17 @@
 }
 
 VALUE
+rb_str_subseq(VALUE str, long beg, long len)
+{
+    VALUE str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
+
+    rb_enc_copy(str2, str);
+    OBJ_INFECT(str2, str);
+
+    return str2;
+}
+
+VALUE
 rb_str_substr(VALUE str, long beg, long len)
 {
     rb_encoding *enc = rb_enc_get(str);
@@ -1439,12 +1450,13 @@
 {
     VALUE sub;
     VALUE vpos;
-    long pos;
+    rb_encoding *enc = rb_enc_get(str);
+    long pos, len = str_strlen(str, enc);
 
     if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
 	pos = NUM2LONG(vpos);
 	if (pos < 0) {
-	    pos += RSTRING_LEN(str);
+	    pos += len;
 	    if (pos < 0) {
 		if (TYPE(sub) == T_REGEXP) {
 		    rb_backref_set(Qnil);
@@ -1452,17 +1464,15 @@
 		return Qnil;
 	    }
 	}
-	if (pos > RSTRING_LEN(str)) pos = RSTRING_LEN(str);
+	if (pos > len) pos = len;
     }
     else {
-	pos = RSTRING_LEN(str);
+	pos = len;
     }
 
     switch (TYPE(sub)) {
       case T_REGEXP:
 	/* enc = rb_get_check(str, sub); */
-	pos++; 			/* xxx adjust for Oniguruma 5.x */
-	pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos, rb_enc_get(str));
 	if (RREGEXP(sub)->len) {
 	    pos = rb_reg_adjust_startpos(sub, str, pos, 1);
 	    pos = rb_reg_search(sub, str, pos, 1);
@@ -1483,6 +1493,7 @@
       }
 	/* fall through */
       case T_STRING:
+	pos = str_sublen(str, pos, enc);
 	pos = rb_str_rindex(str, sub, pos);
 	if (pos >= 0) return LONG2NUM(pos);
 	break;
@@ -2076,6 +2087,7 @@
     for (i=0; i<argc; i++) {
 	buf[i] = argv[i];
     }
+    rb_str_modify(str);
     buf[i] = rb_str_new(0,0);
     result = rb_str_aref_m(argc, buf, str);
     if (!NIL_P(result)) {
@@ -3786,7 +3798,7 @@
 		    break;
 		}
 		else if (last_null == 1) {
-		    rb_ary_push(result, rb_str_substr(str, beg,
+		    rb_ary_push(result, rb_str_subseq(str, beg,
 						      rb_enc_mbclen(RSTRING_PTR(str)+beg,enc)));
 		    beg = start;
 		}
@@ -3797,7 +3809,7 @@
 		}
 	    }
 	    else {
-		rb_ary_push(result, rb_str_substr(str, beg, end-beg));
+		rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
 		beg = start = END(0);
 	    }
 	    last_null = 0;
@@ -3807,7 +3819,7 @@
 		if (BEG(idx) == END(idx))
 		    tmp = rb_str_new5(str, 0, 0);
 		else
-		    tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx));
+		    tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
 		rb_ary_push(result, tmp);
 	    }
 	    if (!NIL_P(limit) && lim <= ++i) break;
@@ -3817,7 +3829,7 @@
 	if (RSTRING_LEN(str) == beg)
 	    tmp = rb_str_new5(str, 0, 0);
 	else
-	    tmp = rb_str_substr(str, beg, RSTRING_LEN(str)-beg);
+	    tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
 	rb_ary_push(result, tmp);
     }
     if (NIL_P(limit) && lim == 0) {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]