ruby-changes:2680
From: ko1@a...
Date: 10 Dec 2007 14:13:28 +0900
Subject: [ruby-changes:2680] nobu - Ruby:r14171 (trunk): * re.c (rb_reg_search): return byte offset.
nobu 2007-12-10 13:50:35 +0900 (Mon, 10 Dec 2007)
New Revision: 14171
Modified files:
trunk/ChangeLog
trunk/re.c
trunk/string.c
Log:
* re.c (rb_reg_search): return byte offset. [ruby-dev:32452]
* re.c (rb_reg_match, rb_reg_match2, rb_reg_match_m): convert byte
  offset to char index.
* string.c (rb_str_index): return byte offset. [ruby-dev:32472]
* string.c (rb_str_split_m): calculate in byte offset.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14171&r2=14170
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14171&r2=14170
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=14171&r2=14170
Index: re.c
===================================================================
--- re.c (revision 14170)
+++ re.c (revision 14171)
@@ -1070,7 +1070,7 @@
OBJ_INFECT(match, re);
OBJ_INFECT(match, str);
- return rb_str_sublen(RMATCH(match)->str, result);
+ return result;
}
VALUE
@@ -2123,28 +2123,24 @@
}
}
-static VALUE
+static long
rb_reg_match_pos(VALUE re, VALUE str, long pos)
{
if (NIL_P(str)) {
rb_backref_set(Qnil);
- return Qnil;
+ return -1;
}
str = reg_operand(str, Qtrue);
if (pos != 0) {
if (pos < 0) {
pos += RSTRING_LEN(str);
if (pos < 0) {
- return Qnil;
+ return pos;
}
}
pos = rb_reg_adjust_startpos(re, str, pos, 0);
}
- pos = rb_reg_search(re, str, pos, 0);
- if (pos < 0) {
- return Qnil;
- }
- return LONG2FIX(pos);
+ return rb_reg_search(re, str, pos, 0);
}
/*
@@ -2160,7 +2156,10 @@
VALUE
rb_reg_match(VALUE re, VALUE str)
{
- return rb_reg_match_pos(re, str, 0);
+ long pos = rb_reg_match_pos(re, str, 0);
+ if (pos < 0) return Qnil;
+ pos = rb_str_sublen(str, pos);
+ return LONG2FIX(pos);
}
/*
@@ -2225,6 +2224,7 @@
if (start < 0) {
return Qnil;
}
+ start = rb_str_sublen(line, start);
return LONG2FIX(start);
}
@@ -2270,8 +2270,8 @@
pos = 0;
}
- result = rb_reg_match_pos(re, str, pos);
- if (NIL_P(result)) {
+ pos = rb_reg_match_pos(re, str, pos);
+ if (pos < 0) {
rb_backref_set(Qnil);
return Qnil;
}
Index: ChangeLog
===================================================================
--- ChangeLog (revision 14170)
+++ ChangeLog (revision 14171)
@@ -1,3 +1,14 @@
+Mon Dec 10 13:50:33 2007 Nobuyoshi Nakada <nobu@r...>
+
+ * re.c (rb_reg_search): return byte offset. [ruby-dev:32452]
+
+ * re.c (rb_reg_match, rb_reg_match2, rb_reg_match_m): convert byte
+ offset to char index.
+
+ * string.c (rb_str_index): return byte offset. [ruby-dev:32472]
+
+ * string.c (rb_str_split_m): calculate in byte offset.
+
Mon Dec 10 09:56:29 2007 Koichi Sasada <ko1@a...>
* benchmark/bm_vm1_neq.rb, bm_vm1_not.rb: added.
Index: string.c
===================================================================
--- string.c (revision 14170)
+++ string.c (revision 14171)
@@ -1412,7 +1412,7 @@
rb_str_index(VALUE str, VALUE sub, long offset)
{
long pos;
- char *s;
+ char *s, *sptr;
long len, slen;
rb_encoding *enc;
@@ -1424,12 +1424,27 @@
if (offset < 0) return -1;
}
if (len - offset < slen) return -1;
+ s = RSTRING_PTR(str);
+ if (offset) {
+ s = str_nth(s, RSTRING_END(str), offset, enc);
+ offset = s - RSTRING_PTR(str);
+ }
if (slen == 0) return offset;
- s = offset ? str_nth(RSTRING_PTR(str), RSTRING_END(str), offset, enc) : RSTRING_PTR(str);
/* need proceed one character at a time */
- pos = rb_memsearch(RSTRING_PTR(sub), RSTRING_LEN(sub),
- s, RSTRING_LEN(str)-(s - RSTRING_PTR(str)));
- if (pos < 0) return pos;
+ sptr = RSTRING_PTR(sub);
+ slen = RSTRING_LEN(sub);
+ len = RSTRING_LEN(str) - offset;
+ for (;;) {
+ char *t;
+ pos = rb_memsearch(sptr, slen, s, len);
+ if (pos < 0) return pos;
+ t = (char *)onigenc_get_right_adjust_char_head(enc, (const UChar *)s,
+ (const UChar *)s + pos);
+ if (t == s) break;
+ if ((len -= t - s) <= 0) return -1;
+ offset += t - s;
+ s = t;
+ }
return pos + offset;
}
@@ -4024,34 +4039,35 @@
if (awk_split) {
char *ptr = RSTRING_PTR(str);
char *eptr = RSTRING_END(str);
+ char *bptr = ptr;
int skip = 1;
int c;
end = beg;
while (ptr < eptr) {
c = rb_enc_codepoint(ptr, eptr, enc);
+ ptr += rb_enc_mbclen(ptr, eptr, enc);
if (skip) {
if (rb_enc_isspace(c, enc)) {
- beg++;
+ beg = ptr - bptr;
}
else {
- end = beg+1;
+ end = ptr - bptr;
skip = 0;
if (!NIL_P(limit) && lim <= i) break;
}
}
else {
if (rb_enc_isspace(c, enc)) {
- rb_ary_push(result, rb_str_substr(str, beg, end-beg));
+ rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
skip = 1;
- beg = end + 1;
+ beg = ptr - bptr;
if (!NIL_P(limit)) ++i;
}
else {
- end++;
+ end = ptr - bptr;
}
}
- ptr += rb_enc_codelen(c, enc);
}
}
else {
@@ -5114,14 +5130,13 @@
failed:
return rb_ary_new3(3, str, rb_str_new(0,0),rb_str_new(0,0));
}
- pos = rb_str_sublen(str, pos);
if (regex) {
sep = rb_str_subpat(str, sep, 0);
if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
}
- return rb_ary_new3(3, rb_str_substr(str, 0, pos),
+ return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
sep,
- rb_str_substr(str, pos+RSTRING_LEN(sep),
+ rb_str_subseq(str, pos+RSTRING_LEN(sep),
RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
}
@@ -5162,13 +5177,12 @@
if (pos < 0) {
return rb_ary_new3(3, rb_str_new(0,0),rb_str_new(0,0), str);
}
- pos = rb_str_sublen(str, pos);
if (regex) {
sep = rb_reg_nth_match(0, rb_backref_get());
}
- return rb_ary_new3(3, rb_str_substr(str, 0, pos),
+ return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
sep,
- rb_str_substr(str, pos+RSTRING_LEN(sep),
+ rb_str_subseq(str, pos+RSTRING_LEN(sep),
RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
}
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml