ruby-changes:2680
From: ko1@a...
Date: 10 Dec 2007 14:13:28 +0900
Subject: [ruby-changes:2680] nobu - Ruby:r14171 (trunk): * re.c (rb_reg_search): return byte offset.
nobu 2007-12-10 13:50:35 +0900 (Mon, 10 Dec 2007) New Revision: 14171 Modified files: trunk/ChangeLog trunk/re.c trunk/string.c Log: * re.c (rb_reg_search): return byte offset. [ruby-dev:32452] * re.c (rb_reg_match, rb_reg_match2, rb_reg_match_m): convert byte offset to char index. * string.c (rb_str_index): return byte offset. [ruby-dev:32472] * string.c (rb_str_split_m): calculate in byte offset. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14171&r2=14170 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14171&r2=14170 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=14171&r2=14170 Index: re.c =================================================================== --- re.c (revision 14170) +++ re.c (revision 14171) @@ -1070,7 +1070,7 @@ OBJ_INFECT(match, re); OBJ_INFECT(match, str); - return rb_str_sublen(RMATCH(match)->str, result); + return result; } VALUE @@ -2123,28 +2123,24 @@ } } -static VALUE +static long rb_reg_match_pos(VALUE re, VALUE str, long pos) { if (NIL_P(str)) { rb_backref_set(Qnil); - return Qnil; + return -1; } str = reg_operand(str, Qtrue); if (pos != 0) { if (pos < 0) { pos += RSTRING_LEN(str); if (pos < 0) { - return Qnil; + return pos; } } pos = rb_reg_adjust_startpos(re, str, pos, 0); } - pos = rb_reg_search(re, str, pos, 0); - if (pos < 0) { - return Qnil; - } - return LONG2FIX(pos); + return rb_reg_search(re, str, pos, 0); } /* @@ -2160,7 +2156,10 @@ VALUE rb_reg_match(VALUE re, VALUE str) { - return rb_reg_match_pos(re, str, 0); + long pos = rb_reg_match_pos(re, str, 0); + if (pos < 0) return Qnil; + pos = rb_str_sublen(str, pos); + return LONG2FIX(pos); } /* @@ -2225,6 +2224,7 @@ if (start < 0) { return Qnil; } + start = rb_str_sublen(line, start); return LONG2FIX(start); } @@ -2270,8 +2270,8 @@ pos = 0; } - result = rb_reg_match_pos(re, str, pos); - if (NIL_P(result)) { + pos = rb_reg_match_pos(re, str, pos); + if (pos < 0) { rb_backref_set(Qnil); return Qnil; } Index: ChangeLog =================================================================== --- ChangeLog (revision 14170) +++ ChangeLog (revision 14171) @@ -1,3 +1,14 @@ +Mon Dec 10 13:50:33 2007 Nobuyoshi Nakada <nobu@r...> + + * re.c (rb_reg_search): return byte offset. [ruby-dev:32452] + + * re.c (rb_reg_match, rb_reg_match2, rb_reg_match_m): convert byte + offset to char index. + + * string.c (rb_str_index): return byte offset. [ruby-dev:32472] + + * string.c (rb_str_split_m): calculate in byte offset. + Mon Dec 10 09:56:29 2007 Koichi Sasada <ko1@a...> * benchmark/bm_vm1_neq.rb, bm_vm1_not.rb: added. Index: string.c =================================================================== --- string.c (revision 14170) +++ string.c (revision 14171) @@ -1412,7 +1412,7 @@ rb_str_index(VALUE str, VALUE sub, long offset) { long pos; - char *s; + char *s, *sptr; long len, slen; rb_encoding *enc; @@ -1424,12 +1424,27 @@ if (offset < 0) return -1; } if (len - offset < slen) return -1; + s = RSTRING_PTR(str); + if (offset) { + s = str_nth(s, RSTRING_END(str), offset, enc); + offset = s - RSTRING_PTR(str); + } if (slen == 0) return offset; - s = offset ? str_nth(RSTRING_PTR(str), RSTRING_END(str), offset, enc) : RSTRING_PTR(str); /* need proceed one character at a time */ - pos = rb_memsearch(RSTRING_PTR(sub), RSTRING_LEN(sub), - s, RSTRING_LEN(str)-(s - RSTRING_PTR(str))); - if (pos < 0) return pos; + sptr = RSTRING_PTR(sub); + slen = RSTRING_LEN(sub); + len = RSTRING_LEN(str) - offset; + for (;;) { + char *t; + pos = rb_memsearch(sptr, slen, s, len); + if (pos < 0) return pos; + t = (char *)onigenc_get_right_adjust_char_head(enc, (const UChar *)s, + (const UChar *)s + pos); + if (t == s) break; + if ((len -= t - s) <= 0) return -1; + offset += t - s; + s = t; + } return pos + offset; } @@ -4024,34 +4039,35 @@ if (awk_split) { char *ptr = RSTRING_PTR(str); char *eptr = RSTRING_END(str); + char *bptr = ptr; int skip = 1; int c; end = beg; while (ptr < eptr) { c = rb_enc_codepoint(ptr, eptr, enc); + ptr += rb_enc_mbclen(ptr, eptr, enc); if (skip) { if (rb_enc_isspace(c, enc)) { - beg++; + beg = ptr - bptr; } else { - end = beg+1; + end = ptr - bptr; skip = 0; if (!NIL_P(limit) && lim <= i) break; } } else { if (rb_enc_isspace(c, enc)) { - rb_ary_push(result, rb_str_substr(str, beg, end-beg)); + rb_ary_push(result, rb_str_subseq(str, beg, end-beg)); skip = 1; - beg = end + 1; + beg = ptr - bptr; if (!NIL_P(limit)) ++i; } else { - end++; + end = ptr - bptr; } } - ptr += rb_enc_codelen(c, enc); } } else { @@ -5114,14 +5130,13 @@ failed: return rb_ary_new3(3, str, rb_str_new(0,0),rb_str_new(0,0)); } - pos = rb_str_sublen(str, pos); if (regex) { sep = rb_str_subpat(str, sep, 0); if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed; } - return rb_ary_new3(3, rb_str_substr(str, 0, pos), + return rb_ary_new3(3, rb_str_subseq(str, 0, pos), sep, - rb_str_substr(str, pos+RSTRING_LEN(sep), + rb_str_subseq(str, pos+RSTRING_LEN(sep), RSTRING_LEN(str)-pos-RSTRING_LEN(sep))); } @@ -5162,13 +5177,12 @@ if (pos < 0) { return rb_ary_new3(3, rb_str_new(0,0),rb_str_new(0,0), str); } - pos = rb_str_sublen(str, pos); if (regex) { sep = rb_reg_nth_match(0, rb_backref_get()); } - return rb_ary_new3(3, rb_str_substr(str, 0, pos), + return rb_ary_new3(3, rb_str_subseq(str, 0, pos), sep, - rb_str_substr(str, pos+RSTRING_LEN(sep), + rb_str_subseq(str, pos+RSTRING_LEN(sep), RSTRING_LEN(str)-pos-RSTRING_LEN(sep))); } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml