ruby-changes:1804
From: ko1@a...
Date: 28 Aug 2007 15:45:44 +0900
Subject: [ruby-changes:1804] matz - Ruby:r13295 (trunk): * string.c (rb_str_subseq): retrieve substring based on byte offset.
matz 2007-08-28 15:45:32 +0900 (Tue, 28 Aug 2007) New Revision: 13295 Modified files: trunk/ChangeLog trunk/include/ruby/intern.h trunk/re.c trunk/string.c Log: * string.c (rb_str_subseq): retrieve substring based on byte offset. * string.c (rb_str_rindex_m): was confusing character offset and byte offset. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13295&r2=13294 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13295&r2=13294 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=13295&r2=13294 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/intern.h?r1=13295&r2=13294 Index: include/ruby/intern.h =================================================================== --- include/ruby/intern.h (revision 13294) +++ include/ruby/intern.h (revision 13295) @@ -500,6 +500,7 @@ VALUE rb_str_times(VALUE, VALUE); int rb_str_sublen(VALUE, int); VALUE rb_str_substr(VALUE, long, long); +VALUE rb_str_subseq(VALUE, long, long); void rb_str_modify(VALUE); VALUE rb_str_freeze(VALUE); void rb_str_set_len(VALUE, long); Index: re.c =================================================================== --- re.c (revision 13294) +++ re.c (revision 13295) @@ -1114,7 +1114,7 @@ if (start == -1) return Qnil; end = RMATCH(match)->END(nth); len = end - start; - str = rb_str_substr(RMATCH(match)->str, start, len); + str = rb_str_subseq(RMATCH(match)->str, start, len); OBJ_INFECT(str, match); return str; } @@ -1144,7 +1144,7 @@ if (NIL_P(match)) return Qnil; if (RMATCH(match)->BEG(0) == -1) return Qnil; - str = rb_str_substr(RMATCH(match)->str, 0, RMATCH(match)->BEG(0)); + str = rb_str_subseq(RMATCH(match)->str, 0, RMATCH(match)->BEG(0)); if (OBJ_TAINTED(match)) OBJ_TAINT(str); return str; } @@ -1171,7 +1171,7 @@ if (RMATCH(match)->BEG(0) == -1) return Qnil; str = RMATCH(match)->str; pos = RMATCH(match)->END(0); - str = rb_str_substr(str, pos, RSTRING_LEN(str) - pos); + str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos); if (OBJ_TAINTED(match)) OBJ_TAINT(str); return str; } @@ -1228,7 +1228,7 @@ rb_ary_push(ary, Qnil); } else { - VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]); + VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]); if (taint) OBJ_TAINT(str); rb_ary_push(ary, str); } @@ -1423,7 +1423,7 @@ int taint = OBJ_TAINTED(match); for (i=0; i<regs->num_regs; i++) { - VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]); + VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]); if (taint) OBJ_TAINT(str); if (RTEST(rb_yield(str))) { rb_ary_push(result, str); Index: ChangeLog =================================================================== --- ChangeLog (revision 13294) +++ ChangeLog (revision 13295) @@ -1,3 +1,10 @@ +Tue Aug 28 15:42:09 2007 Yukihiro Matsumoto <matz@r...> + + * string.c (rb_str_subseq): retrieve substring based on byte offset. + + * string.c (rb_str_rindex_m): was confusing character offset and + byte offset. + Tue Aug 28 14:23:31 2007 Nobuyoshi Nakada <nobu@r...> * string.c (rb_str_splice_0): should check to modify. [ruby-dev:31665] Index: string.c =================================================================== --- string.c (revision 13294) +++ string.c (revision 13295) @@ -400,10 +400,10 @@ return str; } -static int +static long str_strlen(VALUE str, rb_encoding *enc) { - int len; + long len; if (!enc) enc = rb_enc_get(str); len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc); @@ -696,14 +696,14 @@ return pp - p; } -static int -str_sublen(VALUE str, int pos, rb_encoding *enc) +static long +str_sublen(VALUE str, long pos, rb_encoding *enc) { if (rb_enc_mbmaxlen(enc) == 1 || pos < 0) return pos; else { char *p = RSTRING_PTR(str); char *e = p + pos; - int i; + long i; i = 0; while (p < e) { @@ -721,6 +721,17 @@ } VALUE +rb_str_subseq(VALUE str, long beg, long len) +{ + VALUE str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len); + + rb_enc_copy(str2, str); + OBJ_INFECT(str2, str); + + return str2; +} + +VALUE rb_str_substr(VALUE str, long beg, long len) { rb_encoding *enc = rb_enc_get(str); @@ -1439,12 +1450,13 @@ { VALUE sub; VALUE vpos; - long pos; + rb_encoding *enc = rb_enc_get(str); + long pos, len = str_strlen(str, enc); if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) { pos = NUM2LONG(vpos); if (pos < 0) { - pos += RSTRING_LEN(str); + pos += len; if (pos < 0) { if (TYPE(sub) == T_REGEXP) { rb_backref_set(Qnil); @@ -1452,17 +1464,15 @@ return Qnil; } } - if (pos > RSTRING_LEN(str)) pos = RSTRING_LEN(str); + if (pos > len) pos = len; } else { - pos = RSTRING_LEN(str); + pos = len; } switch (TYPE(sub)) { case T_REGEXP: /* enc = rb_get_check(str, sub); */ - pos++; /* xxx adjust for Oniguruma 5.x */ - pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos, rb_enc_get(str)); if (RREGEXP(sub)->len) { pos = rb_reg_adjust_startpos(sub, str, pos, 1); pos = rb_reg_search(sub, str, pos, 1); @@ -1483,6 +1493,7 @@ } /* fall through */ case T_STRING: + pos = str_sublen(str, pos, enc); pos = rb_str_rindex(str, sub, pos); if (pos >= 0) return LONG2NUM(pos); break; @@ -2076,6 +2087,7 @@ for (i=0; i<argc; i++) { buf[i] = argv[i]; } + rb_str_modify(str); buf[i] = rb_str_new(0,0); result = rb_str_aref_m(argc, buf, str); if (!NIL_P(result)) { @@ -3786,7 +3798,7 @@ break; } else if (last_null == 1) { - rb_ary_push(result, rb_str_substr(str, beg, + rb_ary_push(result, rb_str_subseq(str, beg, rb_enc_mbclen(RSTRING_PTR(str)+beg,enc))); beg = start; } @@ -3797,7 +3809,7 @@ } } else { - rb_ary_push(result, rb_str_substr(str, beg, end-beg)); + rb_ary_push(result, rb_str_subseq(str, beg, end-beg)); beg = start = END(0); } last_null = 0; @@ -3807,7 +3819,7 @@ if (BEG(idx) == END(idx)) tmp = rb_str_new5(str, 0, 0); else - tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx)); + tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx)); rb_ary_push(result, tmp); } if (!NIL_P(limit) && lim <= ++i) break; @@ -3817,7 +3829,7 @@ if (RSTRING_LEN(str) == beg) tmp = rb_str_new5(str, 0, 0); else - tmp = rb_str_substr(str, beg, RSTRING_LEN(str)-beg); + tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg); rb_ary_push(result, tmp); } if (NIL_P(limit) && lim == 0) { -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml