ruby-changes:18611
From: nobu <ko1@a...>
Date: Sun, 23 Jan 2011 08:00:27 +0900 (JST)
Subject: [ruby-changes:18611] Ruby:r30635 (trunk): * string.c (str_nth_len, str_utf8_nth): return the rest length together.
nobu 2011-01-23 08:00:12 +0900 (Sun, 23 Jan 2011) New Revision: 30635 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=30635 Log: * string.c (str_nth_len, str_utf8_nth): return the rest length together. * string.c (rb_str_substr): get rid of measure the length always to improve performance for huge string. Modified files: trunk/ChangeLog trunk/string.c Index: ChangeLog =================================================================== --- ChangeLog (revision 30634) +++ ChangeLog (revision 30635) @@ -1,3 +1,10 @@ +Sun Jan 23 08:00:09 2011 Nobuyoshi Nakada <nobu@r...> + + * string.c (str_nth_len, str_utf8_nth): return the rest length together. + + * string.c (rb_str_substr): get rid of measure the length always + to improve performance for huge string. + Sun Jan 23 00:40:10 2011 KOSAKI Motohiro <kosaki.motohiro@g...> * test/test_syslog.rb: Fix to make a lot of test failure if @@ -9,7 +16,7 @@ when multiple merge keys are specified. * test/psych/test_merge_keys.rb: tests for multi-merge key support - + Sat Jan 22 11:33:04 2011 Aaron Patterson <aaron@t...> * ext/psych/lib/psych/visitors/to_ruby.rb: merge keys are actually @@ -29,7 +36,7 @@ * ext/psych/parser.c (parse): fix assertion error when reusing a parser after an exception has been raised - + * test/psych/test_parser.rb: test for assertion error Sat Jan 22 04:09:22 2011 Aaron Patterson <aaron@t...> Index: string.c =================================================================== --- string.c (revision 30634) +++ string.c (revision 30635) @@ -1420,9 +1420,10 @@ return rb_check_string_type(str); } -char* -rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc) +static char* +str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc) { + long nth = *nthp; if (rb_enc_mbmaxlen(enc) == 1) { p += nth; } @@ -1435,12 +1436,16 @@ while (p < e && 0 < nth) { e2 = p + nth; - if (e < e2) + if (e < e2) { + *nthp = nth; return (char *)e; + } if (ISASCII(*p)) { p2 = search_nonascii(p, e2); - if (!p2) + if (!p2) { + *nthp = nth; return (char *)e2; + } nth -= p2 - p; p = p2; } @@ -1448,26 +1453,35 @@ p += n; nth--; } - if (nth != 0) + *nthp = nth; + if (nth != 0) { return (char *)e; + } return (char *)p; } else { - while (p<e && nth--) { + while (p < e && nth--) { p += rb_enc_mbclen(p, e, enc); } } if (p > e) p = e; + *nthp = nth; return (char*)p; } +char* +rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc) +{ + return str_nth_len(p, e, &nth, enc); +} + static char* str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte) { if (singlebyte) p += nth; else { - p = rb_enc_nth(p, e, nth, enc); + p = str_nth_len(p, e, &nth, enc); } if (!p) return 0; if (p > e) p = e; @@ -1492,8 +1506,9 @@ #ifdef NONASCII_MASK static char * -str_utf8_nth(const char *p, const char *e, long nth) +str_utf8_nth(const char *p, const char *e, long *nthp) { + long nth = *nthp; if ((int)SIZEOF_VALUE < e - p && (int)SIZEOF_VALUE * 2 < nth) { const VALUE *s, *t; const VALUE lowbits = sizeof(VALUE) - 1; @@ -1516,13 +1531,14 @@ } p++; } + *nthp = nth; return (char *)p; } static long str_utf8_offset(const char *p, const char *e, long nth) { - const char *pp = str_utf8_nth(p, e, nth); + const char *pp = str_utf8_nth(p, e, &nth); return pp - p; } #endif @@ -1603,16 +1619,18 @@ if (beg < 0) return Qnil; } } - else if (beg > 0 && beg > str_strlen(str, enc)) { + else if (beg > 0 && beg > RSTRING_LEN(str)) { return Qnil; } if (len == 0) { + if (beg > str_strlen(str, enc)) return Qnil; p = 0; } #ifdef NONASCII_MASK else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && enc == rb_utf8_encoding()) { - p = str_utf8_nth(s, e, beg); + p = str_utf8_nth(s, e, &beg); + if (beg > 0) return Qnil; len = str_utf8_offset(p, e, len); } #endif @@ -1621,15 +1639,15 @@ p = s + beg * char_sz; if (p > e) { - p = e; - len = 0; + return Qnil; } else if (len * char_sz > e - p) len = e - p; else len *= char_sz; } - else if ((p = str_nth(s, e, beg, enc, 0)) == e) { + else if ((p = str_nth_len(s, e, &beg, enc)) == e) { + if (beg > 0) return Qnil; len = 0; } else { -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/