ruby-changes:30887
From: glass <ko1@a...>
Date: Wed, 18 Sep 2013 23:34:18 +0900 (JST)
Subject: [ruby-changes:30887] glass:r42966 (trunk): * string.c (rb_str_enumerate_lines): make String#each_line and
glass 2013-09-18 23:34:04 +0900 (Wed, 18 Sep 2013)

  New Revision: 42966

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=42966

  Log:
    * string.c (rb_str_enumerate_lines): make String#each_line and
      #lines not raise invalid byte sequence error when it is called
      with an argument. The patch also causes performance improvement.
      [ruby-dev:47549] [Bug #8698]

    * test/ruby/test_m17n_comb.rb (test_str_each_line): remove
      assertions which check that String#each_line and #lines will
      raise an error if the receiver includes invalid byte sequence.

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_m17n_comb.rb

Index: ChangeLog
===================================================================
--- ChangeLog (revision 42965)
+++ ChangeLog (revision 42966)
@@ -1,3 +1,14 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Wed Sep 18 23:14:58 2013 Masaki Matsushita <glass.saga@g...>
+
+ * string.c (rb_str_enumerate_lines): make String#each_line and
+ #lines not raise invalid byte sequence error when it is called
+ with an argument. The patch also causes performance improvement.
+ [ruby-dev:47549] [Bug #8698]
+
+ * test/ruby/test_m17n_comb.rb (test_str_each_line): remove
+ assertions which check that String#each_line and #lines will
+ raise an error if the receiver includes invalid byte sequence.
+ Wed Sep 18 16:32:15 2013 Nobuyoshi Nakada <nobu@r...> * proc.c (mnew_from_me): allocate structs after allocated wrapper Index: string.c =================================================================== --- string.c (revision 42965) +++ string.c (revision 42966) @@ -6352,21 +6352,17 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L6352 rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray) { rb_encoding *enc; - VALUE rs; - unsigned int newline; - const char *p, *pend, *s, *ptr; - long len, rslen; - VALUE line; - int n; - VALUE orig = str; + VALUE line, rs, orig = str; + const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted; + long pos, len, rslen; + int paragraph_mode = 0; + VALUE UNINITIALIZED_VAR(ary); - if (argc == 0) { + if (argc == 0) rs = rb_rs; - } - else { + else rb_scan_args(argc, argv, "01", &rs); - } if (rb_block_given_p()) { if (wantarray) { @@ -6396,76 +6392,63 @@ rb_str_enumerate_lines(int argc, VALUE * https://github.com/ruby/ruby/blob/trunk/string.c#L6392 return orig; } } + str = rb_str_new4(str); - ptr = p = s = RSTRING_PTR(str); - pend = p + RSTRING_LEN(str); + ptr = subptr = RSTRING_PTR(str); + pend = RSTRING_END(str); len = RSTRING_LEN(str); StringValue(rs); - if (rs == rb_default_rs) { - enc = rb_enc_get(str); - while (p < pend) { - char *p0; + rslen = RSTRING_LEN(rs); - p = memchr(p, '\n', pend - p); - if (!p) break; - p0 = rb_enc_left_char_head(s, p, pend, enc); - if (!rb_enc_is_newline(p0, pend, enc)) { - p++; - continue; - } - p = p0 + rb_enc_mbclen(p0, pend, enc); - line = rb_str_subseq(str, s - ptr, p - s); - if (wantarray) - rb_ary_push(ary, line); - else - rb_yield(line); - str_mod_check(str, ptr, len); - s = p; - } - goto finish; - } + if (rs == rb_default_rs) + enc = rb_enc_get(str); + else + enc = rb_enc_check(str, rs); - enc = rb_enc_check(str, rs); - rslen = RSTRING_LEN(rs); if (rslen == 0) { - newline = '\n'; + rsptr = "\n\n"; + rslen = 2; + paragraph_mode = 1; } else { - newline = 
rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc); + rsptr = RSTRING_PTR(rs); } - while (p < pend) { - unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc); - - again: - if (rslen == 0 && c == newline) { - p += n; - if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) { - goto again; - } - while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) { - p += n; + if ((rs == rb_default_rs || paragraph_mode) && !rb_enc_asciicompat(enc)) { + rs = rb_str_new(rsptr, rslen); + rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil); + rsptr = RSTRING_PTR(rs); + rslen = RSTRING_LEN(rs); + } + + while (subptr < pend) { + pos = rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc); + if (pos < 0) break; + hit = subptr + pos; + adjusted = rb_enc_right_char_head(subptr, hit, pend, enc); + if (hit != adjusted) { + subptr = adjusted; + continue; + } + subend = hit + rslen; + if (paragraph_mode) { + while (subend < pend && rb_enc_is_newline(subend, pend, enc)) { + subend += rb_enc_mbclen(subend, pend, enc); } - p -= n; } - if (c == newline && - (rslen <= 1 || - (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) { - const char *pp = p + (rslen ? 
rslen : n); - line = rb_str_subseq(str, s - ptr, pp - s); - if (wantarray) - rb_ary_push(ary, line); - else - rb_yield(line); + line = rb_str_subseq(str, subptr - ptr, subend - subptr); + if (wantarray) { + rb_ary_push(ary, line); + } + else { + rb_yield(line); str_mod_check(str, ptr, len); - s = pp; } - p += n; + subptr = subend; } - finish: - if (s != pend) { - line = rb_str_subseq(str, s - ptr, pend - s); + if (subptr != pend) { + line = rb_str_subseq(str, subptr - ptr, pend - subptr); if (wantarray) rb_ary_push(ary, line); else Index: test/ruby/test_m17n_comb.rb =================================================================== --- test/ruby/test_m17n_comb.rb (revision 42965) +++ test/ruby/test_m17n_comb.rb (revision 42966) @@ -798,17 +798,12 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L798 def test_str_each_line combination(STRINGS, STRINGS) {|s1, s2| - if !s1.valid_encoding? || !s2.valid_encoding? - assert_raise(ArgumentError, Encoding::CompatibilityError) { s1.each_line(s2) {} } - next - end if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding assert_raise(Encoding::CompatibilityError) { s1.each_line(s2) {} } next end lines = [] enccall(s1, :each_line, s2) {|line| - assert(line.valid_encoding?) assert_equal(s1.encoding, line.encoding) lines << line } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/