ruby-changes:25781
From: knu <ko1@a...>
Date: Sun, 25 Nov 2012 03:46:31 +0900 (JST)
Subject: [ruby-changes:25781] knu:r37838 (trunk): String#{lines,chars,codepoints,bytes} now return an array.
knu 2012-11-25 03:46:15 +0900 (Sun, 25 Nov 2012) New Revision: 37838 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=37838 Log: String#{lines,chars,codepoints,bytes} now return an array. * string.c (rb_str_each_line, rb_str_lines): String#lines now returns an array instead of an enumerator. Passing a block is deprecated but still supported for backwards compatibility. Based on the patch by yhara. [Feature #6670] * string.c (rb_str_each_char, rb_str_chars): Ditto for String#chars. * string.c (rb_str_each_codepoint, rb_str_codepoints): Ditto for String#codepoints. * string.c (rb_str_each_byte, rb_str_bytes): Ditto for String#bytes. * NEWS: Add notes for the above changes. Modified files: trunk/ChangeLog trunk/NEWS trunk/string.c trunk/test/ruby/test_string.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 37837) +++ ChangeLog (revision 37838) @@ -1,3 +1,21 @@ +Sun Nov 25 03:44:50 2012 Akinori MUSHA <knu@i...> + + * string.c (rb_str_each_line, rb_str_lines): String#lines now + returns an array instead of an enumerator. Passing a block is + deprecated but still supported for backwards compatibility. + Based on the patch by yhara. [Feature #6670] + + * string.c (rb_str_each_char, rb_str_chars): Ditto for + String#chars. + + * string.c (rb_str_each_codepoint, rb_str_codepoints): Ditto for + String#codepoints. + + * string.c (rb_str_each_byte, rb_str_bytes): Ditto for + String#bytes. + + * NEWS: Add notes for the above changes. + Sun Nov 25 02:07:37 2012 Akinori MUSHA <knu@i...> * test/ruby/envutil.rb (Test::Unit::Assertions#assert_warning) Index: string.c =================================================================== --- string.c (revision 37837) +++ string.c (revision 37838) @@ -6098,45 +6098,8 @@ } -/* - * call-seq: - * str.each_line(separator=$/) {|substr| block } -> str - * str.each_line(separator=$/) -> an_enumerator - * - * str.lines(separator=$/) {|substr| block } -> str - * str.lines(separator=$/) -> an_enumerator - * - * Splits <i>str</i> using the supplied parameter as the record separator - * (<code>$/</code> by default), passing each substring in turn to the supplied - * block. If a zero-length record separator is supplied, the string is split - * into paragraphs delimited by multiple successive newlines. - * - * If no block is given, an enumerator is returned instead. - * - * print "Example one\n" - * "hello\nworld".each_line {|s| p s} - * print "Example two\n" - * "hello\nworld".each_line('l') {|s| p s} - * print "Example three\n" - * "hello\n\n\nworld".each_line('') {|s| p s} - * - * <em>produces:</em> - * - * Example one - * "hello\n" - * "world" - * Example two - * "hel" - * "l" - * "o\nworl" - * "d" - * Example three - * "hello\n\n\n" - * "world" - */ - static VALUE -rb_str_each_line(int argc, VALUE *argv, VALUE str) +rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray) { rb_encoding *enc; VALUE rs; @@ -6146,6 +6109,7 @@ VALUE line; int n; VALUE orig = str; + VALUE ary; if (argc == 0) { rs = rb_rs; @@ -6153,10 +6117,34 @@ else { rb_scan_args(argc, argv, "01", &rs); } - RETURN_ENUMERATOR(str, argc, argv); + + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#lines is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new(); + else + RETURN_ENUMERATOR(str, argc, argv); + } + if (NIL_P(rs)) { - rb_yield(str); - return orig; + if (wantarray) { + rb_ary_push(ary, str); + return ary; + } + else { + rb_yield(str); + return orig; + } } str = rb_str_new4(str); ptr = p = s = RSTRING_PTR(str); @@ -6179,7 +6167,10 @@ line = rb_str_new5(str, s, p - s); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (wantarray) + rb_ary_push(ary, line); + else + rb_yield(line); str_mod_check(str, ptr, len); s = p; } @@ -6215,7 +6206,10 @@ line = rb_str_new5(str, s, p - s + (rslen ? rslen : n)); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (wantarray) + rb_ary_push(ary, line); + else + rb_yield(line); str_mod_check(str, ptr, len); s = p + (rslen ? rslen : n); } @@ -6227,29 +6221,127 @@ line = rb_str_new5(str, s, pend - s); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (wantarray) + rb_ary_push(ary, line); + else + rb_yield(line); RB_GC_GUARD(str); } - return orig; + if (wantarray) + return ary; + else + return orig; } +/* + * call-seq: + * str.each_line(separator=$/) {|substr| block } -> str + * str.each_line(separator=$/) -> an_enumerator + * + * Splits <i>str</i> using the supplied parameter as the record + * separator (<code>$/</code> by default), passing each substring in + * turn to the supplied block. If a zero-length record separator is + * supplied, the string is split into paragraphs delimited by + * multiple successive newlines. + * + * If no block is given, an enumerator is returned instead. + * + * print "Example one\n" + * "hello\nworld".each_line {|s| p s} + * print "Example two\n" + * "hello\nworld".each_line('l') {|s| p s} + * print "Example three\n" + * "hello\n\n\nworld".each_line('') {|s| p s} + * + * <em>produces:</em> + * + * Example one + * "hello\n" + * "world" + * Example two + * "hel" + * "l" + * "o\nworl" + * "d" + * Example three + * "hello\n\n\n" + * "world" + */ + static VALUE +rb_str_each_line(int argc, VALUE *argv, VALUE str) +{ + return rb_str_enumerate_lines(argc, argv, str, 0); +} + +/* + * call-seq: + * str.lines(separator=$/) -> an_array + * + * Returns an array of lines in <i>str</i> split using the supplied + * record separator (<code>$/</code> by default). This is a + * shorthand for <code>str.each_line(separator).to_a</code>. + * + * If a block is given, which is a deprecated form, works the same as + * <code>each_line</code>. + */ + +static VALUE +rb_str_lines(int argc, VALUE *argv, VALUE str) +{ + return rb_str_enumerate_lines(argc, argv, str, 1); +} + +static VALUE rb_str_each_byte_size(VALUE str, VALUE args) { return LONG2FIX(RSTRING_LEN(str)); } +static VALUE +rb_str_enumerate_bytes(VALUE str, int wantarray) +{ + long i; + VALUE ary; + + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#bytes is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new2(RSTRING_LEN(str)); + else + RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size); + } + + for (i=0; i<RSTRING_LEN(str); i++) { + if (wantarray) + rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff)); + else + rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff)); + } + if (wantarray) + return ary; + else + return str; +} + /* * call-seq: - * str.bytes {|fixnum| block } -> str - * str.bytes -> an_enumerator - * * str.each_byte {|fixnum| block } -> str * str.each_byte -> an_enumerator * - * Passes each byte in <i>str</i> to the given block, or returns - * an enumerator if no block is given. + * Passes each byte in <i>str</i> to the given block, or returns an + * enumerator if no block is given. * * "hello".each_byte {|c| print c, ' ' } * @@ -6261,13 +6353,24 @@ static VALUE rb_str_each_byte(VALUE str) { - long i; + return rb_str_enumerate_bytes(str, 0); +} - RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size); - for (i=0; i<RSTRING_LEN(str); i++) { - rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff)); - } - return str; +/* + * call-seq: + * str.bytes -> an_array + * + * Returns an array of bytes in <i>str</i>. This is a shorthand for + * <code>str.each_byte.to_a</code>. + * + * If a block is given, which is a deprecated form, works the same as + * <code>each_byte</code>. + */ + +static VALUE +rb_str_bytes(VALUE str) +{ + return rb_str_enumerate_bytes(str, 1); } static VALUE @@ -6285,33 +6388,33 @@ return LONG2FIX(len); } -/* - * call-seq: - * str.chars {|cstr| block } -> str - * str.chars -> an_enumerator - * - * str.each_char {|cstr| block } -> str - * str.each_char -> an_enumerator - * - * Passes each character in <i>str</i> to the given block, or returns - * an enumerator if no block is given. - * - * "hello".each_char {|c| print c, ' ' } - * - * <em>produces:</em> - * - * h e l l o - */ - static VALUE -rb_str_each_char(VALUE str) +rb_str_enumerate_chars(VALUE str, int wantarray) { VALUE orig = str; long i, len, n; const char *ptr; rb_encoding *enc; + VALUE ary; - RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#chars is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new(); + else + RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); + } + str = rb_str_new4(str); ptr = RSTRING_PTR(str); len = RSTRING_LEN(str); @@ -6321,63 +6424,159 @@ case ENC_CODERANGE_7BIT: for (i = 0; i < len; i += n) { n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc); - rb_yield(rb_str_subseq(str, i, n)); + if (wantarray) + rb_ary_push(ary, rb_str_subseq(str, i, n)); + else + rb_yield(rb_str_subseq(str, i, n)); } break; default: for (i = 0; i < len; i += n) { n = rb_enc_mbclen(ptr + i, ptr + len, enc); - rb_yield(rb_str_subseq(str, i, n)); + if (wantarray) + rb_ary_push(ary, rb_str_subseq(str, i, n)); + else + rb_yield(rb_str_subseq(str, i, n)); } } - return orig; + if (wantarray) + return ary; + else + return orig; } /* * call-seq: - * str.codepoints {|integer| block } -> str - * str.codepoints -> an_enumerator + * str.each_char {|cstr| block } -> str + * str.each_char -> an_enumerator * - * str.each_codepoint {|integer| block } -> str - * str.each_codepoint -> an_enumerator + * Passes each character in <i>str</i> to the given block, or returns + * an enumerator if no block is given. * - * Passes the <code>Integer</code> ordinal of each character in <i>str</i>, - * also known as a <i>codepoint</i> when applied to Unicode strings to the - * given block. + * "hello".each_char {|c| print c, ' ' } * - * If no block is given, an enumerator is returned instead. + * <em>produces:</em> * - * "hello\u0639".each_codepoint {|c| print c, ' ' } + * h e l l o + */ + +static VALUE +rb_str_each_char(VALUE str) +{ + return rb_str_enumerate_chars(str, 0); +} + +/* + * call-seq: + * str.chars -> an_array * - * <em>produces:</em> + * Returns an array of characters in <i>str</i>. This is a shorthand + * for <code>str.each_char.to_a</code>. * - * 104 101 108 108 111 1593 + * If a block is given, which is a deprecated form, works the same as + * <code>each_char</code>. */ static VALUE -rb_str_each_codepoint(VALUE str) +rb_str_chars(VALUE str) { + return rb_str_enumerate_chars(str, 1); +} + + +static VALUE +rb_str_enumerate_codepoints(VALUE str, int wantarray) +{ VALUE orig = str; int n; unsigned int c; const char *ptr, *end; rb_encoding *enc; + VALUE ary; - if (single_byte_optimizable(str)) return rb_str_each_byte(str); - RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); + if (single_byte_optimizable(str)) + return rb_str_enumerate_bytes(str, wantarray); + + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#codepoints is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new(); + else + RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); + } + str = rb_str_new4(str); ptr = RSTRING_PTR(str); end = RSTRING_END(str); enc = STR_ENC_GET(str); while (ptr < end) { c = rb_enc_codepoint_len(ptr, end, &n, enc); - rb_yield(UINT2NUM(c)); + if (wantarray) + rb_ary_push(ary, UINT2NUM(c)); + else + rb_yield(UINT2NUM(c)); ptr += n; } RB_GC_GUARD(str); - return orig; + if (wantarray) + return ary; + else + return orig; } +/* + * call-seq: + * str.each_codepoint {|integer| block } -> str + * str.each_codepoint -> an_enumerator + * + * Passes the <code>Integer</code> ordinal of each character in <i>str</i>, + * also known as a <i>codepoint</i> when applied to Unicode strings to the + * given block. + * + * If no block is given, an enumerator is returned instead. + * + * "hello\u0639".each_codepoint {|c| print c, ' ' } + * + * <em>produces:</em> + * + * 104 101 108 108 111 1593 + */ + +static VALUE +rb_str_each_codepoint(VALUE str) +{ + return rb_str_enumerate_codepoints(str, 0); +} + +/* + * call-seq: + * str.codepoints -> an_array + * + * Returns an array of the <code>Integer</code> ordinals of the + * characters in <i>str</i>. This is a shorthand for + * <code>str.each_codepoint.to_a</code>. + * + * If a block is given, which is a deprecated form, works the same as + * <code>each_codepoint</code>. + */ + +static VALUE +rb_str_codepoints(VALUE str) +{ + return rb_str_enumerate_codepoints(str, 1); +} + + static long chopped_length(VALUE str) { @@ -7994,10 +8193,10 @@ rb_define_method(rb_cString, "hex", rb_str_hex, 0); rb_define_method(rb_cString, "oct", rb_str_oct, 0); rb_define_method(rb_cString, "split", rb_str_split_m, -1); - rb_define_method(rb_cString, "lines", rb_str_each_line, -1); - rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0); - rb_define_method(rb_cString, "chars", rb_str_each_char, 0); - rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0); + rb_define_method(rb_cString, "lines", rb_str_lines, -1); + rb_define_method(rb_cString, "bytes", rb_str_bytes, 0); + rb_define_method(rb_cString, "chars", rb_str_chars, 0); + rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0); rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); rb_define_method(rb_cString, "concat", rb_str_concat, 1); Index: NEWS =================================================================== --- NEWS (revision 37837) +++ NEWS (revision 37838) @@ -129,6 +129,11 @@ * String * added method: * added String#b returning a copied string whose encoding is ASCII-8BIT. + * change return value: + * String#lines now returns an array instead of an enumerator. + * String#chars now returns an array instead of an enumerator. + * String#codepoints now returns an array instead of an enumerator. + * String#bytes now returns an array instead of an enumerator. * Struct * added method: @@ -302,6 +307,18 @@ See above. + * String#lines + * String#chars + * String#codepoints + * String#bytes + + These methods no longer return an Enumerator, although passing a + block is still supported for backwards compatibility. + + Code like str.lines.with_index(1) { |line, lineno| ... } no longer + works because str.lines returns an array. Replace lines with + each_line in such cases. + * Signal.trap See above. Index: test/ruby/test_string.rb =================================================================== --- test/ruby/test_string.rb (revision 37837) +++ test/ruby/test_string.rb (revision 37838) @@ -626,36 +626,121 @@ end def test_each_byte + s = S("ABC") + res = [] - S("ABC").each_byte {|x| res << x } + assert_equal s.object_id, s.each_byte {|x| res << x }.object_id assert_equal(65, res[0]) assert_equal(66, res[1]) assert_equal(67, res[2]) + + assert_equal 65, s.each_byte.next end + def test_bytes + s = S("ABC") + assert_equal [65, 66, 67], s.bytes + + if RUBY_VERSION >= "2.1.0" + assert_warn(/block not used/) { + assert_equal [65, 66, 67], s.bytes {} + } + else + assert_warning(/deprecated/) { + res = [] + assert_equal s.object_id, s.bytes {|x| res << x }.object_id + assert_equal(65, res[0]) + assert_equal(66, res[1]) + assert_equal(67, res[2]) + } + end + end + def test_each_codepoint + # Single byte optimization + assert_equal 65, S("ABC").each_codepoint.next + + s = S("\u3042\u3044\u3046") + res = [] - S("ABC").codepoints.each {|x| res << x} - assert_equal([65, 66, 67], res) + assert_equal s.object_id, s.each_codepoint {|x| res << x }.object_id + assert_equal(0x3042, res[0]) + assert_equal(0x3044, res[1]) + assert_equal(0x3046, res[2]) + + assert_equal 0x3042, s.each_codepoint.next end + def test_codepoints + # Single byte optimization + assert_equal [65, 66, 67], S("ABC").codepoints + + s = S("\u3042\u3044\u3046") + assert_equal [0x3042, 0x3044, 0x3046], s.codepoints + + if RUBY_VERSION >= "2.1.0" + assert_warn(/block not used/) { + assert_equal [0x3042, 0x3044, 0x3046], s.codepoints {} + } + else + assert_warning(/deprecated/) { + res = [] + assert_equal s.object_id, s.codepoints {|x| res << x }.object_id + assert_equal(0x3042, res[0]) + assert_equal(0x3044, res[1]) + assert_equal(0x3046, res[2]) + } + end + end + + def test_each_char + s = S("ABC") + + res = [] + assert_equal s.object_id, s.each_char {|x| res << x }.object_id + assert_equal("A", res[0]) + assert_equal("B", res[1]) + assert_equal("C", res[2]) + + assert_equal "A", S("ABC").each_char.next + end + + def test_chars + s = S("ABC") + assert_equal ["A", "B", "C"], s.chars + + if RUBY_VERSION >= "2.1.0" + assert_warn(/block not used/) { + assert_equal ["A", "B", "C"], s.chars {} + } + else + assert_warning(/deprecated/) { + res = [] + assert_equal s.object_id, s.chars {|x| res << x }.object_id + assert_equal("A", res[0]) + assert_equal("B", res[1]) + assert_equal("C", res[2]) + } + end + end + def test_each_line save = $/ $/ = "\n" res=[] - S("hello\nworld").lines.each {|x| res << x} + S("hello\nworld").each_line {|x| res << x} assert_equal(S("hello\n"), res[0]) assert_equal(S("world"), res[1]) res=[] - S("hello\n\n\nworld").lines(S('')).each {|x| res << x} + S("hello\n\n\nworld").each_line(S('')) {|x| res << x} assert_equal(S("hello\n\n\n"), res[0]) assert_equal(S("world"), res[1]) $/ = "!" res=[] - S("hello!world").lines.each {|x| res << x} + S("hello!world").each_line {|x| res << x} assert_equal(S("hello!"), res[0]) assert_equal(S("world"), res[1]) @@ -671,8 +756,30 @@ s = nil "foo\nbar".each_line(nil) {|s2| s = s2 } assert_equal("foo\nbar", s) + + assert_equal "hello\n", S("hello\nworld").each_line.next + assert_equal "hello\nworld", S("hello\nworld").each_line(nil).next end + def test_lines + s = S("hello\nworld") + assert_equal ["hello\n", "world"], s.lines + assert_equal ["hello\nworld"], s.lines(nil) + + if RUBY_VERSION >= "2.1.0" + assert_warn(/block not used/) { + assert_equal ["hello\n", "world"], s.lines {} + } + else + assert_warning(/deprecated/) { + res = [] + assert_equal s.object_id, s.lines {|x| res << x }.object_id + assert_equal(S("hello\n"), res[0]) + assert_equal(S("world"), res[1]) + } + end + end + def test_empty? assert(S("").empty?) (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/