ruby-changes:4151
From: ko1@a...
Date: Fri, 29 Feb 2008 22:33:47 +0900 (JST)
Subject: [ruby-changes:4151] matz - Ruby:r15641 (trunk): * string.c (rb_str_chomp_bang): now works on UTF-16.
matz 2008-02-29 22:33:26 +0900 (Fri, 29 Feb 2008) New Revision: 15641 Modified files: trunk/ChangeLog trunk/string.c trunk/test/ruby/test_string.rb trunk/test/ruby/test_utf16.rb Log: * string.c (rb_str_chomp_bang): now works on UTF-16. * string.c (tr_setup_table): negation should work on non ASCII compatible strings as well. * string.c (rb_str_split_m): awk split should work on non ASCII compatible strings as well. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_string.rb?r1=15641&r2=15640&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15641&r2=15640&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15641&r2=15640&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_utf16.rb?r1=15641&r2=15640&diff_format=u Index: ChangeLog =================================================================== --- ChangeLog (revision 15640) +++ ChangeLog (revision 15641) @@ -1,3 +1,13 @@ +Fri Feb 29 20:25:07 2008 Yukihiro Matsumoto <matz@r...> + + * string.c (rb_str_chomp_bang): now works on UTF-16. + + * string.c (tr_setup_table): negation should work on non ASCII + compatible strings as well. + + * string.c (rb_str_split_m): awk split should work on non ASCII + compatible strings as well. + Fri Feb 29 18:08:43 2008 Yukihiro Matsumoto <matz@r...> * time.c (time_strftime): format should be ascii compatible. Index: string.c =================================================================== --- string.c (revision 15640) +++ string.c (revision 15641) @@ -4453,9 +4453,20 @@ tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str); tr.gen = tr.now = tr.max = 0; - if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') { - cflag = 1; - tr.p++; + if (RSTRING_LEN(str) > 1) { + if (rb_enc_asciicompat(enc)) { + if (RSTRING_PTR(str)[0] == '^') { + cflag = 1; + tr.p++; + } + } + else { + c = rb_enc_codepoint(RSTRING_PTR(str), RSTRING_END(str), enc); + if (c == '^') { + cflag = 1; + tr.p+=rb_enc_codelen(c, enc); + } + } } if (first) { for (i=0; i<256; i++) { @@ -4838,11 +4849,21 @@ } else { fs_set: - if (TYPE(spat) == T_STRING && RSTRING_LEN(spat) == 1) { - if (RSTRING_PTR(spat)[0] == ' ') { - awk_split = Qtrue; + if (TYPE(spat) == T_STRING) { + rb_encoding *enc2 = STR_ENC_GET(spat); + + if (rb_enc_mbminlen(enc2) == 1) { + if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ + awk_split = Qtrue; + } } else { + if (str_strlen(spat, enc2) == 1 && + rb_enc_codepoint(RSTRING_PTR(spat), RSTRING_END(spat), enc2) == ' ') { + awk_split = Qtrue; + } + } + if (!awk_split) { spat = rb_reg_regcomp(rb_reg_quote(spat)); } } @@ -5266,27 +5287,49 @@ char *p, *pp, *e; long len, rslen; + len = RSTRING_LEN(str); + if (len == 0) return Qnil; + p = RSTRING_PTR(str); + e = p + len; if (rb_scan_args(argc, argv, "01", &rs) == 0) { - len = RSTRING_LEN(str); - if (len == 0) return Qnil; - p = RSTRING_PTR(str); rs = rb_rs; if (rs == rb_default_rs) { smart_chomp: - rb_enc_check(str, rs); rb_str_modify(str); - if (RSTRING_PTR(str)[len-1] == '\n') { - STR_DEC_LEN(str); - if (RSTRING_LEN(str) > 0 && - RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') { - STR_DEC_LEN(str); + enc = rb_enc_get(str); + if (rb_enc_mbminlen(enc) > 1) { + len = str_strlen(str, enc); + pp = rb_enc_nth(p, e, len-1, enc); + if (rb_enc_is_newline(pp, e, enc)) { + e = pp; + len--; } + if (len > 0) { + p = rb_enc_nth(p, e, len-1, enc); + if (rb_enc_codepoint(p, e, enc) == '\r') { + pp = e = p; + } + } + if (e == RSTRING_END(str)) { + return Qnil; + } + len = pp - RSTRING_PTR(str); + STR_SET_LEN(str, len); } - else if (RSTRING_PTR(str)[len-1] == '\r') { - STR_DEC_LEN(str); - } else { - return Qnil; + if (RSTRING_PTR(str)[len-1] == '\n') { + STR_DEC_LEN(str); + if (RSTRING_LEN(str) > 0 && + RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') { + STR_DEC_LEN(str); + } + } + else if (RSTRING_PTR(str)[len-1] == '\r') { + STR_DEC_LEN(str); + } + else { + return Qnil; + } } RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; return str; @@ -5294,10 +5337,6 @@ } if (NIL_P(rs)) return Qnil; StringValue(rs); - enc = rb_enc_check(str, rs); - len = RSTRING_LEN(str); - if (len == 0) return Qnil; - p = RSTRING_PTR(str); rslen = RSTRING_LEN(rs); if (rslen == 0) { while (len>0 && p[len-1] == '\n') { @@ -5321,8 +5360,8 @@ if (is_broken_string(rs)) { return Qnil; } - e = p + len; pp = e - rslen; + enc = rb_enc_check(str, rs); if (p[len-1] == newline && (rslen <= 1 || memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { Index: test/ruby/test_utf16.rb =================================================================== --- test/ruby/test_utf16.rb (revision 15640) +++ test/ruby/test_utf16.rb (revision 15641) @@ -232,9 +232,11 @@ def test_chomp s = "\1\n".force_encoding("utf-16be") - assert_raise(ArgumentError, "#{encdump s}.chomp") { - s.chomp - } + assert_equal(s, s.chomp, "#{encdump s}.chomp") + s = "\0\n".force_encoding("utf-16be") + assert_equal("", s.chomp, "#{encdump s}.chomp") + s = "\0\r\0\n".force_encoding("utf-16be") + assert_equal("", s.chomp, "#{encdump s}.chomp") end def test_succ Index: test/ruby/test_string.rb =================================================================== --- test/ruby/test_string.rb (revision 15640) +++ test/ruby/test_string.rb (revision 15641) @@ -280,6 +280,7 @@ def test_chomp assert_equal(S("hello"), S("hello").chomp("\n")) assert_equal(S("hello"), S("hello\n").chomp("\n")) + save = $/ $/ = "\n" @@ -289,7 +290,7 @@ $/ = "!" assert_equal(S("hello"), S("hello").chomp) assert_equal(S("hello"), S("hello!").chomp) - $/ = "\n" + $/ = save end def test_chomp! @@ -302,6 +303,7 @@ a = S("hello\n") a.chomp!(S("\n")) assert_equal(S("hello"), a) + save = $/ $/ = "\n" a = S("hello") @@ -321,7 +323,7 @@ a.chomp! assert_equal(S("hello"), a) - $/ = "\n" + $/ = save a = S("hello\n") b = a.dup @@ -474,6 +476,7 @@ end def test_each + save = $/ $/ = "\n" res=[] S("hello\nworld").lines.each {|x| res << x} @@ -490,7 +493,7 @@ S("hello!world").lines.each {|x| res << x} assert_equal(S("hello!"), res[0]) assert_equal(S("world"), res[1]) - $/ = "\n" + $/ = save end def test_each_byte @@ -502,6 +505,7 @@ end def test_each_line + save = $/ $/ = "\n" res=[] S("hello\nworld").lines.each {|x| res << x} @@ -520,7 +524,7 @@ assert_equal(S("hello!"), res[0]) assert_equal(S("world"), res[1]) - $/ = "\n" + $/ = save end def test_empty? -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/