ruby-changes:4083
From: ko1@a...
Date: Fri, 22 Feb 2008 15:53:15 +0900 (JST)
Subject: [ruby-changes:4083] akr - Ruby:r15573 (trunk): * encoding.c (rb_enc_mbclen): return minlen instead of 1 when
akr 2008-02-22 15:52:54 +0900 (Fri, 22 Feb 2008)

  New Revision: 15573

  Added files:
    trunk/test/ruby/test_utf32.rb
  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/string.c

  Log:
    * encoding.c (rb_enc_mbclen): return minlen instead of 1 when
      a character is not found properly.

    * string.c (rb_enc_strlen): round up string length with fixed
      multibyte encoding such as UTF-32.
      (rb_enc_strlen_cr): ditto.
      (rb_str_substr): fix substring with fixed multibyte encoding.
      (rb_str_justify): check number of characters.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_utf32.rb?revision=15573&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_utf32.rb?r1=15573&r2=15572&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15573&r2=15572&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15573&r2=15572&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=15573&r2=15572&diff_format=u

Index: encoding.c
===================================================================
--- encoding.c (revision 15572)
+++ encoding.c (revision 15573)
@@ -738,8 +738,10 @@
     int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
     if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
         return MBCLEN_CHARFOUND_LEN(n);
-    else
-        return 1;
+    else {
+        int min = rb_enc_mbminlen(enc);
+        return min <= e-p ? min : e-p;
+    }
 }
 
 int
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15572)
+++ ChangeLog (revision 15573)
@@ -1,3 +1,14 @@
+Fri Feb 22 15:47:36 2008 Tanaka Akira <akr@f...>
+
+        * encoding.c (rb_enc_mbclen): return minlen instead of 1 when
+          a character is not found properly.
+
+        * string.c (rb_enc_strlen): round up string length with fixed
+          multibyte encoding such as UTF-32.
+          (rb_enc_strlen_cr): ditto.
+          (rb_str_substr): fix substring with fixed multibyte encoding.
+          (rb_str_justify): check number of characters.
+
 Fri Feb 22 12:11:12 2008 NARUSE, Yui <naruse@r...>
 
         * string.c (rb_str_inspect): string of ascii incompatible encoding
Index: string.c
===================================================================
--- string.c (revision 15572)
+++ string.c (revision 15573)
@@ -618,7 +618,7 @@
     const char *q;
 
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-        return (e - p) / rb_enc_mbminlen(enc);
+        return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
     }
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
@@ -651,7 +651,7 @@
 
     *cr = 0;
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-        return (e - p) / rb_enc_mbminlen(enc);
+        return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
     }
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
@@ -1223,10 +1223,9 @@
         len = 0;
     }
     else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-        long rest = (e - p) / rb_enc_mbmaxlen(enc);
-        if (len > rest)
-            len = rest;
-        else
+        if (len * rb_enc_mbmaxlen(enc) > e - p)
+            len = e - p;
+        else
             len *= rb_enc_mbmaxlen(enc);
     }
     else {
@@ -5777,7 +5776,7 @@
         flen = RSTRING_LEN(pad);
         fclen = str_strlen(pad, enc);
         singlebyte = single_byte_optimizable(pad);
-        if (flen == 0) {
+        if (flen == 0 || fclen == 0) {
             rb_raise(rb_eArgError, "zero width padding");
         }
     }
Index: test/ruby/test_utf32.rb
===================================================================
--- test/ruby/test_utf32.rb (revision 0)
+++ test/ruby/test_utf32.rb (revision 15573)
@@ -0,0 +1,27 @@
+require 'test/unit'
+
+class TestUTF32 < Test::Unit::TestCase
+  def encdump(str)
+    d = str.dump
+    if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
+      d
+    else
+      "#{d}.force_encoding(#{str.encoding.name.dump})"
+    end
+  end
+
+  def assert_str_equal(expected, actual, message=nil)
+    full_message = build_message(message, <<EOT)
+#{encdump expected} expected but not equal to
+#{encdump actual}.
+EOT
+    assert_block(full_message) { expected == actual }
+  end
+
+  def test_substr
+    assert_str_equal(
+      "abcdefgh".force_encoding("utf-32be"),
+      "abcdefgh".force_encoding("utf-32be")[0,3])
+  end
+end
+

Property changes on: test/ruby/test_utf32.rb
___________________________________________________________________
Name: svn:eol-style
   + LF

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/