[前][次][番号順一覧][スレッド一覧]

ruby-changes:4083

From: ko1@a...
Date: Fri, 22 Feb 2008 15:53:15 +0900 (JST)
Subject: [ruby-changes:4083] akr - Ruby:r15573 (trunk): * encoding.c (rb_enc_mbclen): return minlen instead of 1 when a character is not found properly.

akr	2008-02-22 15:52:54 +0900 (Fri, 22 Feb 2008)

  New Revision: 15573

  Added files:
    trunk/test/ruby/test_utf32.rb
  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/string.c

  Log:
    * encoding.c (rb_enc_mbclen): return minlen instead of 1 when
      a character is not found properly.
    
    * string.c (rb_enc_strlen): round up string length with fixed
      multibyte encoding such as UTF-32.
      (rb_enc_strlen_cr): ditto.
      (rb_str_substr): fix substring with fixed multibyte encoding.
      (rb_str_justify): check number of characters.
    


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_utf32.rb?revision=15573&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_utf32.rb?r1=15573&r2=15572&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15573&r2=15572&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15573&r2=15572&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=15573&r2=15572&diff_format=u

Index: encoding.c
===================================================================
--- encoding.c	(revision 15572)
+++ encoding.c	(revision 15573)
@@ -738,8 +738,10 @@
     int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
     if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
         return MBCLEN_CHARFOUND_LEN(n);
-    else
-        return 1;
+    else {
+        int min = rb_enc_mbminlen(enc);
+        return min <= e-p ? min : e-p;
+    }
 }
 
 int
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15572)
+++ ChangeLog	(revision 15573)
@@ -1,3 +1,14 @@
+Fri Feb 22 15:47:36 2008  Tanaka Akira  <akr@f...>
+
+	* encoding.c (rb_enc_mbclen): return minlen instead of 1 when
+	  a character is not found properly.
+
+	* string.c (rb_enc_strlen): round up string length with fixed
+	  multibyte encoding such as UTF-32.
+	  (rb_enc_strlen_cr): ditto.
+	  (rb_str_substr): fix substring with fixed multibyte encoding.
+	  (rb_str_justify): check number of characters.
+
 Fri Feb 22 12:11:12 2008  NARUSE, Yui  <naruse@r...>
 
 	* string.c (rb_str_inspect): string of ascii incompatible encoding
Index: string.c
===================================================================
--- string.c	(revision 15572)
+++ string.c	(revision 15573)
@@ -618,7 +618,7 @@
     const char *q;
 
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-        return (e - p) / rb_enc_mbminlen(enc);
+        return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
     }
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
@@ -651,7 +651,7 @@
 
     *cr = 0;
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-	return (e - p) / rb_enc_mbminlen(enc);
+	return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
     }
     else if (rb_enc_asciicompat(enc)) {
 	c = 0;
@@ -1223,10 +1223,9 @@
 	len = 0;
     }
     else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-	long rest = (e - p) / rb_enc_mbmaxlen(enc);
-	if (len > rest)
-	    len = rest;
-	else
+        if (len * rb_enc_mbmaxlen(enc) > e - p)
+            len = e - p;
+        else
 	    len *= rb_enc_mbmaxlen(enc);
     }
     else {
@@ -5777,7 +5776,7 @@
 	flen = RSTRING_LEN(pad);
 	fclen = str_strlen(pad, enc);
 	singlebyte = single_byte_optimizable(pad);
-	if (flen == 0) {
+	if (flen == 0 || fclen == 0) {
 	    rb_raise(rb_eArgError, "zero width padding");
 	}
     }
Index: test/ruby/test_utf32.rb
===================================================================
--- test/ruby/test_utf32.rb	(revision 0)
+++ test/ruby/test_utf32.rb	(revision 15573)
@@ -0,0 +1,27 @@
+require 'test/unit'
+
+class TestUTF32 < Test::Unit::TestCase
+  def encdump(str)
+    d = str.dump
+    if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
+      d
+    else
+      "#{d}.force_encoding(#{str.encoding.name.dump})"
+    end
+  end
+
+  def assert_str_equal(expected, actual, message=nil)
+    full_message = build_message(message, <<EOT)
+#{encdump expected} expected but not equal to
+#{encdump actual}.
+EOT
+    assert_block(full_message) { expected == actual }
+  end
+
+  def test_substr
+    assert_str_equal(
+      "abcdefgh".force_encoding("utf-32be"),
+      "abcdefgh".force_encoding("utf-32be")[0,3])
+  end
+end
+

Property changes on: test/ruby/test_utf32.rb
___________________________________________________________________
Name: svn:eol-style
   + LF


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]