ruby-changes:3032

akr	2007-12-23 23:06:00 +0900 (Sun, 23 Dec 2007)

  New Revision: 14524

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/test/ruby/test_m17n.rb

  Log:
    * encoding.c (rb_enc_codepoint): implemented as a function that
      raises ArgumentError on an invalid byte sequence.
    
    * include/ruby/encoding.h (rb_enc_codepoint): macro is replaced by a
      declaration.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14524&r2=14523
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=14524&r2=14523
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=14524&r2=14523
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=14524&r2=14523

Index: encoding.c
===================================================================
--- encoding.c	(revision 14523)
+++ encoding.c	(revision 14524)
@@ -661,6 +661,15 @@
     return c;
 }
 
+int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
+{
+    int r = rb_enc_precise_mbclen(p, e, enc);
+    if (MBCLEN_CHARFOUND(r))
+        return ONIGENC_MBC_TO_CODE(enc,(UChar*)p,(UChar*)e);
+    else
+	rb_raise(rb_eArgError, "invalid mbstring sequence");
+}
+
 int
 rb_enc_codelen(int c, rb_encoding *enc)
 {
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 14523)
+++ include/ruby/encoding.h	(revision 14524)
@@ -71,8 +71,8 @@
 #define rb_enc_mbminlen(enc) (enc)->min_enc_len
 #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
 
-/* ptr,endptr,encoding -> mbclen */
-int rb_enc_mbclen(const char*, const char *, rb_encoding*);
+/* -> mbclen (no error notification, no exception, 0 < ret <= e-p) */
+int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
 
 /* -> chlen, invalid or needmore */
 int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc);
@@ -83,14 +83,14 @@
 /* -> 0x00..0x7f, -1 */
 int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
 
-/* code,encoding -> codelen */
-int rb_enc_codelen(int, rb_encoding*);
+/* -> codelen or raise exception */
+int rb_enc_codelen(int code, rb_encoding *enc);
 
 /* code,ptr,encoding -> write buf */
 #define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)buf)
 
-/* ptr,ptr,encoding -> codepoint */
-#define rb_enc_codepoint(p,e,enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)p,(UChar*)e) 
+/* -> code or raise exception */
+int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
 
 /* ptr, ptr, encoding -> prev_char */
 #define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 14523)
+++ ChangeLog	(revision 14524)
@@ -1,3 +1,11 @@
+Sun Dec 23 23:03:13 2007  Tanaka Akira  <akr@f...>
+
+	* encoding.c (rb_enc_codepoint): implemented to raise invalid
+	  encoding.
+
+	* include/ruby/encoding.h (rb_enc_codepoint): macro is replaced as a
+	  declaration.
+
 Sun Dec 23 19:45:22 2007  Tanaka Akira  <akr@f...>
 
 	* lib/time.rb (Time.httpdate): fix 2 digits year for 20xx.
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb	(revision 14523)
+++ test/ruby/test_m17n.rb	(revision 14524)
@@ -1149,8 +1149,7 @@
   def test_str_count
     combination(STRINGS, STRINGS) {|s1, s2|
       if !s1.valid_encoding? || !s2.valid_encoding?
-        #assert_raise(ArgumentError) { s1.count(s2) }
-        #assert_nothing_raised { s1.count(s2) }
+        assert_raise(ArgumentError) { s1.count(s2) }
         next
       end
       if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding
@@ -1178,8 +1177,7 @@
   def test_str_delete
     combination(STRINGS, STRINGS) {|s1, s2|
       if !s1.valid_encoding? || !s2.valid_encoding?
-        #assert_raise(ArgumentError) { s1.delete(s2) }
-        #assert_nothing_raised { s1.delete(s2) }
+        assert_raise(ArgumentError) { s1.delete(s2) }
         next
       end
       if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding
@@ -1199,8 +1197,7 @@
   def test_str_downcase
     STRINGS.each {|s|
       if !s.valid_encoding?
-        #assert_raise(ArgumentError) { s.downcase }
-        #assert_nothing_raised { s.downcase }
+        assert_raise(ArgumentError) { s.downcase }
         next
       end
       t = s.downcase
@@ -1226,8 +1223,7 @@
   def test_str_each_line
     combination(STRINGS, STRINGS) {|s1, s2|
       if !s1.valid_encoding? || !s2.valid_encoding?
-        #assert_raise(ArgumentError) { s1.each_line(s2) {} }
-        #assert_nothing_raised { s1.each_line(s2) {} }
+        assert_raise(ArgumentError) { s1.each_line(s2) {} }
         next
       end
       if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding
@@ -1294,13 +1290,19 @@
         assert(!s1.index(s2))
         assert(!s1.rindex(s2), "!#{encdump(s1)}.rindex(#{encdump(s2)})")
       end
-      if s1.valid_encoding? && s2.valid_encoding?
-        if t && s1.valid_encoding? && s2.valid_encoding?
-          assert_match(/#{Regexp.escape(s2)}/, s1)
-        else
-          assert_no_match(/#{Regexp.escape(s2)}/, s1)
-        end
+      if s2.empty?
+        assert_equal(true, t)
+        next
       end
+      if !s1.valid_encoding? || !s2.valid_encoding?
+        assert_equal(false, t, "#{encdump s1}.include?(#{encdump s2})")
+        next
+      end
+      if t && s1.valid_encoding? && s2.valid_encoding?
+        assert_match(/#{Regexp.escape(s2)}/, s1)
+      else
+        assert_no_match(/#{Regexp.escape(s2)}/, s1)
+      end
     }
   end
 
@@ -1311,13 +1313,28 @@
         next
       end
       t = s1.index(s2, pos)
+      if s2.empty?
+        if pos < 0 && pos+s1.length < 0
+          assert_equal(nil, t, "#{encdump s1}.index(#{encdump s2}, #{pos})");
+        elsif pos < 0
+          assert_equal(s1.length+pos, t, "#{encdump s1}.index(#{encdump s2}, #{pos})");
+        elsif s1.length < pos
+          assert_equal(nil, t, "#{encdump s1}.index(#{encdump s2}, #{pos})");
+        else
+          assert_equal(pos, t, "#{encdump s1}.index(#{encdump s2}, #{pos})");
+        end
+        next
+      end
       if !s1.valid_encoding? || !s2.valid_encoding?
+        assert_equal(nil, t, "#{encdump s1}.index(#{encdump s2}, #{pos})");
         next
       end
       if t
         re = /#{Regexp.escape(s2)}/
         assert(re.match(s1, pos))
         assert_equal($`.length, t, "#{encdump s1}.index(#{encdump s2}, #{pos})")
+      else
+        assert_no_match(/#{Regexp.escape(s2)}/, s1[pos..-1])
       end
     }
   end
@@ -1329,7 +1346,20 @@
         next
       end
       t = s1.rindex(s2, pos)
+      if s2.empty?
+        if pos < 0 && pos+s1.length < 0
+          assert_equal(nil, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})")
+        elsif pos < 0
+          assert_equal(s1.length+pos, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})")
+        elsif s1.length < pos
+          assert_equal(s1.length, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})")
+        else
+          assert_equal(pos, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})")
+        end
+        next
+      end
       if !s1.valid_encoding? || !s2.valid_encoding?
+        assert_equal(nil, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})")
         next
       end
       if t
@@ -1340,6 +1370,16 @@
         re = /\A(.{0,#{pos2}})#{Regexp.escape(s2)}/m
         assert(re.match(s1), "#{re.inspect}.match(#{encdump(s1)})")
         assert_equal($1.length, t, "#{encdump s1}.rindex(#{encdump s2}, #{pos})")
+      else
+        re = /#{Regexp.escape(s2)}/
+        n = re =~ s1
+        if n
+          if pos < 0
+            assert_operator(n, :>, s1.length+pos)
+          else
+            assert_operator(n, :>, pos)
+          end
+        end
       end
     }
   end
@@ -1411,9 +1451,11 @@
     STRINGS.each {|s|
       t = s.reverse
       assert_equal(s.bytesize, t.bytesize)
-      if s.valid_encoding?
-        assert_equal(s, t.reverse)
+      if !s.valid_encoding?
+        assert_operator(t.length, :<=, s.length)
+        next
       end
+      assert_equal(s, t.reverse)
     }
   end
 
@@ -1542,7 +1584,7 @@
   def test_str_squeeze
     combination(STRINGS, STRINGS) {|s1, s2|
       if !s1.valid_encoding? || !s2.valid_encoding?
-        #assert_raise(ArgumentError, "#{encdump s1}.squeeze(#{encdump s2})") { s1.squeeze(s2) }
+        assert_raise(ArgumentError, "#{encdump s1}.squeeze(#{encdump s2})") { s1.squeeze(s2) }
         next
       end
       if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding
@@ -1565,8 +1607,7 @@
   def test_str_strip
     STRINGS.each {|s|
       if !s.valid_encoding?
-        #assert_raise(ArgumentError, "#{encdump s}.strip") { s.strip }
-        #assert_nothing_raised("#{encdump s}.strip") { s.strip }
+        assert_raise(ArgumentError, "#{encdump s}.strip") { s.strip }
         next
       end
       t = s.strip
@@ -1596,20 +1637,22 @@
 
   def test_str_swapcase
     STRINGS.each {|s|
-      begin
-        t1 = s.swapcase
-      rescue ArgumentError
-        assert(!s.valid_encoding?)
+      if !s.valid_encoding?
+        assert_raise(ArgumentError, "#{encdump s}.swapcase") { s.swapcase }
         next
       end
+      t1 = s.swapcase
       assert(t1.valid_encoding?) if s.valid_encoding?
       assert(t1.casecmp(s))
       t2 = s.dup
       t2.swapcase!
       assert_equal(t1, t2)
+      t3 = t1.swapcase
+      assert_equal(s, t3);
     }
   end
 
+
   def test_str_to_f
     STRINGS.each {|s|
       assert_nothing_raised { s.to_f }
@@ -1641,54 +1684,96 @@
       "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "a".force_encoding("EUC-JP"))
     }
 
+    assert_equal("\xA1\xA1".force_encoding("EUC-JP"),
+      "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP")))
+
     combination(STRINGS, STRINGS, STRINGS) {|s1, s2, s3|
-      begin
-        #puts "#{encdump s1}.tr(#{encdump s2}, #{encdump s3})"
-        t = s1.tr(s2, s3)
-      rescue ArgumentError
-        e = $! unless /mbstring sequence/ =~ $!.message
+      desc = "#{encdump s1}.tr(#{encdump s2}, #{encdump s3})"
+      if s1.empty?
+        assert_equal(s1, s1.tr(s2, s3), desc)
+        next
       end
-      if e
-        encs = []
-        encs << s1.encoding if !s1.ascii_only?
-        encs << s2.encoding if !s2.ascii_only?
-        encs << s3.encoding if !s3.ascii_only?
-        encs.uniq!
-        #p e, encs
-        assert(1 < encs.length, "#{encdump s1}.tr(#{encdump s2}, #{encdump s3})")
+      if !str_enc_compatible?(s1, s2, s3)
+        assert_raise(ArgumentError, desc) { s1.tr(s2, s3) }
+        next
       end
+      if !s1.valid_encoding?
+        assert_raise(ArgumentError, desc) { s1.tr(s2, s3) }
+        next
+      end
+      if s2.empty?
+        assert_equal(s1, s1.tr(s2, s3), desc)
+        next
+      end
+      if !s2.valid_encoding? || !s3.valid_encoding?
+        assert_raise(ArgumentError, desc) { s1.tr(s2, s3) }
+        next
+      end
+      t = s1.tr(s2, s3)
+      if s3.empty?
+        assert_equal(0, t.length, desc)
+        next
+      end
+      assert_equal(s1.length, t.length, desc)
     }
   end
 
+  def str_enc_compatible?(*strs)
+    encs = []
+    strs.each {|s|
+      encs << s.encoding if !s.ascii_only?
+    }
+    encs.uniq!
+    encs.length <= 1
+  end
+
   def test_tr_s
+    assert_equal("\xA1\xA1".force_encoding("EUC-JP"),
+      "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP")))
+
     combination(STRINGS, STRINGS, STRINGS) {|s1, s2, s3|
-      begin
-        #puts "#{encdump s1}.tr_s(#{encdump s2}, #{encdump s3})"
-        t = s1.tr_s(s2, s3)
-      rescue ArgumentError
-        e = $! unless /mbstring sequence/ =~ $!.message
+      desc = "#{encdump s1}.tr_s(#{encdump s2}, #{encdump s3})"
+      if s1.empty?
+        assert_equal(s1, s1.tr_s(s2, s3), desc)
+        next
       end
-      if e
-        encs = []
-        encs << s1.encoding if !s1.ascii_only?
-        encs << s2.encoding if !s2.ascii_only?
-        encs << s3.encoding if !s3.ascii_only?
-        encs.uniq!
-        #p e, encs, 
-        assert(1 < encs.length, "#{encdump s1}.tr_s(#{encdump s2}, #{encdump s3})")
+      if !s1.valid_encoding?
+        assert_raise(ArgumentError, desc) { s1.tr_s(s2, s3) }
+        next
       end
+      if !str_enc_compatible?(s1, s2, s3)
+        assert_raise(ArgumentError, desc) { s1.tr(s2, s3) }
+        next
+      end
+      if s2.empty?
+        assert_equal(s1, s1.tr_s(s2, s3), desc)
+        next
+      end
+      if !s2.valid_encoding? || !s3.valid_encoding?
+        assert_raise(ArgumentError, desc) { s1.tr_s(s2, s3) }
+        next
+      end
+
+      t = nil
+      assert_nothing_raised(desc) { t = s1.tr_s(s2, s3) }
+
+      if s3.empty?
+        assert_equal(0, t.length, desc)
+        next
+      end
+      assert_operator(s1.length, :>=, t.length, desc)
     }
   end
 
   def test_str_upcase
     STRINGS.each {|s|
-      begin
-        t1 = s.upcase
-      rescue ArgumentError
-        assert(!s.valid_encoding?)
+      desc = "#{encdump s}.upcase"
+      if !s.valid_encoding?
+        assert_raise(ArgumentError, desc) { s.upcase }
         next
       end
-      assert(t1.valid_encoding?) if s.valid_encoding?
+      t1 = s.upcase
+      assert(t1.valid_encoding?)
       assert(t1.casecmp(s))
       t2 = s.dup
       t2.upcase!
@@ -1697,13 +1782,18 @@
   end
 
   def test_str_succ
-    s0 = e("\xA1\xA1")
-    s = s0.dup
-    n = 1000
-    n.times {
-      s.succ!
+    starts = [
+      e("\xA1\xA1"),
+      e("\xFE\xFE")
+    ]
+    starts.each {|s0|
+      s = s0.dup
+      n = 1000
+      n.times {|i|
+        assert_operator(s.length, :<=, s0.length + Math.log2(i+1) + 1, "#{encdump s0} succ! #{i} times => #{encdump s}")
+        s.succ!
+      }
     }
-    assert_operator(s.length, :<, s0.length + Math.log2(n) + 1)
   end
 
   def test_sub

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml