ruby-changes:2641
From: ko1@a...
Date: 8 Dec 2007 13:31:46 +0900
Subject: [ruby-changes:2641] akr - Ruby:r14132 (trunk): add test for UTF-8 bit pattern.
akr 2007-12-08 13:31:26 +0900 (Sat, 08 Dec 2007) New Revision: 14132 Modified files: trunk/test/ruby/test_m17n.rb Log: add test for UTF-8 bit pattern. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=14132&r2=14131 Index: test/ruby/test_m17n.rb =================================================================== --- test/ruby/test_m17n.rb (revision 14131) +++ test/ruby/test_m17n.rb (revision 14132) @@ -68,6 +68,65 @@ assert_equal('"\374"', u("\xfc").inspect) end + def test_validate_redundant_utf8 + bits_0x10ffff = "11110100 10001111 10111111 10111111" + [ + "0xxxxxxx", + "110XXXXx 10xxxxxx", + "1110XXXX 10Xxxxxx 10xxxxxx", + "11110XXX 10XXxxxx 10xxxxxx 10xxxxxx", + "111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx", + "1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx", + "11111110 10XXXXXx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx", + "11111111 10XXXXXX 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx", + ].each {|pat0| + [ + pat0.gsub(/x/, '1'), + pat0.gsub(/x/, '0') + ].each {|pat1| + [ + pat1.sub(/X([^X]*)\z/, '1\1').gsub(/X/, "0"), + pat1.gsub(/X/, "1"), + ].each {|pat2| + s = [pat2.gsub(/ /, "")].pack("B*").force_encoding("utf-8") + if pat2 <= bits_0x10ffff + assert(s.valid_encoding?, "#{pat2}") + else + assert(!s.valid_encoding?, "#{pat2}") + end + } + if / / =~ pat0 + pat3 = pat1.gsub(/X/, "0") + s = [pat3.gsub(/ /, "")].pack("B*").force_encoding("utf-8") + assert(!s.valid_encoding?, "#{pat3}") + end + } + } + end + + def test_validate_surrogate + # 1110XXXX 10Xxxxxx 10xxxxxx : 3 bytes UTF-8 + pats = [ + "11101101 10011111 10111111", # just before surrogate high + "11101101 1010xxxx 10xxxxxx", # surrogate high + "11101101 1011xxxx 10xxxxxx", # surrogate low + "11101110 10000000 10000000", # just after surrogate low + ] + pats.values_at(1,2).each {|pat0| + [ + pat0.gsub(/x/, '0'), + pat0.gsub(/x/, '1'), + ].each {|pat1| + s = [pat1.gsub(/ /, "")].pack("B*").force_encoding("utf-8") + assert(!s.valid_encoding?, "#{pat1}") + } + } + pats.values_at(0,3).each {|pat| + s = [pat.gsub(/ /, "")].pack("B*").force_encoding("utf-8") + assert(s.valid_encoding?, "#{pat}") + } + end + def test_regexp_too_short_multibyte_character assert_raise(SyntaxError) { eval('/\xfe/e') } assert_raise(SyntaxError) { eval('/\x8e/e') } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml