ruby-changes:2641
From: ko1@a...
Date: 8 Dec 2007 13:31:46 +0900
Subject: [ruby-changes:2641] akr - Ruby:r14132 (trunk): add test for UTF-8 bit pattern.
akr 2007-12-08 13:31:26 +0900 (Sat, 08 Dec 2007)
New Revision: 14132
Modified files:
trunk/test/ruby/test_m17n.rb
Log:
add test for UTF-8 bit pattern.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=14132&r2=14131
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb (revision 14131)
+++ test/ruby/test_m17n.rb (revision 14132)
@@ -68,6 +68,65 @@
assert_equal('"\374"', u("\xfc").inspect)
end
+ def test_validate_redundant_utf8 # checks String#valid_encoding? on UTF-8 bit patterns: redundant (all-X-zero) forms and patterns above U+10FFFF must be invalid
+ bits_0x10ffff = "11110100 10001111 10111111 10111111" # F4 8F BF BF, the UTF-8 encoding of U+10FFFF; used as the upper bound below
+ [ # templates: x bits get filled with all 1s or all 0s; X bits are the ones varied to probe redundancy
+ "0xxxxxxx", # 1 byte, no X bits
+ "110XXXXx 10xxxxxx", # 2 bytes
+ "1110XXXX 10Xxxxxx 10xxxxxx", # 3 bytes
+ "11110XXX 10XXxxxx 10xxxxxx 10xxxxxx", # 4 bytes
+ "111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx", # 5 bytes
+ "1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx", # 6 bytes
+ "11111110 10XXXXXx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx", # 7 bytes
+ "11111111 10XXXXXX 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx", # 8 bytes
+ ].each {|pat0|
+ [
+ pat0.gsub(/x/, '1'), # every x bit set
+ pat0.gsub(/x/, '0') # every x bit clear
+ ].each {|pat1|
+ [
+ pat1.sub(/X([^X]*)\z/, '1\1').gsub(/X/, "0"), # only the last X set, all other X bits clear
+ pat1.gsub(/X/, "1"), # every X bit set
+ ].each {|pat2|
+ s = [pat2.gsub(/ /, "")].pack("B*").force_encoding("utf-8") # drop the spaces, pack the bit string into bytes, tag the result as UTF-8
+ if pat2 <= bits_0x10ffff # plain String comparison of the bit-pattern text against the U+10FFFF pattern
+ assert(s.valid_encoding?, "#{pat2}") # at or below the bound: expected valid
+ else
+ assert(!s.valid_encoding?, "#{pat2}") # above the bound: expected invalid
+ end
+ }
+ if / / =~ pat0 # a space in the template means it has more than one byte
+ pat3 = pat1.gsub(/X/, "0") # every X bit clear
+ s = [pat3.gsub(/ /, "")].pack("B*").force_encoding("utf-8") # same bits-to-bytes conversion as above
+ assert(!s.valid_encoding?, "#{pat3}") # multi-byte pattern with all X bits zero is expected invalid
+ end
+ }
+ }
+ end
+
+ def test_validate_surrogate # checks that String#valid_encoding? rejects 3-byte patterns in the surrogate range and accepts the patterns on either side of it
+ # Template for 3-byte UTF-8: 1110XXXX 10Xxxxxx 10xxxxxx. In pats below, x marks bits that are filled with all 0s or all 1s.
+ pats = [
+ "11101101 10011111 10111111", # just before surrogate high (ED 9F BF, U+D7FF)
+ "11101101 1010xxxx 10xxxxxx", # surrogate high (U+D800..U+DBFF)
+ "11101101 1011xxxx 10xxxxxx", # surrogate low (U+DC00..U+DFFF)
+ "11101110 10000000 10000000", # just after surrogate low (EE 80 80, U+E000)
+ ]
+ pats.values_at(1,2).each {|pat0| # the two surrogate templates
+ [
+ pat0.gsub(/x/, '0'), # every x bit clear
+ pat0.gsub(/x/, '1'), # every x bit set
+ ].each {|pat1|
+ s = [pat1.gsub(/ /, "")].pack("B*").force_encoding("utf-8") # drop the spaces, pack the bit string into bytes, tag the result as UTF-8
+ assert(!s.valid_encoding?, "#{pat1}") # surrogate patterns are expected invalid
+ }
+ }
+ pats.values_at(0,3).each {|pat| # the two neighbouring patterns outside the surrogate range
+ s = [pat.gsub(/ /, "")].pack("B*").force_encoding("utf-8") # same bits-to-bytes conversion as above
+ assert(s.valid_encoding?, "#{pat}") # neighbours are expected valid
+ }
+ end
+
def test_regexp_too_short_multibyte_character
assert_raise(SyntaxError) { eval('/\xfe/e') }
assert_raise(SyntaxError) { eval('/\x8e/e') }
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml