ruby-changes:4296
From: ko1@a...
Date: Sun, 16 Mar 2008 18:10:14 +0900 (JST)
Subject: [ruby-changes:4296] duerst - Ruby:r15786 (trunk): Sun Mar 16 18:07:07 2008 Martin Duerst <duerst@i...>
duerst 2008-03-16 18:09:53 +0900 (Sun, 16 Mar 2008)
New Revision: 15786
Modified files:
trunk/ChangeLog
trunk/enc/trans/utf_16_32.c
trunk/test/ruby/test_transcode.rb
Log:
Sun Mar 16 18:07:07 2008 Martin Duerst <duerst@i...>
* enc/trans/utf_16_32.c: bug fix (some invalid UTF-8 sequences
were legal)
* test/ruby/test_transcode.rb: test for above bug
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/trans/utf_16_32.c?r1=15786&r2=15785&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_transcode.rb?r1=15786&r2=15785&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15786&r2=15785&diff_format=u
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15785)
+++ ChangeLog (revision 15786)
@@ -1,3 +1,10 @@
+Sun Mar 16 18:07:07 2008 Martin Duerst <duerst@i...>
+
+ * enc/trans/utf_16_32.c: bug fix (some invalid UTF-8 sequences
+ were legal)
+
+ * test/ruby/test_transcode.rb: test for above bug
+
Sun Mar 16 17:28:07 2008 NARUSE, Yui <naruse@r...>
* common.mk (LIBRUBY_SO): add dependency to $(BUILTIN_ENCOBJS).
Index: enc/trans/utf_16_32.c
===================================================================
--- enc/trans/utf_16_32.c (revision 15785)
+++ enc/trans/utf_16_32.c (revision 15786)
@@ -211,7 +211,7 @@
static const struct byte_lookup* const
from_UTF_16BE_00_infos[1] = {
/* used by from_UTF_16BE_00 */
- /* used by to_UTF_32BE_82 */
+ /* used by to_UTF_32BE_C2 */
FUNso,
};
static const BYTE_LOOKUP
@@ -324,8 +324,8 @@
};
static const unsigned char
-to_UTF_32BE_82_offsets[64] = {
- /* used by to_UTF_32BE_82 */
+to_UTF_32BE_C2_offsets[64] = {
+ /* used by to_UTF_32BE_C2 */
/* used by to_UTF_32BE_E1 */
/* used by to_UTF_32BE_F1 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -334,7 +334,7 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const BYTE_LOOKUP
-to_UTF_32BE_82 = {
+to_UTF_32BE_C2 = {
/* used as to_UTF_32BE */
/* used as to_UTF_16BE */
/* used as to_UTF_32BE_E0 */
@@ -363,7 +363,7 @@
/* used as to_UTF_32LE_F1_80 */
/* used as to_UTF_16LE_F4_80 */
/* used as to_UTF_32LE_F4_80 */
- to_UTF_32BE_82_offsets,
+ to_UTF_32BE_C2_offsets,
from_UTF_16BE_00_infos
};
@@ -378,7 +378,7 @@
};
static const struct byte_lookup* const
to_UTF_32BE_E0_infos[2] = {
- INVALID, &to_UTF_32BE_82,
+ INVALID, &to_UTF_32BE_C2,
};
static const BYTE_LOOKUP
to_UTF_32BE_E0 = {
@@ -392,7 +392,7 @@
static const struct byte_lookup* const
to_UTF_32BE_E1_infos[1] = {
- &to_UTF_32BE_82,
+ &to_UTF_32BE_C2,
};
static const BYTE_LOOKUP
to_UTF_32BE_E1 = {
@@ -412,13 +412,13 @@
/* used as to_UTF_32LE_F1 */
/* used as to_UTF_16LE_F4 */
/* used as to_UTF_32LE_F4 */
- to_UTF_32BE_82_offsets,
+ to_UTF_32BE_C2_offsets,
to_UTF_32BE_E1_infos
};
static const struct byte_lookup* const
to_UTF_32BE_ED_infos[2] = {
- &to_UTF_32BE_82, INVALID,
+ &to_UTF_32BE_C2, INVALID,
};
static const BYTE_LOOKUP
to_UTF_32BE_ED = {
@@ -463,7 +463,7 @@
/* used as to_UTF_16BE */
/* used as to_UTF_16LE */
/* used as to_UTF_32LE */
- to_UTF_32BE_82_offsets,
+ to_UTF_32BE_C2_offsets,
to_UTF_32BE_F1_infos
};
@@ -491,18 +491,18 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
6, 7, 7, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
static const struct byte_lookup* const
to_UTF_32BE_infos[9] = {
- FUNso, INVALID, &to_UTF_32BE_82, &to_UTF_32BE_E0,
+ FUNso, INVALID, &to_UTF_32BE_C2, &to_UTF_32BE_E0,
&to_UTF_32BE_E1, &to_UTF_32BE_ED, &to_UTF_32BE_F0, &to_UTF_32BE_F1,
&to_UTF_32BE_F4,
};
Index: test/ruby/test_transcode.rb
===================================================================
--- test/ruby/test_transcode.rb (revision 15785)
+++ test/ruby/test_transcode.rb (revision 15786)
@@ -3,7 +3,7 @@
require 'test/unit'
class TestTranscode < Test::Unit::TestCase
- def setup # trick to create all the necessary encodings
+ def setup_really_needed? # trick to create all the necessary encodings
all_encodings = [ 'ISO-8859-1', 'ISO-8859-2',
'ISO-8859-3', 'ISO-8859-4',
'ISO-8859-5', 'ISO-8859-6',
@@ -248,5 +248,9 @@
"\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'),
"\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
+ assert_equal("\x00\x42".force_encoding('UTF-16BE'),
+ "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
+ assert_equal(''.force_encoding('UTF-16BE'),
+ "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
end
end
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/