ruby-changes:32527
From: nobu <ko1@a...>
Date: Wed, 15 Jan 2014 14:04:40 +0900 (JST)
Subject: [ruby-changes:32527] nobu:r44606 (trunk): string.c: respect BOM
nobu 2014-01-15 14:04:36 +0900 (Wed, 15 Jan 2014)

  New Revision: 44606

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=44606

  Log:
    string.c: respect BOM

    * string.c (get_encoding): respect BOM on pseudo encodings.
      [ruby-dev:47895] [Bug #9415]

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_m17n.rb

Index: ChangeLog
===================================================================
--- ChangeLog (revision 44605)
+++ ChangeLog (revision 44606)
@@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Wed Jan 15 14:04:33 2014 Nobuyoshi Nakada <nobu@r...>
+
+ * string.c (get_encoding): respect BOM on pseudo encodings.
+ [ruby-dev:47895] [Bug #9415]
+
 Wed Jan 15 14:03:47 2014 Nobuyoshi Nakada <nobu@r...>

 * string.c (get_actual_encoding): get actual encoding according to
Index: string.c
===================================================================
--- string.c (revision 44605)
+++ string.c (revision 44606)
@@ -121,7 +121,7 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L121
 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
 #define STR_HEAP_SIZE(str) (RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))

-#define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
+#define STR_ENC_GET(str) get_encoding(str)

 rb_encoding *rb_enc_get_from_index(int index);

@@ -155,6 +155,12 @@ get_actual_encoding(const int encidx, VA https://github.com/ruby/ruby/blob/trunk/string.c#L155
     return rb_enc_from_index(encidx);
 }

+static rb_encoding *
+get_encoding(VALUE str)
+{
+    return get_actual_encoding(ENCODING_GET(str), str);
+}
+
 static int fstring_cmp(VALUE a, VALUE b);

 static st_table* frozen_strings;
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb (revision 44605)
+++ test/ruby/test_m17n.rb (revision 44606)
@@ -229,6 +229,7 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n.rb#L229
   STR_WITHOUT_BOM = "\u3042".freeze
   STR_WITH_BOM = "\uFEFF\u3042".freeze
   bug8940 = '[ruby-core:59757] [Bug #8940]'
+  bug9415 = '[ruby-dev:47895] [Bug #9415]'
   %w/UTF-16 UTF-32/.each do |enc|
     %w/BE LE/.each do |endian|
       bom = "\uFEFF".encode("#{enc}#{endian}").force_encoding(enc)
@@ -242,6 +243,14 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n.rb#L243
         assert_normal_exit("#{bom.b.dump}.force_encoding('#{enc}').inspect", bug8940)
       end

+      define_method("test_utf_16_32_codepoints(#{enc}#{endian})") do
+        assert_equal([0xFEFF], bom.codepoints, bug9415)
+      end
+
+      define_method("test_utf_16_32_ord(#{enc}#{endian})") do
+        assert_equal(0xFEFF, bom.ord, bug9415)
+      end
+
       define_method("test_utf_16_32_inspect(#{enc}#{endian}-BOM)") do
         s = STR_WITH_BOM.encode(enc + endian)
         # When a UTF-16/32 string has a BOM,

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/