ruby-changes:17876
From: naruse <ko1@a...>
Date: Wed, 24 Nov 2010 01:44:01 +0900 (JST)
Subject: [ruby-changes:17876] Ruby:r29889 (trunk): * enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.
naruse 2010-11-24 01:42:47 +0900 (Wed, 24 Nov 2010) New Revision: 29889 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=29889 Log: * enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding. * enc/trans/utf_16_32.trans: add a converter from UTF-16 to UTF-8. Added files: trunk/enc/utf_16_32.h Modified files: trunk/ChangeLog trunk/enc/trans/utf_16_32.trans trunk/test/ruby/test_transcode.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 29888) +++ ChangeLog (revision 29889) @@ -1,3 +1,9 @@ +Wed Nov 24 01:40:23 2010 NARUSE, Yui <naruse@r...> + + * enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding. + + * enc/trans/utf_16_32.trans: add a converter from UTF-16 to UTF-8. + Tue Nov 23 21:59:47 2010 Nobuyoshi Nakada <nobu@r...> * win32/win32.c (wlink, rb_w32_getppid): use typedef instead of Index: enc/trans/utf_16_32.trans =================================================================== --- enc/trans/utf_16_32.trans (revision 29888) +++ enc/trans/utf_16_32.trans (revision 29889) @@ -22,6 +22,10 @@ transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE") map = {} + map["{00-ff}{00-ff}"] = :func_si + transcode_generate_node(ActionMap.parse(map), "from_UTF_16") + + map = {} map["{00-7f}"] = :func_so map["{c2-df}{80-bf}"] = :func_so map["e0{a0-bf}{80-bf}"] = :func_so @@ -259,6 +263,64 @@ return 4; } +static int +state_init(void *statep) +{ + unsigned char *sp = statep; + *sp = 0; + return 0; +} + +static VALUE +fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l) +{ + #define BE 1 + #define LE 2 + unsigned char *sp = statep; + switch (*sp) { + case 0: + if (s[0] == 0xFE && s[1] == 0xFF) { + *sp = BE; + return ZERObt; + } + else if (s[0] == 0xFF && s[1] == 0xFE) { + *sp = LE; + return ZERObt; + } + break; + case BE: + if (0xD8 <= s[0] && s[0] <= 0xDB) { + return (VALUE)from_UTF_16BE_D8toDB_00toFF; + } + else { + return (VALUE)FUNso; + } + break; + case LE: + if (0xD8 <= s[1] && s[1] <= 0xDB) { + return (VALUE)from_UTF_16LE_00toFF_D8toDB; + } + else { + return (VALUE)FUNso; + } + break; + } + return (VALUE)INVALID; +} + +static ssize_t +fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + switch (*sp) { + case BE: + return fun_so_from_utf_16be(statep, s, l, o, osize); + case LE: + return fun_so_from_utf_16le(statep, s, l, o, osize); + } + return 0; +} + static const rb_transcoder rb_from_UTF_16BE = { "UTF-16BE", "UTF-8", from_UTF_16BE, @@ -355,6 +417,18 @@ NULL, NULL, NULL, fun_so_to_utf_32le }; +static const rb_transcoder +rb_from_UTF_16 = { + "UTF-16", "UTF-8", from_UTF_16, + TRANSCODE_TABLE_INFO, + 2, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 1, state_init, NULL, /* state_size, state_init, state_fini */ + NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16 +}; + void Init_utf_16_32(void) { @@ -366,4 +440,5 @@ rb_register_transcoder(&rb_to_UTF_32BE); rb_register_transcoder(&rb_from_UTF_32LE); rb_register_transcoder(&rb_to_UTF_32LE); + rb_register_transcoder(&rb_from_UTF_16); } Index: enc/utf_16_32.h =================================================================== --- enc/utf_16_32.h (revision 0) +++ enc/utf_16_32.h (revision 29889) @@ -0,0 +1,4 @@ +#include "regenc.h" +/* dummy for unsupported, statefull encoding */ +ENC_DUMMY("UTF-16"); +ENC_DUMMY("UTF-32"); Index: test/ruby/test_transcode.rb =================================================================== --- test/ruby/test_transcode.rb (revision 29888) +++ test/ruby/test_transcode.rb (revision 29889) @@ -1019,6 +1019,12 @@ check_utf_16_both_ways("\u{F00FF}", "\xDB\x80\xDC\xFF") end + def test_utf_16_bom + expected = "\u{3042}\u{3044}\u{20bb7}" + assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16")) + assert_equal(expected, %w/feff30423044d842dfb7/.pack("H*").encode("UTF-8","UTF-16")) + end + def check_utf_32_both_ways(utf8, raw) copy = raw.dup 0.step(copy.length-1, 4) do |i| -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/