[前][次][番号順一覧][スレッド一覧]

ruby-changes:17876

From: naruse <ko1@a...>
Date: Wed, 24 Nov 2010 01:44:01 +0900 (JST)
Subject: [ruby-changes:17876] Ruby:r29889 (trunk): * enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.

naruse	2010-11-24 01:42:47 +0900 (Wed, 24 Nov 2010)

  New Revision: 29889

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=29889

  Log:
    * enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.
    
    * enc/trans/utf_16_32.trans: add a converter from UTF-16 to UTF-8.

  Added files:
    trunk/enc/utf_16_32.h
  Modified files:
    trunk/ChangeLog
    trunk/enc/trans/utf_16_32.trans
    trunk/test/ruby/test_transcode.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 29888)
+++ ChangeLog	(revision 29889)
@@ -1,3 +1,9 @@
+Wed Nov 24 01:40:23 2010  NARUSE, Yui  <naruse@r...>
+
+	* enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.
+
+	* enc/trans/utf_16_32.trans: add a converter from UTF-16 to UTF-8.
+
 Tue Nov 23 21:59:47 2010  Nobuyoshi Nakada  <nobu@r...>
 
 	* win32/win32.c (wlink, rb_w32_getppid): use typedef instead of
Index: enc/trans/utf_16_32.trans
===================================================================
--- enc/trans/utf_16_32.trans	(revision 29888)
+++ enc/trans/utf_16_32.trans	(revision 29889)
@@ -22,6 +22,10 @@
   transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE")
 
   map = {}
+  map["{00-ff}{00-ff}"] = :func_si
+  transcode_generate_node(ActionMap.parse(map), "from_UTF_16")
+
+  map = {}
   map["{00-7f}"] = :func_so
   map["{c2-df}{80-bf}"] = :func_so
   map["e0{a0-bf}{80-bf}"] = :func_so
@@ -259,6 +263,64 @@
     return 4;
 }
 
+static int
+state_init(void *statep)
+{
+    unsigned char *sp = statep;
+    *sp = 0;
+    return 0;
+}
+
+static VALUE
+fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
+{
+    #define BE 1
+    #define LE 2
+    unsigned char *sp = statep;
+    switch (*sp) {
+    case 0:
+        if (s[0] == 0xFE && s[1] == 0xFF) {
+            *sp = BE;
+            return ZERObt;
+        }
+        else if (s[0] == 0xFF && s[1] == 0xFE) {
+            *sp = LE;
+            return ZERObt;
+        }
+        break;
+    case BE:
+        if (0xD8 <= s[0] && s[0] <= 0xDB) {
+            return (VALUE)from_UTF_16BE_D8toDB_00toFF;
+        }
+        else {
+            return (VALUE)FUNso;
+        }
+        break;
+    case LE:
+        if (0xD8 <= s[1] && s[1] <= 0xDB) {
+            return (VALUE)from_UTF_16LE_00toFF_D8toDB;
+        }
+        else {
+            return (VALUE)FUNso;
+        }
+        break;
+    }
+    return (VALUE)INVALID;
+}
+
+static ssize_t
+fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
+{
+    unsigned char *sp = statep;
+    switch (*sp) {
+    case BE:
+        return fun_so_from_utf_16be(statep, s, l, o, osize);
+    case LE:
+        return fun_so_from_utf_16le(statep, s, l, o, osize);
+    }
+    return 0;
+}
+
 static const rb_transcoder
 rb_from_UTF_16BE = {
     "UTF-16BE", "UTF-8", from_UTF_16BE,
@@ -355,6 +417,18 @@
     NULL, NULL, NULL, fun_so_to_utf_32le
 };
 
+static const rb_transcoder
+rb_from_UTF_16 = {
+    "UTF-16", "UTF-8", from_UTF_16,
+    TRANSCODE_TABLE_INFO,
+    2, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
+    asciicompat_decoder, /* asciicompat_type */
+    1, state_init, NULL, /* state_size, state_init, state_fini */
+    NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
+};
+
 void
 Init_utf_16_32(void)
 {
@@ -366,4 +440,5 @@
     rb_register_transcoder(&rb_to_UTF_32BE);
     rb_register_transcoder(&rb_from_UTF_32LE);
     rb_register_transcoder(&rb_to_UTF_32LE);
+    rb_register_transcoder(&rb_from_UTF_16);
 }
Index: enc/utf_16_32.h
===================================================================
--- enc/utf_16_32.h	(revision 0)
+++ enc/utf_16_32.h	(revision 29889)
@@ -0,0 +1,4 @@
+#include "regenc.h"
+/* dummy for unsupported, stateful encoding */
+ENC_DUMMY("UTF-16");
+ENC_DUMMY("UTF-32");
Index: test/ruby/test_transcode.rb
===================================================================
--- test/ruby/test_transcode.rb	(revision 29888)
+++ test/ruby/test_transcode.rb	(revision 29889)
@@ -1019,6 +1019,12 @@
     check_utf_16_both_ways("\u{F00FF}", "\xDB\x80\xDC\xFF")
   end
 
+  def test_utf_16_bom
+    expected = "\u{3042}\u{3044}\u{20bb7}"
+    assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16"))
+    assert_equal(expected, %w/feff30423044d842dfb7/.pack("H*").encode("UTF-8","UTF-16"))
+  end
+
   def check_utf_32_both_ways(utf8, raw)
     copy = raw.dup
     0.step(copy.length-1, 4) do |i|

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]