[前][次][番号順一覧][スレッド一覧]

ruby-changes:7351

From: akr <ko1@a...>
Date: Tue, 26 Aug 2008 21:55:33 +0900 (JST)
Subject: [ruby-changes:7351] Ruby:r18870 (trunk): * transcode.c (rb_econv_open): disable newline conversion for ASCII

akr	2008-08-26 21:55:14 +0900 (Tue, 26 Aug 2008)

  New Revision: 18870

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18870

  Log:
    * transcode.c (rb_econv_open): disable newline conversion for
      ASCII-incompatible encodings.
      (str_transcode0): no need to disable newline conversion here.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/test/ruby/test_io_m17n.rb
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 18869)
+++ include/ruby/encoding.h	(revision 18870)
@@ -297,12 +297,18 @@
 #define ECONV_UNDEF_IGNORE                      0x0010
 #define ECONV_UNDEF_REPLACE                     0x0020
 
+/* effective only if output is ascii compatible */
 #define ECONV_UNIVERSAL_NEWLINE_DECODER         0x0100
+
+/* effective only if input is ascii compatible */
 #define ECONV_CRLF_NEWLINE_ENCODER              0x0200
 #define ECONV_CR_NEWLINE_ENCODER                0x0400
 
+/* end of flags for rb_econv_open */
+
 /* flags for rb_econv_convert */
 #define ECONV_PARTIAL_INPUT                   0x10000
 #define ECONV_OUTPUT_FOLLOWED_BY_INPUT        0x20000
+/* end of flags for rb_econv_convert */
 
 #endif /* RUBY_ENCODING_H */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18869)
+++ ChangeLog	(revision 18870)
@@ -1,3 +1,9 @@
+Tue Aug 26 21:53:56 2008  Tanaka Akira  <akr@f...>
+
+	* transcode.c (rb_econv_open): disable newline conversion for ASCII
+	  incompatible encodings.
+	  (str_transcode0): don't need disable newline conversion here.
+
 Tue Aug 26 21:44:39 2008  Tanaka Akira  <akr@f...>
 
 	* transcode.c (rb_econv_binmode): binmode is effective only once.
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 18869)
+++ test/ruby/test_io_m17n.rb	(revision 18870)
@@ -1154,8 +1154,18 @@
     }
   end
 
+  SYSTEM_NEWLINE = []
   def system_newline
-    File::BINARY == 0 ? "\n" : "\r\n"
+    return SYSTEM_NEWLINE.first if !SYSTEM_NEWLINE.empty?
+    with_tmpdir {
+      open("newline", "wt") {|f|
+        f.print "\n"
+      }
+      open("newline", "rb") {|f|
+        SYSTEM_NEWLINE << f.read
+      }
+    }
+    SYSTEM_NEWLINE.first
   end
 
   def test_textmode_encode_newline
@@ -1170,6 +1180,41 @@
     }
   end
 
+  def test_textmode_encode_newline_enc
+    with_tmpdir {
+      open("t.txt", "wt:euc-jp") {|f|
+        f.puts "abc\u3042"
+        f.puts "def\u3044"
+      }
+      content = File.read("t.txt", :mode=>"rb:ascii-8bit")
+      nl = system_newline
+      assert_equal("abc\xA4\xA2#{nl}def\xA4\xA4#{nl}", content)
+    }
+  end
+
+  def test_textmode_read_ascii_incompat_internal
+    with_tmpdir {
+      generate_file("t.utf8.crlf", "a\r\nb\r\n")
+      open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f|
+        content = f.read
+        # textmode doesn't affect for ascii incompatible internal encoding.
+        assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"),
+                     content)
+      }
+    }
+  end
+
+  def test_textmode_write_ascii_incompat_internal
+    with_tmpdir {
+      open("t.utf8.lf", "wt:utf-8:utf-16be") {|f|
+        f.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
+      }
+      content = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit")
+      # textmode doesn't affect for ascii incompatible internal encoding.
+      assert_equal("a\nb\n", content)
+    }
+  end
+
   def test_binary
     with_tmpdir {
       src = "a\nb\rc\r\nd\n"
@@ -1180,7 +1225,7 @@
       open("t.txt", "r", :binmode=>true) {|f|
         assert_equal(src, f.read)
       }
-      if File::BINARY == 0
+      if system_newline == "\n"
         open("t.txt", "r") {|f|
           assert_equal(src, f.read)
         }
Index: transcode.c
===================================================================
--- transcode.c	(revision 18869)
+++ transcode.c	(revision 18870)
@@ -748,7 +748,27 @@
     int num_additional;
     static rb_econv_t *ec;
     int flags = opts ? opts->flags : 0;
+    int universal_newline_decoder_added = 0;
 
+    rb_encoding *senc, *denc;
+    int sidx, didx;
+
+    senc = NULL;
+    if (*from) {
+        sidx = rb_enc_find_index(from);
+        if (0 <= sidx) {
+            senc = rb_enc_from_index(sidx);
+        }
+    }
+
+    denc = NULL;
+    if (*to) {
+        didx = rb_enc_find_index(to);
+        if (0 <= didx) {
+            denc = rb_enc_from_index(didx);
+        }
+    }
+
     if (*from == '\0' && *to == '\0') {
         num_trans = 0;
         entries = ALLOC_N(transcoder_entry_t *, 1+2);
@@ -763,7 +783,8 @@
     }
 
     num_additional = 0;
-    if (flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) {
+    if ((!*from || (senc && rb_enc_asciicompat(senc))) &&
+        (flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER))) {
         const char *name = (flags & ECONV_CRLF_NEWLINE_ENCODER) ? "crlf_newline" : "cr_newline";
         transcoder_entry_t *e = get_transcoder_entry("", name);
         if (flags & ECONV_CRLF_NEWLINE_ENCODER)
@@ -779,8 +800,12 @@
         num_trans++;
         num_additional++;
     }
+    else {
+        flags &= ~(ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER);
+    }
 
-    if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
+    if ((!*to || (denc && rb_enc_asciicompat(denc))) &&
+        (flags & ECONV_UNIVERSAL_NEWLINE_DECODER)) {
         transcoder_entry_t *e = get_transcoder_entry("universal_newline", "");
         if (!e) {
             xfree(entries);
@@ -788,7 +813,11 @@
         }
         entries[num_trans++] = e;
         num_additional++;
+        universal_newline_decoder_added = 1;
     }
+    else {
+        flags &= ~ECONV_UNIVERSAL_NEWLINE_DECODER;
+    }
 
     ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
     xfree(entries);
@@ -799,6 +828,7 @@
         ec->opts.flags = 0;
     else
         ec->opts = *opts;
+    ec->opts.flags = flags;
     ec->source_encoding_name = from;
     ec->destination_encoding_name = to;
 
@@ -806,7 +836,7 @@
         ec->last_tc = NULL;
         ec->last_trans_index = -1;
     }
-    else if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
+    else if (universal_newline_decoder_added) {
         ec->last_tc = ec->elems[ec->num_trans-2].tc;
         ec->last_trans_index = ec->num_trans-2;
     }
@@ -1886,17 +1916,6 @@
     else
         rb_econv_opts(Qnil, &ecopts);
 
-    /* disable newline conversion for ascii incompatible encoding.
-     * xxx: convert newline in ascii-compatible encoding?
-     * ex. UTF-16BE -> UTF-8 -> newline conversion -> UTF-8 -> UTF-16BE.
-     */
-    if (!from_enc || !rb_enc_asciicompat(from_enc)) {
-        ecopts.flags &= ~(ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER);
-    }
-    if (!to_enc || !rb_enc_asciicompat(to_enc)) {
-        ecopts.flags &= ~ECONV_UNIVERSAL_NEWLINE_DECODER;
-    }
-
     if ((ecopts.flags & (ECONV_UNIVERSAL_NEWLINE_DECODER|
                          ECONV_CRLF_NEWLINE_ENCODER|
                          ECONV_CR_NEWLINE_ENCODER)) == 0) {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]