[前][次][番号順一覧][スレッド一覧]

ruby-changes:7262

From: akr <ko1@a...>
Date: Sat, 23 Aug 2008 01:44:24 +0900 (JST)
Subject: [ruby-changes:7262] Ruby:r18780 (trunk): * include/ruby/io.h (FMODE_TEXTMODE): defined.

akr	2008-08-23 01:44:00 +0900 (Sat, 23 Aug 2008)

  New Revision: 18780

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18780

  Log:
    * include/ruby/io.h (FMODE_TEXTMODE): defined.
    
    * include/ruby/encoding.h (rb_econv_t): new field: flags.
      (rb_econv_binmode): declared.
    
    * io.c (io_unread): text mode hack removed.
      (NEED_NEWLINE_DECODER): defined.
      (NEED_NEWLINE_ENCODER): defined.
      (NEED_READCONV): defined.
      (NEED_WRITECONV): defined.
      (TEXTMODE_NEWLINE_ENCODER): defined for windows.
      (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for
      text mode.
      (io_fwrite): use NEED_WRITECONV.  character code conversion is
      disabled if fptr->writeconv_stateless is nil.
      (make_readconv): setup converter with
      ECONV_UNIVERSAL_NEWLINE_DECODER for text mode.
      (read_all): use NEED_READCONV.
      (appendline): use NEED_READCONV.
      (rb_io_getline_1): use NEED_READCONV.
      (io_getc): use NEED_READCONV.
      (rb_io_ungetc): use NEED_READCONV.
      (rb_io_binmode): OS-level text mode test removed.  call
      rb_econv_binmode.
      (rb_io_binmode_m): call rb_io_binmode with write_io as well.
      (rb_io_flags_mode): return mode string including "t".
      (rb_io_mode_flags): detect "t" for text mode.
      (rb_sysopen): always specify O_BINARY.
    
    * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags.
      (rb_econv_open): if the source and destination encodings are
      both empty strings, open a newline converter.  last_tc will be NULL in
      this case.
      (rb_econv_encoding_to_insert_output): last_tc may be NULL now.
      (rb_econv_string): ditto.
      (output_replacement_character): ditto.
      (transcode_loop): ditto.
      (econv_init): ditto.
      (econv_inspect): ditto.
      (rb_econv_binmode): new function.

  Modified files:
    trunk/ChangeLog
    trunk/enc/trans/newline.trans
    trunk/include/ruby/encoding.h
    trunk/include/ruby/io.h
    trunk/io.c
    trunk/test/ruby/test_econv.rb
    trunk/test/ruby/test_io_m17n.rb
    trunk/transcode.c

Index: include/ruby/io.h
===================================================================
--- include/ruby/io.h	(revision 18779)
+++ include/ruby/io.h	(revision 18780)
@@ -85,6 +85,7 @@
 #define FMODE_WSPLIT                    0x0200
 #define FMODE_WSPLIT_INITIALIZED        0x0400
 #define FMODE_TRUNC                     0x0800
+#define FMODE_TEXTMODE                  0x1000
 /* #define FMODE_PREP                  0x10000 */
 
 #define GetOpenFile(obj,fp) rb_io_check_closed((fp) = RFILE(rb_io_taint_check(obj))->fptr)
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 18779)
+++ include/ruby/encoding.h	(revision 18780)
@@ -217,6 +217,7 @@
 } rb_econv_elem_t;
 
 typedef struct {
+    int flags;
     const char *source_encoding_name;
     const char *destination_encoding_name;
 
@@ -273,6 +274,8 @@
 
 VALUE rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags);
 
+void rb_econv_binmode(rb_econv_t *ec);
+
 /* flags for rb_econv_open */
 #define ECONV_UNIVERSAL_NEWLINE_DECODER       0x100
 #define ECONV_CRLF_NEWLINE_ENCODER            0x200
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18779)
+++ ChangeLog	(revision 18780)
@@ -1,3 +1,46 @@
+Sat Aug 23 01:42:22 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/io.h (FMODE_TEXTMODE): defined.
+
+	* include/ruby/encoding.h (rb_econv_t): new field: flags.
+	  (rb_econv_binmode): declared.
+
+	* io.c (io_unread): text mode hack removed.
+	  (NEED_NEWLINE_DECODER): defined.
+	  (NEED_NEWLINE_ENCODER): defined.
+	  (NEED_READCONV): defined.
+	  (NEED_WRITECONV): defined.
+	  (TEXTMODE_NEWLINE_ENCODER): defined for windows.
+	  (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for
+	  text mode.
+	  (io_fwrite): use NEED_WRITECONV.  character code conversion is
+	  disabled if fptr->writeconv_stateless is nil.
+	  (make_readconv): setup converter with
+	  ECONV_UNIVERSAL_NEWLINE_DECODER for text mode.
+	  (read_all): use NEED_READCONV.
+	  (appendline): use NEED_READCONV.
+	  (rb_io_getline_1): use NEED_READCONV.
+	  (io_getc): use NEED_READCONV.
+	  (rb_io_ungetc): use NEED_READCONV.
+	  (rb_io_binmode): OS-level text mode test removed.  call
+	  rb_econv_binmode.
+	  (rb_io_binmode_m): call rb_io_binmode with write_io as well.
+	  (rb_io_flags_mode): return mode string including "t".
+	  (rb_io_mode_flags): detect "t" for text mode.
+	  (rb_sysopen): always specify O_BINARY.
+
+	* transcode.c (rb_econv_open_by_transcoder_entries): initialize flags.
+	  (rb_econv_open): if the source and destination encodings are
+	  both empty strings, open a newline converter.  last_tc will be NULL in
+	  this case.
+	  (rb_econv_encoding_to_insert_output): last_tc may be NULL now.
+	  (rb_econv_string): ditto.
+	  (output_replacement_character): ditto.
+	  (transcode_loop): ditto.
+	  (econv_init): ditto.
+	  (econv_inspect): ditto.
+	  (rb_econv_binmode): new function.
+
 Fri Aug 22 21:18:40 2008  Tadayoshi Funaba  <tadf@d...>
 
 	* complex.c (nucomp_div): now behaves as quo.
Index: enc/trans/newline.trans
===================================================================
--- enc/trans/newline.trans	(revision 18779)
+++ enc/trans/newline.trans	(revision 18780)
@@ -44,7 +44,7 @@
     1, /* input_unit_length */
     1, /* max_input */
     1, /* max_output */
-    stateless_converter, /* stateful_type */
+    stateful_decoder, /* stateful_type */
     NULL, NULL, NULL, fun_so_universal_newline
 };
 
Index: io.c
===================================================================
--- io.c	(revision 18779)
+++ io.c	(revision 18780)
@@ -300,17 +300,6 @@
     if (fptr->rbuf_len == 0 || fptr->mode & FMODE_DUPLEX)
         return;
     /* xxx: target position may be negative if buffer is filled by ungetc */
-#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
-    if (!(fptr->mode & FMODE_BINMODE)) {
-	int len = fptr->rbuf_len;
-	while (fptr->rbuf_len-- > 0) {
-	    if (fptr->rbuf[fptr->rbuf_len] == '\n')
-		++len;
-	}
-	r = lseek(fptr->fd, -len, SEEK_CUR);
-    }
-    else
-#endif
     r = lseek(fptr->fd, -fptr->rbuf_len, SEEK_CUR);
     if (r < 0) {
         if (errno == ESPIPE)
@@ -681,13 +670,42 @@
     }
 }
 
+/* xxx: better way to determine the newline of the platform? */
+#if defined(O_BINARY) && O_BINARY != 0
+/* Windows */
+# define NEED_NEWLINE_DECODER(fptr) (!(fptr->mode & FMODE_BINMODE))
+# define NEED_NEWLINE_ENCODER(fptr) (!(fptr->mode & FMODE_BINMODE))
+# define TEXTMODE_NEWLINE_ENCODER ECONV_CRLF_NEWLINE_ENCODER
+#else
+/* Unix */
+# define NEED_NEWLINE_DECODER(fptr) (fptr->mode & FMODE_TEXTMODE)
+# define NEED_NEWLINE_ENCODER(fptr) 0
+#endif
+#define NEED_READCONV(fptr) (fptr->enc2 != NULL || NEED_NEWLINE_DECODER(fptr))
+#define NEED_WRITECONV(fptr) (fptr->enc != NULL || NEED_NEWLINE_ENCODER(fptr))
+
 static void
 make_writeconv(rb_io_t *fptr)
 {
     if (!fptr->writeconv_initialized) {
         const char *senc, *denc;
         rb_encoding *enc;
+        int ecflags;
 
+        fptr->writeconv_initialized = 1;
+
+        ecflags = 0;
+#ifdef TEXTMODE_NEWLINE_ENCODER
+        if (NEED_NEWLINE_ENCODER(fptr))
+            ecflags |= TEXTMODE_NEWLINE_ENCODER;
+
+        if (!fptr->enc) {
+            fptr->writeconv = rb_econv_open("", "", ecflags);
+            fptr->writeconv_stateless = Qnil;
+            return;
+        }
+#endif
+
         enc = fptr->enc2 ? fptr->enc2 : fptr->enc;
         senc = rb_econv_stateless_encoding(enc->name);
         if (senc) {
@@ -699,14 +717,13 @@
             fptr->writeconv_stateless = Qnil;
         }
         if (senc) {
-            fptr->writeconv = rb_econv_open(senc, denc, 0);
+            fptr->writeconv = rb_econv_open(senc, denc, ecflags);
             if (!fptr->writeconv)
                 rb_raise(rb_eIOError, "code converter open failed (%s to %s)", senc, denc);
         }
         else {
             fptr->writeconv = NULL;
         }
-        fptr->writeconv_initialized = 1;
     }
 }
 
@@ -716,14 +733,12 @@
 {
     long len, n, r, l, offset = 0;
 
-    /*
-     * If an external encoding was specified and it differs from
-     * the strings encoding then we must transcode before writing.
-     */
-    if (fptr->enc) {
+    if (NEED_WRITECONV(fptr)) {
         make_writeconv(fptr);
         if (fptr->writeconv) {
-            str = rb_str_transcode(str, fptr->writeconv_stateless);
+            if (!NIL_P(fptr->writeconv_stateless)) {
+                str = rb_str_transcode(str, fptr->writeconv_stateless);
+            }
             str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
         }
         else {
@@ -1411,9 +1426,20 @@
 make_readconv(rb_io_t *fptr)
 {
     if (!fptr->readconv) {
-        fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
+        int ecflags = 0;
+        const char *sname, *dname;
+        if (NEED_NEWLINE_DECODER(fptr))
+            ecflags |= ECONV_UNIVERSAL_NEWLINE_DECODER;
+        if (fptr->enc2) {
+            sname = fptr->enc2->name;
+            dname = fptr->enc->name;
+        }
+        else {
+            sname = dname = "";
+        }
+        fptr->readconv = rb_econv_open(sname, dname, ecflags);
         if (!fptr->readconv)
-            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc2->name, fptr->enc->name);
+            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", sname, dname);
         fptr->crbuf_off = 0;
         fptr->crbuf_len = 0;
         fptr->crbuf_capa = 1024;
@@ -1519,7 +1545,7 @@
     rb_encoding *enc;
     int cr;
 
-    if (fptr->enc2) {
+    if (NEED_READCONV(fptr)) {
         VALUE str = rb_str_new(NULL, 0);
         make_readconv(fptr);
         while (1) {
@@ -1873,7 +1899,7 @@
     VALUE str = *strp;
     long limit = *lp;
 
-    if (fptr->enc2) {
+    if (NEED_READCONV(fptr)) {
         make_readconv(fptr);
         while (1) {
             const char *p, *e;
@@ -2084,7 +2110,7 @@
     else if (limit == 0) {
 	return rb_enc_str_new(0, 0, io_read_encoding(fptr));
     }
-    else if (rs == rb_default_rs && limit < 0 && !fptr->enc2 &&
+    else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) &&
              rb_enc_asciicompat(enc = io_read_encoding(fptr))) {
 	return rb_io_getline_fast(fptr, enc);
     }
@@ -2409,18 +2435,19 @@
     int r, n, cr = 0;
     VALUE str;
 
-    if (fptr->enc2) {
+    if (NEED_READCONV(fptr)) {
         VALUE str = Qnil;
 
-        if (!fptr->readconv) {
-            make_readconv(fptr);
-        }
+        make_readconv(fptr);
 
         while (1) {
             if (fptr->crbuf_len) {
-                r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off,
-                                          fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len,
-                                          fptr->enc);
+                if (fptr->enc)
+                    r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off,
+                                              fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len,
+                                              fptr->enc);
+                else
+                    r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
                 if (!MBCLEN_NEEDMORE_P(r))
                     break;
                 if (fptr->crbuf_len == fptr->crbuf_capa) {
@@ -2776,7 +2803,7 @@
     else {
 	SafeStringValue(c);
     }
-    if (fptr->enc2) {
+    if (NEED_READCONV(fptr)) {
         make_readconv(fptr);
         len = RSTRING_LEN(c);
         if (fptr->crbuf_capa - fptr->crbuf_len < len)
@@ -3462,14 +3489,12 @@
     rb_io_t *fptr;
 
     GetOpenFile(io, fptr);
-#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
-    if (!(fptr->mode & FMODE_BINMODE) && READ_DATA_BUFFERED(fptr)) {
-	rb_raise(rb_eIOError, "buffer already filled with text-mode content");
-    }
-    if (0 <= fptr->fd && setmode(fptr->fd, O_BINARY) == -1)
-	rb_sys_fail(fptr->path);
-#endif
+    if (fptr->readconv)
+        rb_econv_binmode(fptr->readconv);
+    if (fptr->writeconv)
+        rb_econv_binmode(fptr->writeconv);
     fptr->mode |= FMODE_BINMODE;
+    fptr->mode &= ~FMODE_TEXTMODE;
     return io;
 }
 
@@ -3485,17 +3510,13 @@
 static VALUE
 rb_io_binmode_m(VALUE io)
 {
-#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
     VALUE write_io;
-#endif
 
     rb_io_binmode(io);
 
-#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
     write_io = GetWriteIO(io);
     if (write_io != io)
         rb_io_binmode(write_io);
-#endif
     return io;
 }
 
@@ -3516,27 +3537,24 @@
 static const char*
 rb_io_flags_mode(int flags)
 {
-#ifdef O_BINARY
-# define MODE_BINMODE(a,b) ((flags & FMODE_BINMODE) ? (b) : (a))
-#else
-# define MODE_BINMODE(a,b) (a)
-#endif
+# define MODE_BTMODE(a,b,c) ((flags & FMODE_BINMODE) ? (b) : \
+                             (flags & FMODE_TEXTMODE) ? (c) : (a))
     if (flags & FMODE_APPEND) {
 	if ((flags & FMODE_READWRITE) == FMODE_READWRITE) {
-	    return MODE_BINMODE("a+", "ab+");
+	    return MODE_BTMODE("a+", "ab+", "at+");
 	}
-	return MODE_BINMODE("a", "ab");
+	return MODE_BTMODE("a", "ab", "at");
     }
     switch (flags & FMODE_READWRITE) {
       case FMODE_READABLE:
-	return MODE_BINMODE("r", "rb");
+	return MODE_BTMODE("r", "rb", "rt");
       case FMODE_WRITABLE:
-	return MODE_BINMODE("w", "wb");
+	return MODE_BTMODE("w", "wb", "wt");
       case FMODE_READWRITE:
 	if (flags & FMODE_CREATE) {
-	    return MODE_BINMODE("w+", "wb+");
+	    return MODE_BTMODE("w+", "wb+", "wt+");
 	}
-	return MODE_BINMODE("r+", "rb+");
+	return MODE_BTMODE("r+", "rb+", "rt+");
     }
     rb_raise(rb_eArgError, "invalid access modenum 0x%x", flags);
     return NULL;		/* not reached */
@@ -3568,16 +3586,23 @@
 	  case 'b':
             flags |= FMODE_BINMODE;
             break;
+	  case 't':
+            flags |= FMODE_TEXTMODE;
+            break;
 	  case '+':
             flags |= FMODE_READWRITE;
             break;
 	  default:
             goto error;
 	  case ':':
-	    return flags;
+            goto finished;
         }
     }
 
+finished:
+    if ((flags & FMODE_BINMODE) && (flags & FMODE_TEXTMODE))
+        goto error;
+
     return flags;
 }
 
@@ -3887,6 +3912,10 @@
 {
     int fd;
 
+#ifdef O_BINARY
+    flags |= O_BINARY;
+#endif
+
     fd = rb_sysopen_internal(fname, flags, mode);
     if (fd < 0) {
 	if (errno == EMFILE || errno == ENFILE) {
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 18779)
+++ test/ruby/test_io_m17n.rb	(revision 18780)
@@ -979,5 +979,179 @@
     }
   end
 
+  def test_textmode_decode_universal_newline_read
+    with_tmpdir {
+      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
+      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
+      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))
+
+      generate_file("t.cr", "a\rb\rc\r")
+      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
+      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))
+
+      generate_file("t.lf", "a\nb\nc\n")
+      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
+      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))
+    }
+  end
+
+  def test_textmode_decode_universal_newline_getc
+    with_tmpdir {
+      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
+      open("t.crlf", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+
+      generate_file("t.cr", "a\rb\rc\r")
+      open("t.cr", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+
+      generate_file("t.lf", "a\nb\nc\n")
+      open("t.lf", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+    }
+  end
+
+  def test_textmode_decode_universal_newline_gets
+    with_tmpdir {
+      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
+      open("t.crlf", "rt") {|f|
+        assert_equal("a\n", f.gets)
+        assert_equal("b\n", f.gets)
+        assert_equal("c\n", f.gets)
+        assert_equal(nil, f.gets)
+      }
+
+      generate_file("t.cr", "a\rb\rc\r")
+      open("t.cr", "rt") {|f|
+        assert_equal("a\n", f.gets)
+        assert_equal("b\n", f.gets)
+        assert_equal("c\n", f.gets)
+        assert_equal(nil, f.gets)
+      }
+
+      generate_file("t.lf", "a\nb\nc\n")
+      open("t.lf", "rt") {|f|
+        assert_equal("a\n", f.gets)
+        assert_equal("b\n", f.gets)
+        assert_equal("c\n", f.gets)
+        assert_equal(nil, f.gets)
+      }
+    }
+  end
+
+  def test_textmode_decode_universal_newline_utf16
+    with_tmpdir {
+      generate_file("t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n")
+      assert_equal("a\nb\nc\n", File.read("t.utf16be.crlf", mode:"rt:utf-16be:utf-8"))
+
+      generate_file("t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0")
+      assert_equal("a\nb\nc\n", File.read("t.utf16le.crlf", mode:"rt:utf-16le:utf-8"))
+
+      generate_file("t.utf16be.cr", "\0a\0\r\0b\0\r\0c\0\r")
+      assert_equal("a\nb\nc\n", File.read("t.utf16be.cr", mode:"rt:utf-16be:utf-8"))
+
+      generate_file("t.utf16le.cr", "a\0\r\0b\0\r\0c\0\r\0")
+      assert_equal("a\nb\nc\n", File.read("t.utf16le.cr", mode:"rt:utf-16le:utf-8"))
+
+      generate_file("t.utf16be.lf", "\0a\0\n\0b\0\n\0c\0\n")
+      assert_equal("a\nb\nc\n", File.read("t.utf16be.lf", mode:"rt:utf-16be:utf-8"))
+
+      generate_file("t.utf16le.lf", "a\0\n\0b\0\n\0c\0\n\0")
+      assert_equal("a\nb\nc\n", File.read("t.utf16le.lf", mode:"rt:utf-16le:utf-8"))
+    }
+  end
+
+  def system_newline
+    File::BINARY == 0 ? "\n" : "\r\n"
+  end
+
+  def test_textmode_encode_newline
+    with_tmpdir {
+      open("t.txt", "wt") {|f|
+        f.puts "abc"
+        f.puts "def"
+      }
+      content = File.read("t.txt", :mode=>"rb")
+      nl = system_newline
+      assert_equal("abc#{nl}def#{nl}", content)
+    }
+  end
+
+  def test_binary
+    with_tmpdir {
+      src = "a\nb\rc\r\nd\n"
+      generate_file("t.txt", src)
+      open("t.txt", "rb") {|f|
+        assert_equal(src, f.read)
+      }
+      if File::BINARY == 0
+        open("t.txt", "r") {|f|
+          assert_equal(src, f.read)
+        }
+      end
+    }
+  end
+
+  def test_binmode
+    with_tmpdir {
+      src = "a\r\nb\r\nc\r\n"
+      generate_file("t.txt", src)
+      open("t.txt", "rt") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        f.binmode
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+    }
+  end
+
+  def test_binmode2
+    with_tmpdir {
+      src = "a\r\nb\r\nc\r\n"
+      generate_file("t.txt", src)
+      open("t.txt", "rt:euc-jp:utf-8") {|f|
+        assert_equal("a", f.getc)
+        assert_equal("\n", f.getc)
+        f.binmode
+        assert_equal("\n", f.getc)
+        assert_equal("b", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal("c", f.getc)
+        assert_equal("\r", f.getc)
+        assert_equal("\n", f.getc)
+        assert_equal(nil, f.getc)
+      }
+    }
+  end
+
 end
 
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 18779)
+++ test/ruby/test_econv.rb	(revision 18780)
@@ -305,16 +305,37 @@
     src << "\nyz";       check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
   end
 
+  def test_universal_newline2
+    ec = Encoding::Converter.new("", "", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER)
+    a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+    src << "abc\r\ndef"; check_ec("abc\ndef",                             "", :source_buffer_empty, *a)
+    src << "ghi\njkl";   check_ec("abc\ndefghi\njkl",                     "", :source_buffer_empty, *a)
+    src << "mno\rpqr";   check_ec("abc\ndefghi\njklmno\npqr",             "", :source_buffer_empty, *a)
+    src << "stu\r";      check_ec("abc\ndefghi\njklmno\npqrstu\n",        "", :source_buffer_empty, *a)
+    src << "\nvwx";      check_ec("abc\ndefghi\njklmno\npqrstu\nvwx",     "", :source_buffer_empty, *a)
+    src << "\nyz";       check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
+  end
+
   def test_crlf_newline
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE_ENCODER)
     assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
   end
 
+  def test_crlf_newline2
+    ec = Encoding::Converter.new("", "", Encoding::Converter::CRLF_NEWLINE_ENCODER)
+    assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
+  end
+
   def test_cr_newline
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE_ENCODER)
     assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
   end
 
+  def test_cr_newline2
+    ec = Encoding::Converter.new("", "", Encoding::Converter::CR_NEWLINE_ENCODER)
+    assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
+  end
+
   def test_output_followed_by_input
     ec = Encoding::Converter.new("UTF-8", "EUC-JP")
     a =     ["",  "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
Index: transcode.c
===================================================================
--- transcode.c	(revision 18779)
+++ transcode.c	(revision 18780)
@@ -680,6 +680,7 @@
     }
 
     ec = ALLOC(rb_econv_t);
+    ec->flags = 0;
     ec->source_encoding_name = NULL;
     ec->destination_encoding_name = NULL;
     ec->in_buf_start = NULL;
@@ -741,7 +742,13 @@
     int num_trans;
     static rb_econv_t *ec;
 
-    num_trans = transcode_search_path(from, to, trans_open_i, (void *)&entries);
+    if (*from == '\0' && *to == '\0') {
+        num_trans = 0;
+        entries = ALLOC_N(transcoder_entry_t *, 1+2);
+    }
+    else {
+        num_trans = transcode_search_path(from, to, trans_open_i, (void *)&entries);
+    }
 
     if (num_trans < 0 || !entries) {
         xfree(entries);
@@ -751,6 +758,10 @@
     if (flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) {
         const char *name = (flags & ECONV_CRLF_NEWLINE_ENCODER) ? "crlf_newline" : "cr_newline";
         transcoder_entry_t *e = get_transcoder_entry("", name);
+        if (flags & ECONV_CRLF_NEWLINE_ENCODER)
+            flags &= ~ECONV_CR_NEWLINE_ENCODER;
+        else
+            flags &= ~ECONV_CRLF_NEWLINE_ENCODER;
         if (!e) {
             xfree(entries);
             return NULL;
@@ -774,12 +785,19 @@
     if (!ec)
         rb_raise(rb_eArgError, "encoding conversion not supported (from %s to %s)", from, to);
 
+    ec->flags = flags;
     ec->source_encoding_name = from;
     ec->destination_encoding_name = to;
 
     if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
-        ec->last_tc = ec->elems[ec->num_trans-2].tc;
-        ec->last_trans_index = ec->num_trans-2;
+        if (ec->num_trans == 1) {
+            ec->last_tc = NULL;
+            ec->last_trans_index = -1;
+        }
+        else {
+            ec->last_tc = ec->elems[ec->num_trans-2].tc;
+            ec->last_trans_index = ec->num_trans-2;
+        }
     }
 
     return ec;
@@ -1037,8 +1055,13 @@
 rb_econv_encoding_to_insert_output(rb_econv_t *ec)
 {
     rb_transcoding *tc = ec->last_tc;
-    const rb_transcoder *tr = tc->transcoder;
+    const rb_transcoder *tr;
 
+    if (tc == NULL)
+        return "";
+
+    tr = tc->transcoder;
+
     if (tr->stateful_type == stateful_encoder)
         return tr->from_encoding;
     return tr->to_encoding;
@@ -1103,7 +1126,6 @@
     size_t insert_len;
 
     rb_transcoding *tc;
-    const rb_transcoder *tr;
 
     unsigned char **buf_start_p;
     unsigned char **data_start_p;
@@ -1125,11 +1147,16 @@
             return -1;
     }
 
+    need = insert_len;
+
     tc = ec->last_tc;
-    tr = tc->transcoder;
-
-    need = insert_len;
-    if (tr->stateful_type == stateful_encoder) {
+    if (!tc) {
+        buf_start_p = &ec->in_buf_start;
+        data_start_p = &ec->in_data_start;
+        data_end_p = &ec->in_data_end;
+        buf_end_p = &ec->in_buf_end;
+    }
+    else if (tc->transcoder->stateful_type == stateful_encoder) {
         need += tc->readagain_len;
         if (need < insert_len)
             goto fail;
@@ -1179,7 +1206,7 @@
         }
     }
 
-    if (tr->stateful_type == stateful_encoder) {
+    if (tc && tc->transcoder->stateful_type == stateful_encoder) {
         memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
         *data_end_p += tc->readagain_len;
         tc->readagain_len = 0;
@@ -1267,15 +1294,20 @@
     unsigned const char *ss, *sp, *se;
     unsigned char *ds, *dp, *de;
     rb_econv_result_t res;
+    int max_output;
 
     if (NIL_P(dst)) {
         dst = rb_str_buf_new(len);
     }
 
+    if (ec->last_tc)
+        max_output = ec->last_tc->transcoder->max_output;
+    else
+        max_output = 1;
+
     res = econv_destination_buffer_full;
     while (res == econv_destination_buffer_full) {
         long dlen = RSTRING_LEN(dst);
-        int max_output = ec->last_tc->transcoder->max_output;
         if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
             unsigned long new_capa = (unsigned long)dlen + len + max_output;
             if (LONG_MAX < new_capa)
@@ -1297,6 +1329,27 @@
     return dst;
 }
 
+void
+rb_econv_binmode(rb_econv_t *ec)
+{
+    if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
+        int i = ec->num_trans-1;
+        rb_transcoding_close(ec->elems[i].tc);
+        xfree(ec->elems[i].out_buf_start);
+        ec->elems[i].tc = NULL;
+        ec->elems[i].out_buf_start = NULL;
+        ec->elems[i].out_data_start = NULL;
+        ec->elems[i].out_data_end = NULL;
+        ec->elems[i].out_buf_end = NULL;
+        ec->num_trans--;
+    }
+    if (ec->flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) {
+        rb_transcoding_close(ec->elems[0].tc);
+        xfree(ec->elems[0].out_buf_start);
+        MEMMOVE(&ec->elems[0], &ec->elems[1], rb_econv_elem_t, ec->num_trans-1);
+        ec->num_trans--;
+    }
+}
 
 static VALUE
 make_econv_exception(rb_econv_t *ec)
@@ -1358,7 +1411,7 @@
 static int
 output_replacement_character(rb_econv_t *ec)
 {
-    rb_transcoding *tc = ec->last_tc;
+    rb_transcoding *tc;
     const rb_transcoder *tr;
     rb_encoding *enc;
     const unsigned char *replacement;
@@ -1366,11 +1419,18 @@
     int len;
     int ret;
 
-    tr = tc->transcoder;
-    enc = rb_enc_find(tr->to_encoding);
+    tc = ec->last_tc;
+    if (tc) {
+        tr = tc->transcoder;
+        enc = rb_enc_find(tr->to_encoding);
+        replacement = (const unsigned char *)get_replacement_character(enc, &len, &repl_enc);
+    }
+    else {
+        replacement = (unsigned char *)"?";
+        len = 1;
+        repl_enc = "";
+    }
 
-    replacement = (const unsigned char *)get_replacement_character(enc, &len, &repl_enc);
-
     ret = rb_econv_insert_output(ec, replacement, len, repl_enc);
     if (ret == -1)
         return -1;
@@ -1400,7 +1460,7 @@
         rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
 
     last_tc = ec->last_tc;
-    max_output = last_tc->transcoder->max_output;
+    max_output = last_tc ? last_tc->transcoder->max_output : 1;
 
 resume:
     ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, opt);
@@ -1465,7 +1525,7 @@
         rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
 
     last_tc = ec->last_tc;
-    max_output = ec->elems[ec->num_trans-1].tc->transcoder->max_output;
+    max_output = last_tc ? last_tc->transcoder->max_output : 1;
 
     ret = econv_source_buffer_empty;
     ptr = *in_pos;
@@ -1835,8 +1895,14 @@
     ec->source_encoding = senc;
     ec->destination_encoding = denc;
 
-    ec->source_encoding_name = ec->elems[0].tc->transcoder->from_encoding;
-    ec->destination_encoding_name = ec->last_tc->transcoder->to_encoding;
+    if (ec->last_tc) {
+        ec->source_encoding_name = ec->elems[0].tc->transcoder->from_encoding;
+        ec->destination_encoding_name = ec->last_tc->transcoder->to_encoding;
+    }
+    else {
+        ec->source_encoding_name = "";
+        ec->destination_encoding_name = "";
+    }
 
     DATA_PTR(self) = ec;
 
@@ -1851,10 +1917,13 @@
 
     if (!ec)
         return rb_sprintf("#<%s: uninitialized>", cname);
-    else
-        return rb_sprintf("#<%s: %s to %s>", cname,
-            ec->source_encoding_name,
-            ec->destination_encoding_name);
+    else {
+        const char *sname = ec->source_encoding_name;
+        const char *dname = ec->destination_encoding_name;
+        if (*sname == '\0') sname = "(none)";
+        if (*dname == '\0') dname = "(none)";
+        return rb_sprintf("#<%s: %s to %s>", cname, sname, dname);
+    }
 }
 
 #define IS_ECONV(obj) (RDATA(obj)->dfree == (RUBY_DATA_FUNC)econv_free)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]