[前][次][番号順一覧][スレッド一覧]

ruby-changes:7172

From: akr <ko1@a...>
Date: Mon, 18 Aug 2008 21:09:13 +0900 (JST)
Subject: [ruby-changes:7172] Ruby:r18691 (trunk): * include/ruby/io.h (rb_io_t): new fields: writeconv,

akr	2008-08-18 21:06:42 +0900 (Mon, 18 Aug 2008)

  New Revision: 18691

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18691

  Log:
    * include/ruby/io.h (rb_io_t): new fields: writeconv,
      writeconv_stateless and writeconv_initialized.
      (MakeOpenFile): initialize them.
    
    * include/ruby/encoding.h (rb_econv_stateless_encoding): declared.
      (rb_econv_string): declared.
    
    * io.c (make_writeconv): new function.
      (io_fwrite): use econv.
      (make_readconv): fix error message.
      (finish_writeconv): new function.
      (fptr_finalize): call finish_writeconv.
      (clear_writeconv): new function.
      (clear_codeconv): new function to call both clear_readconv and
      clear_writeconv.
      (rb_io_fptr_finalize): call clear_codeconv instead of
      clear_readconv.
      (mode_enc): ditto.
      (io_set_encoding): ditto.
      (argf_next_argv): ditto.
      (io_encoding_set): ditto.
    
    * gc.c (gc_mark_children): mark writeconv_stateless in T_FILE.
    
    * transcode.c (stateless_encoding_i): new function.
      (rb_econv_stateless_encoding): ditto.
      (rb_econv_string): ditto.

  Modified files:
    trunk/ChangeLog
    trunk/gc.c
    trunk/include/ruby/encoding.h
    trunk/include/ruby/io.h
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb
    trunk/transcode.c

Index: include/ruby/io.h
===================================================================
--- include/ruby/io.h	(revision 18690)
+++ include/ruby/io.h	(revision 18691)
@@ -63,6 +63,11 @@
     int crbuf_off;
     int crbuf_len;
     int crbuf_capa;
+
+    rb_econv_t *writeconv;
+    VALUE writeconv_stateless;
+    int writeconv_initialized;
+
 } rb_io_t;
 
 #define HAVE_RB_IO_T 1
@@ -110,6 +115,9 @@
     fp->crbuf_off = 0;\
     fp->crbuf_len = 0;\
     fp->crbuf_capa = 0;\
+    fp->writeconv = NULL;\
+    fp->writeconv_stateless = Qnil;\
+    fp->writeconv_initialized = 0;\
     fp->tied_io_for_writing = 0;\
     fp->enc = 0;\
     fp->enc2 = 0;\
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 18690)
+++ include/ruby/encoding.h	(revision 18691)
@@ -268,6 +268,11 @@
 int rb_econv_putbackable(rb_econv_t *ec);
 void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
 
+/* returns the stateless encoding that corresponds to stateful_enc, or NULL if stateful_enc is not a stateful encoding. */
+const char *rb_econv_stateless_encoding(const char *stateful_enc);
+
+VALUE rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags);
+
 /* flags for rb_econv_open */
 #define ECONV_UNIVERSAL_NEWLINE_DECODER       0x100
 #define ECONV_CRLF_NEWLINE_ENCODER            0x200
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18690)
+++ ChangeLog	(revision 18691)
@@ -1,3 +1,33 @@
+Mon Aug 18 21:02:08 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/io.h (rb_io_t): new fields: writeconv,
+	  writeconv_stateless and writeconv_initialized.
+	  (MakeOpenFile): initialize them.
+
+	* include/ruby/encoding.h (rb_econv_stateless_encoding): declared.
+	  (rb_econv_string): declared.
+
+	* io.c (make_writeconv): new function.
+	  (io_fwrite): use econv.
+	  (make_readconv): fix error message.
+	  (finish_writeconv): new function.
+	  (fptr_finalize): call finish_writeconv.
+	  (clear_writeconv): new function.
+	  (clear_codeconv): new function to call both clear_readconv and
+	  clear_writeconv.
+	  (rb_io_fptr_finalize): call clear_codeconv instead of
+	  clear_readconv.
+	  (mode_enc): ditto.
+	  (io_set_encoding): ditto.
+	  (argf_next_argv): ditto.
+	  (io_encoding_set): ditto.
+
+	* gc.c (gc_mark_children): mark writeconv_stateless in T_FILE.
+
+	* transcode.c (stateless_encoding_i): new function.
+	  (rb_econv_stateless_encoding): ditto.
+	  (rb_econv_string): ditto.
+
 Mon Aug 18 17:23:38 2008  Tanaka Akira  <akr@f...>
 
 	* io.c (clear_readconv): extracted from rb_io_fptr_finalize.
Index: io.c
===================================================================
--- io.c	(revision 18690)
+++ io.c	(revision 18691)
@@ -689,6 +689,38 @@
     }
 }
 
+/* Lazily open fptr->writeconv, the converter used on the write path. */
+static void
+make_writeconv(rb_io_t *fptr)
+{
+    const char *src_name, *dst_name = NULL;
+
+    if (fptr->writeconv_initialized)
+        return;
+
+    fptr->writeconv_stateless = Qnil;
+    if (fptr->enc2) {
+        /* two encodings are set: convert from enc to enc2 */
+        src_name = fptr->enc->name;
+        dst_name = fptr->enc2->name;
+    }
+    else if ((src_name = rb_econv_stateless_encoding(fptr->enc->name)) != NULL) {
+        /* enc is stateful: convert from its stateless counterpart */
+        dst_name = fptr->enc->name;
+        fptr->writeconv_stateless = rb_str_new2(src_name);
+    }
+
+    if (src_name) {
+        fptr->writeconv = rb_econv_open(src_name, dst_name, 0);
+        if (!fptr->writeconv)
+            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", src_name, dst_name);
+    }
+    else {
+        fptr->writeconv = NULL; /* nothing to convert with */
+    }
+    fptr->writeconv_initialized = 1;
+}
+
 /* writing functions */
 static long
 io_fwrite(VALUE str, rb_io_t *fptr)
@@ -701,17 +733,18 @@
      * We must also transcode if two encodings were specified
      */
     if (fptr->enc) {
-	/* transcode str before output */
-	/* the methods in transcode.c are static, so call indirectly */
-	/* Can't use encode! because puts writes a frozen newline */
+        make_writeconv(fptr);
 	if (fptr->enc2) {
-	    str = rb_funcall(str, id_encode, 2,
-			     rb_enc_from_encoding(fptr->enc2),
-			     rb_enc_from_encoding(fptr->enc));
+            str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
 	}
 	else {
-	    str = rb_funcall(str, id_encode, 1,
-			     rb_enc_from_encoding(fptr->enc));
+            if (fptr->writeconv) {
+                str = rb_str_transcode(str, fptr->writeconv_stateless);
+                str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
+            }
+            else {
+                str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc));
+            }
 	}
     }
 
@@ -1394,7 +1427,7 @@
     if (!fptr->readconv) {
         fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
         if (!fptr->readconv)
-            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
+            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc2->name, fptr->enc->name);
         fptr->crbuf_off = 0;
         fptr->crbuf_len = 0;
         fptr->crbuf_capa = 1024;
@@ -2845,9 +2878,77 @@
 #define PREP_STDIO_NAME(f) ((f)->path)
 
 static void
+finish_writeconv(rb_io_t *fptr, int noraise)
+{
+    unsigned char *ds, *dp, *de;
+    rb_econv_result_t res;
+    /* finish the conversion: run the converter with no input until it stops asking for more output space */
+    if (!fptr->wbuf) {
+        unsigned char buf[1024];
+        int r;
+        /* no write buffer: convert into buf and write it straight to fptr->fd */
+        res = econv_destination_buffer_full;
+        while (res == econv_destination_buffer_full) {
+            ds = dp = buf;
+            de = buf + sizeof(buf);
+            res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
+            while (dp-ds) {
+retry:
+                r = rb_write_internal(fptr->fd, ds, dp-ds);
+                if (r == dp-ds)
+                    break;
+                if (0 <= r) { /* short write: advance past the bytes already written */
+                    ds += r;
+                }
+                if (rb_io_wait_writable(fptr->fd)) {
+                    if (!noraise)
+                        rb_io_check_closed(fptr);
+                    else if (fptr->fd < 0)
+                        return;
+                    goto retry;
+                }
+                return; /* rb_io_wait_writable reported that waiting is not possible: give up */
+            }
+            if (!noraise) {
+                rb_econv_check_error(fptr->writeconv);
+            }
+            if (res == econv_invalid_byte_sequence ||
+                res == econv_undefined_conversion) {
+                break;
+            }
+        }
+
+        return;
+    }
+    /* write buffer present: append the remaining output to the tail of fptr->wbuf */
+    res = econv_destination_buffer_full;
+    while (res == econv_destination_buffer_full) {
+        if (fptr->wbuf_len == fptr->wbuf_capa) { /* buffer is full: flush it first */
+            io_fflush(fptr);
+        }
+
+        ds = dp = (unsigned char *)fptr->wbuf + fptr->wbuf_off + fptr->wbuf_len;
+        de = (unsigned char *)fptr->wbuf + fptr->wbuf_capa;
+        res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
+        fptr->wbuf_len += dp - ds;
+        if (!noraise) {
+            rb_econv_check_error(fptr->writeconv);
+        }
+        if (res == econv_invalid_byte_sequence ||
+            res == econv_undefined_conversion) {
+            break;
+        }
+    }
+
+}
+
+static void
 fptr_finalize(rb_io_t *fptr, int noraise)
 {
     int ebadf = 0;
+    if (fptr->writeconv) {
+        finish_writeconv(fptr, noraise);
+    }
     if (fptr->wbuf_len) {
         io_fflush(fptr);
     }
@@ -2907,6 +3008,23 @@
     }
 }
 
+/* Close the write converter, if any, and let make_writeconv() set it up again. */
+static void
+clear_writeconv(rb_io_t *fptr)
+{
+    if (fptr->writeconv)
+        rb_econv_close(fptr->writeconv);
+    fptr->writeconv = NULL;
+    fptr->writeconv_initialized = 0;
+}
+
+static void
+clear_codeconv(rb_io_t *fptr)
+{ /* invalidate the converters for both directions; callers use it when enc/enc2 change */
+    clear_readconv(fptr);
+    clear_writeconv(fptr);
+}
+
 int
 rb_io_fptr_finalize(rb_io_t *fptr)
 {
@@ -2926,7 +3044,7 @@
         free(fptr->wbuf);
         fptr->wbuf = 0;
     }
-    clear_readconv(fptr);
+    clear_codeconv(fptr);
     free(fptr);
     return 1;
 }
@@ -3535,7 +3653,7 @@
 
     fptr->enc = 0;
     fptr->enc2 = 0;
-    clear_readconv(fptr);
+    clear_codeconv(fptr);
 
     p0 = strrchr(estr, ':');
     if (!p0) p1 = estr;
@@ -4265,7 +4383,7 @@
 	GetOpenFile(io, fptr);
         fptr->enc = 0;
         fptr->enc2 = 0;
-        clear_readconv(fptr);
+        clear_codeconv(fptr);
 	if (!NIL_P(encoding)) {
 	    rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
 		    RSTRING_PTR(encoding));
@@ -5612,7 +5730,7 @@
 		GetOpenFile(current_file, fptr);
 		fptr->enc = argf_enc;
 		fptr->enc2 = argf_enc2;
-                clear_readconv(fptr);
+                clear_codeconv(fptr);
 	    }
 	}
 	else {
@@ -6340,13 +6458,13 @@
     if (argc == 2) {
 	fptr->enc2 = rb_to_encoding(v1);
 	fptr->enc = rb_to_encoding(v2);
-        clear_readconv(fptr);
+        clear_codeconv(fptr);
     }
     else if (argc == 1) {
 	if (NIL_P(v1)) {
 	    fptr->enc = 0;
 	    fptr->enc2 = 0;
-            clear_readconv(fptr);
+            clear_codeconv(fptr);
 	}
 	else {
 	    VALUE tmp = rb_check_string_type(v1);
@@ -6356,7 +6474,7 @@
 	    else {
 		fptr->enc = rb_to_encoding(v1);
 		fptr->enc2 = 0;
-                clear_readconv(fptr);
+                clear_codeconv(fptr);
 	    }
 	}
     }
Index: gc.c
===================================================================
--- gc.c	(revision 18690)
+++ gc.c	(revision 18691)
@@ -1507,8 +1507,10 @@
 	break;
 
       case T_FILE:
-        if (obj->as.file.fptr)
+        if (obj->as.file.fptr) {
             gc_mark(objspace, obj->as.file.fptr->tied_io_for_writing, lev);
+            gc_mark(objspace, obj->as.file.fptr->writeconv_stateless, lev);
+        }
         break;
 
       case T_REGEXP:
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 18690)
+++ test/ruby/test_io_m17n.rb	(revision 18691)
@@ -601,5 +601,50 @@
     }
   end
 
+  # "iso-2022-jp:utf-8": two writes must yield one continuous ISO-2022-JP stream.
+  def test_write_conversion_fixenc
+    with_pipe {|r, w|
+      w.set_encoding("iso-2022-jp:utf-8")
+      reader = Thread.new { r.read.force_encoding("ascii-8bit") }
+      w << "\u3042" << "\u3044"
+      w.close
+      assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), reader.value)
+    }
+  end
+
+  # iso-2022-jp alone: strings in different encodings still form one stream.
+  def test_write_conversion_anyenc_stateful
+    with_pipe {|r, w|
+      w.set_encoding("iso-2022-jp")
+      reader = Thread.new { r.read.force_encoding("ascii-8bit") }
+      w << "\u3042" << "\x82\xa2".force_encoding("sjis")
+      w.close
+      assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), reader.value)
+    }
+  end
+
+  # euc-jp alone: each string is transcoded to EUC-JP whatever its own encoding.
+  def test_write_conversion_anyenc_stateless
+    with_pipe {|r, w|
+      w.set_encoding("euc-jp")
+      reader = Thread.new { r.read.force_encoding("ascii-8bit") }
+      w << "\u3042" << "\x82\xa2".force_encoding("sjis")
+      w.close
+      assert_equal("\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit"), reader.value)
+    }
+  end
+
+  # iso-2022-jp alone with sync off: close must still emit the final escape.
+  def test_write_conversion_anyenc_stateful_nosync
+    with_pipe {|r, w|
+      w.sync = false
+      w.set_encoding("iso-2022-jp")
+      reader = Thread.new { r.read.force_encoding("ascii-8bit") }
+      w << "\u3042" << "\x82\xa2".force_encoding("sjis")
+      w.close
+      assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), reader.value)
+    }
+  end
+
 end
 
Index: transcode.c
===================================================================
--- transcode.c	(revision 18690)
+++ transcode.c	(revision 18691)
@@ -1219,6 +1219,78 @@
     tc->readagain_len -= n;
 }
 
+struct stateless_encoding_t { /* argument block passed to stateless_encoding_i */
+    const char *stateless_enc; /* out: result, stays NULL until a match is found */
+    const char *stateful_enc;  /* in: name of the encoding being looked up */
+};
+
+/* st_foreach callback: search one inner table for a stateful encoder to the wanted encoding. */
+static int
+stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
+{
+    struct stateless_encoding_t *query = (struct stateless_encoding_t *)arg;
+    const rb_transcoder *tr;
+    st_data_t found;
+
+    if (!st_lookup((st_table *)val, (st_data_t)query->stateful_enc, &found))
+        return ST_CONTINUE;
+    tr = load_transcoder_entry((transcoder_entry_t *)found);
+    if (!tr || tr->stateful_type != stateful_encoder)
+        return ST_CONTINUE;
+    /* the source side of a stateful encoder is the stateless form */
+    query->stateless_enc = tr->from_encoding;
+    return ST_STOP;
+}
+
+/* Returns the stateless counterpart of stateful_enc, or NULL if none is found. */
+const char *
+rb_econv_stateless_encoding(const char *stateful_enc)
+{
+    struct stateless_encoding_t query;
+
+    query.stateless_enc = NULL;
+    query.stateful_enc = stateful_enc;
+    st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&query);
+    return query.stateless_enc;
+}
+
+/* Converts len bytes of src, starting at off, with ec and appends the output
+ * to dst (a new string when dst is nil); flags go to rb_econv_convert.
+ * Returns dst.  Raises ArgumentError if the result would be too long. */
+VALUE
+rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
+{
+    const unsigned char *ss, *sp, *se;
+    unsigned char *ds, *dp, *de;
+    rb_econv_result_t res;
+
+    if (NIL_P(dst))
+        dst = rb_str_buf_new(len);
+    do {
+        long dlen = RSTRING_LEN(dst);
+        int max_output = ec->last_tc->transcoder->max_output;
+        if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
+            unsigned long new_capa = (unsigned long)dlen + len + max_output;
+            if (LONG_MAX < new_capa)
+                rb_raise(rb_eArgError, "too long string");
+            rb_str_resize(dst, new_capa);
+            rb_str_set_len(dst, dlen);
+        }
+        ss = sp = (const unsigned char *)RSTRING_PTR(src) + off;
+        se = ss + len;
+        ds = dp = (unsigned char *)RSTRING_PTR(dst) + dlen;
+        /* the capacity counts from the start of dst, not from ds (which is dlen bytes in) */
+        de = (unsigned char *)RSTRING_PTR(dst) + rb_str_capacity(dst);
+        res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
+        off += sp - ss;
+        len -= sp - ss;
+        rb_str_set_len(dst, dlen + (dp - ds));
+        rb_econv_check_error(ec);
+    } while (res == econv_destination_buffer_full);
+
+    return dst;
+}
+
 static VALUE
 make_econv_exception(rb_econv_t *ec)
 {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]