[前][次][番号順一覧][スレッド一覧]

ruby-changes:7147

From: akr <ko1@a...>
Date: Sun, 17 Aug 2008 02:06:50 +0900 (JST)
Subject: [ruby-changes:7147] Ruby:r18666 (trunk): * include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,

akr	2008-08-17 02:06:35 +0900 (Sun, 17 Aug 2008)

  New Revision: 18666

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18666

  Log:
    * include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
      crbuf_len, crbuf_capa.
      (MakeOpenFile): initialize them.
    
    * io.c (io_shift_crbuf): new function.
      (io_getc): use econv.
      (rb_io_fptr_finalize): finalize readconv and crbuf.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/io.h
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb

Index: include/ruby/io.h
===================================================================
--- include/ruby/io.h	(revision 18665)
+++ include/ruby/io.h	(revision 18666)
@@ -36,17 +36,26 @@
     char *path;			/* pathname for file */
     void (*finalize)(struct rb_io_t*,int); /* finalize proc */
     long refcnt;
+
     char *wbuf;                 /* wbuf_off + wbuf_len <= wbuf_capa */
     int wbuf_off;
     int wbuf_len;
     int wbuf_capa;
+
     char *rbuf;                 /* rbuf_off + rbuf_len <= rbuf_capa */
     int rbuf_off;
     int rbuf_len;
     int rbuf_capa;
+
     VALUE tied_io_for_writing;
-    rb_encoding *enc;
-    rb_encoding *enc2;
+    rb_encoding *enc;   /* int_enc if enc2.  ext_enc otherwise. */
+    rb_encoding *enc2;  /* ext_enc if not NULL. */
+
+    rb_econv_t *readconv;
+    char *crbuf;                /* crbuf_off + crbuf_len <= crbuf_capa */
+    int crbuf_off;
+    int crbuf_len;
+    int crbuf_capa;
 } rb_io_t;
 
 #define HAVE_RB_IO_T 1
@@ -89,6 +98,11 @@
     fp->rbuf_off = 0;\
     fp->rbuf_len = 0;\
     fp->rbuf_capa = 0;\
+    fp->readconv = NULL;\
+    fp->crbuf = NULL;\
+    fp->crbuf_off = 0;\
+    fp->crbuf_len = 0;\
+    fp->crbuf_capa = 0;\
     fp->tied_io_for_writing = 0;\
     fp->enc = 0;\
     fp->enc2 = 0;\
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18665)
+++ ChangeLog	(revision 18666)
@@ -1,3 +1,13 @@
+Sun Aug 17 01:29:46 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
+	  crbuf_len, crbuf_capa.
+	  (MakeOpenFile): initialize them.
+
+	* io.c (io_shift_crbuf): new function.
+	  (io_getc): use econv.
+	  (rb_io_fptr_finalize): finalize readconv and crbuf.
+
 Sun Aug 17 00:02:07 2008  Tanaka Akira  <akr@f...>
 
 	* include/ruby/encoding.h (rb_econv_check_error): declared.
Index: io.c
===================================================================
--- io.c	(revision 18665)
+++ io.c	(revision 18666)
@@ -2269,13 +2269,76 @@
 }
 
 static VALUE
+io_shift_crbuf(rb_io_t *fptr, int len)
+{
+    /* Take len bytes off the front of fptr->crbuf as a tainted String tagged fptr->enc. */
+    VALUE str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len);
+    fptr->crbuf_off += len;
+    fptr->crbuf_len -= len;
+    OBJ_TAINT(str);
+    rb_enc_associate(str, fptr->enc); /* xxx: set coderange */
+    if (fptr->crbuf_len == 0)
+        fptr->crbuf_off = 0;
+    else if (fptr->crbuf_capa/2 < fptr->crbuf_off) {
+        /* slide the unread tail down only once over half the buffer is consumed */
+        memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
+        fptr->crbuf_off = 0;
+    }
+    return str;
+}
+
+static VALUE
 io_getc(rb_io_t *fptr, rb_encoding *enc)
 {
     int r, n, cr = 0;
     VALUE str;
 
-    if (rb_enc_dummy_p(enc)) {
-	rb_raise(rb_eNotImpError, "getc against dummy encoding is not currently supported");
+    if (fptr->enc2) {
+        if (!fptr->readconv) {
+            fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
+            if (!fptr->readconv)
+                rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
+            fptr->crbuf_off = 0;
+            fptr->crbuf_len = 0;
+            fptr->crbuf_capa = 1024;
+            fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
+        }
+
+        while (1) {
+            const unsigned char *ss, *sp, *se;
+            unsigned char *ds, *dp, *de;
+            rb_econv_result_t res;
+            if (fptr->crbuf_len) {
+                r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+                if (!MBCLEN_NEEDMORE_P(r))
+                    break;
+                if (fptr->crbuf_len == fptr->crbuf_capa) {
+                    rb_raise(rb_eIOError, "too long character");
+                }
+            }
+            if (fptr->rbuf_len == 0) {
+                if (io_fillbuf(fptr) == -1) {
+                    if (fptr->crbuf_len == 0)
+                        return Qnil;
+                    /* return an incomplete character just before EOF */
+                    return io_shift_crbuf(fptr, fptr->crbuf_len);
+                }
+            }
+            ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
+            se = sp + fptr->rbuf_len;
+            ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
+            de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
+            res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
+            fptr->rbuf_off += sp - ss;
+            fptr->rbuf_len -= sp - ss;
+            fptr->crbuf_len += dp - ds;
+            rb_econv_check_error(fptr->readconv);
+        }
+        if (MBCLEN_INVALID_P(r)) {
+            r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+            return io_shift_crbuf(fptr, r);
+        }
+        return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r));
     }
 
     if (io_fillbuf(fptr) < 0) {
@@ -2766,6 +2829,14 @@
         free(fptr->wbuf);
         fptr->wbuf = 0;
     }
+    if (fptr->readconv) {
+        rb_econv_close(fptr->readconv);
+        fptr->readconv = NULL;
+    }
+    if (fptr->crbuf) {
+        free(fptr->crbuf);
+        fptr->crbuf = NULL;
+    }
     free(fptr);
     return 1;
 }
@@ -3370,6 +3441,8 @@
     char *enc2name;
     int idx, idx2;
 
+    /* parse estr as "enc" or "enc2:enc" */
+
     p0 = strrchr(estr, ':');
     if (!p0) p1 = estr;
     else     p1 = p0 + 1;
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 18665)
+++ test/ruby/test_io_m17n.rb	(revision 18666)
@@ -220,12 +220,10 @@
     with_tmpdir {
       src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
       generate_file('tmp', src)
-      assert_raise(NotImplementedError) do
-        open("tmp", "r:iso-2022-jp:euc-jp") {|f|
-          assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
-          assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
-        }
-      end
+      open("tmp", "r:iso-2022-jp:euc-jp") {|f|
+        assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
+        assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
+      }
     }
   end
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]