[前][次][番号順一覧][スレッド一覧]

ruby-changes:7165

From: akr <ko1@a...>
Date: Mon, 18 Aug 2008 10:40:21 +0900 (JST)
Subject: [ruby-changes:7165] Ruby:r18684 (trunk): * io.c (io_enc_str_converted): new function.

akr	2008-08-18 10:40:01 +0900 (Mon, 18 Aug 2008)

  New Revision: 18684

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18684

  Log:
    * io.c (io_enc_str_converted): new function.
      (make_readconv): extracted from io_getc.
      (more_char): ditto.
      (appendline): use econv via make_readconv and more_char for code
      conversion.
      (prepare_getline_args): don't convert record separator.
      (rb_io_getline_1): don't use rb_io_getline_fast if enc2 is set.
      (io_getc): use make_readconv and more_char.

  Modified files:
    trunk/ChangeLog
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18683)
+++ ChangeLog	(revision 18684)
@@ -1,3 +1,14 @@
+Mon Aug 18 10:35:25 2008  Tanaka Akira  <akr@f...>
+
+	* io.c (io_enc_str_converted): new function.
+	  (make_readconv): extracted from io_getc.
+	  (more_char): ditto.
+	  (appendline): use econv via make_readconv and more_char for code
+	  conversion.
+	  (prepare_getline_args): don't convert record separator.
+	  (rb_io_getline_1): don't use rb_io_getline_fast if enc2 is set.
+	  (io_getc): use make_readconv and more_char.
+
 Mon Aug 18 08:27:44 2008  Kazuhiro NISHIYAMA  <zn@m...>
 
 	* common.mk: fix error in uncommon.mk.
Index: io.c
===================================================================
--- io.c	(revision 18683)
+++ io.c	(revision 18684)
@@ -1399,6 +1399,14 @@
 }
 
 static VALUE
+io_enc_str_converted(VALUE str, rb_io_t *fptr)
+{
+    OBJ_TAINT(str);
+    rb_enc_associate(str, io_read_encoding(fptr));
+    return str;
+}
+
+static VALUE
 read_all(rb_io_t *fptr, long siz, VALUE str)
 {
     long bytes = 0;
@@ -1736,12 +1744,131 @@
 	rb_raise(rb_eRuntimeError, "rs modified");
 }
 
+static void
+make_readconv(rb_io_t *fptr)
+{
+    if (!fptr->readconv) {
+        fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
+        if (!fptr->readconv)
+            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
+        fptr->crbuf_off = 0;
+        fptr->crbuf_len = 0;
+        fptr->crbuf_capa = 1024;
+        fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
+    }
+}
+
 static int
+more_char(rb_io_t *fptr)
+{
+    const unsigned char *ss, *sp, *se;
+    unsigned char *ds, *dp, *de;
+    rb_econv_result_t res;
+    int putbackable;
+    int crbuf_len0;
+
+    if (fptr->crbuf_len == fptr->crbuf_capa)
+        return 0; /* crbuf full */
+    if (fptr->crbuf_len == 0)
+        fptr->crbuf_off = 0;
+    else if (fptr->crbuf_off + fptr->crbuf_len == fptr->crbuf_capa) {
+        memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
+        fptr->crbuf_off = 0;
+    }
+
+    crbuf_len0 = fptr->crbuf_len;
+
+    while (1) {
+        ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
+        se = sp + fptr->rbuf_len;
+        ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
+        de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
+        res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
+        fptr->rbuf_off += sp - ss;
+        fptr->rbuf_len -= sp - ss;
+        fptr->crbuf_len += dp - ds;
+
+        putbackable = rb_econv_putbackable(fptr->readconv);
+        if (putbackable) {
+            rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable);
+            fptr->rbuf_off -= putbackable;
+            fptr->rbuf_len += putbackable;
+        }
+
+        rb_econv_check_error(fptr->readconv);
+
+        if (crbuf_len0 != fptr->crbuf_len)
+            return 0;
+
+        if (res == econv_finished)
+            return -1;
+
+        if (res == econv_source_buffer_empty) {
+            if (fptr->rbuf_len == 0) {
+                rb_thread_wait_fd(fptr->fd);
+                rb_io_check_closed(fptr);
+                if (io_fillbuf(fptr) == -1) {
+                    ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
+                    de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
+                    res = rb_econv_convert(fptr->readconv, NULL, NULL, &dp, de, 0);
+                    fptr->crbuf_len += dp - ds;
+                    rb_econv_check_error(fptr->readconv);
+                }
+            }
+        }
+    }
+}
+
+static int
 appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
 {
     VALUE str = *strp;
     long limit = *lp;
 
+    if (fptr->enc2) {
+        make_readconv(fptr);
+        while (1) {
+            const char *p, *e;
+            int searchlen;
+            if (fptr->crbuf_len) {
+                p = fptr->crbuf+fptr->crbuf_off;
+                searchlen = fptr->crbuf_len;
+                if (0 < limit && limit < searchlen)
+                    searchlen = limit;
+                e = memchr(p, delim, searchlen);
+                if (e) {
+                    if (NIL_P(str))
+                        *strp = str = rb_str_new(p, e-p+1);
+                    else
+                        rb_str_buf_cat(str, p, e-p+1);
+                    fptr->crbuf_off += e-p+1;
+                    fptr->crbuf_len -= e-p+1;
+                    limit -= e-p+1;
+                    *lp = limit;
+                    return delim;
+                }
+
+                if (NIL_P(str))
+                    *strp = str = rb_str_new(p, searchlen);
+                else
+                    rb_str_buf_cat(str, p, searchlen);
+                fptr->crbuf_off += searchlen;
+                fptr->crbuf_len -= searchlen;
+                limit -= searchlen;
+
+                if (limit == 0) {
+                    *lp = limit;
+                    return (unsigned char)RSTRING_PTR(str)[RSTRING_LEN(str)-1];
+                }
+            }
+
+            if (more_char(fptr) == -1) {
+                *lp = limit;
+                return EOF;
+            }
+        }
+    }
+
     while (1) {
 	long pending = READ_DATA_PENDING_COUNT(fptr);
 	if (pending > 0) {
@@ -1887,15 +2014,6 @@
                          rb_enc_name(enc_rs));
             }
 	}
-	if (fptr->enc2) {
-            VALUE rs2;
-	    rs2 = rb_funcall(rs, id_encode, 2,
-			    rb_enc_from_encoding(fptr->enc2),
-			    rb_enc_from_encoding(fptr->enc));
-            if (!RTEST(rb_str_equal(rs, rs2))) {
-                rs = rs2;
-            }
-	}
     }
     *rsp = rs;
     *limit = NIL_P(lim) ? -1L : NUM2LONG(lim);
@@ -1911,9 +2029,6 @@
 
     GetOpenFile(io, fptr);
     rb_io_check_readable(fptr);
-    if (rb_enc_dummy_p(io_input_encoding(fptr)) && rs != rb_default_rs) {
-	rb_raise(rb_eNotImpError, "gets with delimiter against dummy encoding is not currently supported");
-    }
     if (NIL_P(rs)) {
 	str = read_all(fptr, 0, Qnil);
 	if (RSTRING_LEN(str) == 0) return Qnil;
@@ -1921,7 +2036,7 @@
     else if (limit == 0) {
 	return rb_enc_str_new(0, 0, io_read_encoding(fptr));
     }
-    else if (rs == rb_default_rs && limit < 0 &&
+    else if (rs == rb_default_rs && limit < 0 && !fptr->enc2 &&
              rb_enc_asciicompat(enc = io_read_encoding(fptr))) {
 	return rb_io_getline_fast(fptr, enc);
     }
@@ -1945,7 +2060,10 @@
 	}
 	newline = (unsigned char)rsptr[rslen - 1];
 
-	enc = io_input_encoding(fptr);
+        if (fptr->enc2)
+            enc = fptr->enc;
+        else
+            enc = io_input_encoding(fptr);
 	while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
             const char *s, *p, *pp;
 
@@ -1981,7 +2099,8 @@
 		swallow(fptr, '\n');
 	    }
 	}
-	if (!NIL_P(str)) str = io_enc_str(str, fptr);
+	if (!NIL_P(str))
+            str = io_enc_str_converted(str, fptr);
     }
 
     if (!NIL_P(str)) {
@@ -2263,54 +2382,32 @@
 
     if (fptr->enc2) {
         if (!fptr->readconv) {
-            fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
-            if (!fptr->readconv)
-                rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
-            fptr->crbuf_off = 0;
-            fptr->crbuf_len = 0;
-            fptr->crbuf_capa = 1024;
-            fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
+            make_readconv(fptr);
         }
 
         while (1) {
-            const unsigned char *ss, *sp, *se;
-            unsigned char *ds, *dp, *de;
-            rb_econv_result_t res;
-            int putbackable;
             if (fptr->crbuf_len) {
-                r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+                r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off,
+                                          fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len,
+                                          fptr->enc);
                 if (!MBCLEN_NEEDMORE_P(r))
                     break;
                 if (fptr->crbuf_len == fptr->crbuf_capa) {
                     rb_raise(rb_eIOError, "too long character");
                 }
             }
-            if (fptr->rbuf_len == 0) {
-                if (io_fillbuf(fptr) == -1) {
-                    if (fptr->crbuf_len == 0)
-                        return Qnil;
-                    /* return an incomplete character just before EOF */
-                    return io_shift_crbuf(fptr, fptr->crbuf_len);
-                }
+
+            if (more_char(fptr) == -1) {
+                if (fptr->crbuf_len == 0)
+                    return Qnil;
+                /* return an incomplete character just before EOF */
+                return io_shift_crbuf(fptr, fptr->crbuf_len);
             }
-            ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
-            se = sp + fptr->rbuf_len;
-            ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
-            de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
-            res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
-            fptr->rbuf_off += sp - ss;
-            fptr->rbuf_len -= sp - ss;
-            fptr->crbuf_len += dp - ds;
-            putbackable = rb_econv_putbackable(fptr->readconv);
-            if (putbackable) {
-                rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable);
-                fptr->rbuf_off -= putbackable;
-                fptr->rbuf_len += putbackable;
-            }
-            rb_econv_check_error(fptr->readconv);
         }
         if (MBCLEN_INVALID_P(r)) {
-            r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+            r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off,
+                              fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len,
+                              fptr->enc);
             return io_shift_crbuf(fptr, r);
         }
         return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r));
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 18683)
+++ test/ruby/test_io_m17n.rb	(revision 18684)
@@ -161,13 +161,11 @@
     with_tmpdir {
       src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
       generate_file('tmp', src)
-      assert_raise(NotImplementedError) do
-        s = open("tmp", "r:iso-2022-jp:euc-jp") {|f|
-          f.gets("0".force_encoding("euc-jp"))
-        }
-        assert_equal(Encoding.find("euc-jp"), s.encoding)
-        assert_str_equal(src.encode("euc-jp"), s)
-      end
+      s = open("tmp", "r:iso-2022-jp:euc-jp") {|f|
+        f.gets("0".force_encoding("euc-jp"))
+      }
+      assert_equal(Encoding.find("euc-jp"), s.encoding)
+      assert_str_equal(src.encode("euc-jp"), s)
     }
   end
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]