[前][次][番号順一覧][スレッド一覧]

ruby-changes:7166

From: akr <ko1@a...>
Date: Mon, 18 Aug 2008 12:16:08 +0900 (JST)
Subject: [ruby-changes:7166] Ruby:r18685 (trunk): * io.c (io_shift_crbuf): add strp argument to append into existing string.

akr	2008-08-18 12:13:53 +0900 (Mon, 18 Aug 2008)

  New Revision: 18685

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18685

  Log:
    * io.c (io_shift_crbuf): add strp argument to append into existing
      string.
      (read_all): use econv if enc2 is set.
      (io_getc): follow the io_shift_crbuf change.

  Modified files:
    trunk/ChangeLog
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18684)
+++ ChangeLog	(revision 18685)
@@ -1,3 +1,10 @@
+Mon Aug 18 12:12:29 2008  Tanaka Akira  <akr@f...>
+
+	* io.c (io_shift_crbuf): add strp argument to append into existing
+	  string.
+	  (read_all): use econv if enc2 is set.
+	  (io_getc): follow the io_shift_crbuf change.
+
 Mon Aug 18 10:35:25 2008  Tanaka Akira  <akr@f...>
 
 	* io.c (io_enc_str_converted): new function.
Index: io.c
===================================================================
--- io.c	(revision 18684)
+++ io.c	(revision 18685)
@@ -1406,15 +1406,137 @@
     return str;
 }
 
+static void
+make_readconv(rb_io_t *fptr)
+{
+    if (!fptr->readconv) {
+        fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
+        if (!fptr->readconv)
+            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
+        fptr->crbuf_off = 0;
+        fptr->crbuf_len = 0;
+        fptr->crbuf_capa = 1024;
+        fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
+    }
+}
+
+static int
+more_char(rb_io_t *fptr)
+{
+    const unsigned char *ss, *sp, *se;
+    unsigned char *ds, *dp, *de;
+    rb_econv_result_t res;
+    int putbackable;
+    int crbuf_len0;
+
+    if (fptr->crbuf_len == fptr->crbuf_capa)
+        return 0; /* crbuf full */
+    if (fptr->crbuf_len == 0)
+        fptr->crbuf_off = 0;
+    else if (fptr->crbuf_off + fptr->crbuf_len == fptr->crbuf_capa) {
+        memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
+        fptr->crbuf_off = 0;
+    }
+
+    crbuf_len0 = fptr->crbuf_len;
+
+    while (1) {
+        ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
+        se = sp + fptr->rbuf_len;
+        ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
+        de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
+        res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
+        fptr->rbuf_off += sp - ss;
+        fptr->rbuf_len -= sp - ss;
+        fptr->crbuf_len += dp - ds;
+
+        putbackable = rb_econv_putbackable(fptr->readconv);
+        if (putbackable) {
+            rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable);
+            fptr->rbuf_off -= putbackable;
+            fptr->rbuf_len += putbackable;
+        }
+
+        rb_econv_check_error(fptr->readconv);
+
+        if (crbuf_len0 != fptr->crbuf_len)
+            return 0;
+
+        if (res == econv_finished)
+            return -1;
+
+        if (res == econv_source_buffer_empty) {
+            if (fptr->rbuf_len == 0) {
+                rb_thread_wait_fd(fptr->fd);
+                rb_io_check_closed(fptr);
+                if (io_fillbuf(fptr) == -1) {
+                    ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
+                    de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
+                    res = rb_econv_convert(fptr->readconv, NULL, NULL, &dp, de, 0);
+                    fptr->crbuf_len += dp - ds;
+                    rb_econv_check_error(fptr->readconv);
+                }
+            }
+        }
+    }
+}
+
 static VALUE
+io_shift_crbuf(rb_io_t *fptr, int len, VALUE *strp)
+{
+    VALUE str;
+    if (NIL_P(*strp)) {
+        *strp = str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len);
+    }
+    else {
+        size_t slen;
+        str = *strp;
+        slen = RSTRING_LEN(str);
+        rb_str_resize(str, RSTRING_LEN(str) + len);
+        memcpy(RSTRING_PTR(str)+slen, fptr->crbuf+fptr->crbuf_off, len);
+    }
+    fptr->crbuf_off += len;
+    fptr->crbuf_len -= len;
+    OBJ_TAINT(str);
+    rb_enc_associate(str, fptr->enc);
+    /* xxx: set coderange */
+    if (fptr->crbuf_len == 0)
+        fptr->crbuf_off = 0;
+    if (fptr->crbuf_off < fptr->crbuf_capa/2) {
+        memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
+        fptr->crbuf_off = 0;
+    }
+    return str;
+}
+
+static VALUE
 read_all(rb_io_t *fptr, long siz, VALUE str)
 {
-    long bytes = 0;
+    long bytes;
     long n;
-    long pos = 0;
-    rb_encoding *enc = io_read_encoding(fptr);
-    int cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0;
+    long pos;
+    rb_encoding *enc;
+    int cr;
 
+    if (fptr->enc2) {
+        VALUE str = rb_str_new(NULL, 0);
+        make_readconv(fptr);
+        while (1) {
+            if (fptr->crbuf_len) {
+                io_shift_crbuf(fptr, fptr->crbuf_len, &str);
+            }
+            if (more_char(fptr) == -1) {
+                return io_enc_str_converted(str, fptr);
+            }
+        }
+    }
+
+    bytes = 0;
+    pos = 0;
+
+    enc = io_read_encoding(fptr);
+    cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0;
+
     if (siz == 0) siz = BUFSIZ;
     if (NIL_P(str)) {
 	str = rb_str_new(0, siz);
@@ -1744,82 +1866,7 @@
 	rb_raise(rb_eRuntimeError, "rs modified");
 }
 
-static void
-make_readconv(rb_io_t *fptr)
-{
-    if (!fptr->readconv) {
-        fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
-        if (!fptr->readconv)
-            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
-        fptr->crbuf_off = 0;
-        fptr->crbuf_len = 0;
-        fptr->crbuf_capa = 1024;
-        fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
-    }
-}
-
 static int
-more_char(rb_io_t *fptr)
-{
-    const unsigned char *ss, *sp, *se;
-    unsigned char *ds, *dp, *de;
-    rb_econv_result_t res;
-    int putbackable;
-    int crbuf_len0;
-
-    if (fptr->crbuf_len == fptr->crbuf_capa)
-        return 0; /* crbuf full */
-    if (fptr->crbuf_len == 0)
-        fptr->crbuf_off = 0;
-    else if (fptr->crbuf_off + fptr->crbuf_len == fptr->crbuf_capa) {
-        memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
-        fptr->crbuf_off = 0;
-    }
-
-    crbuf_len0 = fptr->crbuf_len;
-
-    while (1) {
-        ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
-        se = sp + fptr->rbuf_len;
-        ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
-        de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
-        res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
-        fptr->rbuf_off += sp - ss;
-        fptr->rbuf_len -= sp - ss;
-        fptr->crbuf_len += dp - ds;
-
-        putbackable = rb_econv_putbackable(fptr->readconv);
-        if (putbackable) {
-            rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable);
-            fptr->rbuf_off -= putbackable;
-            fptr->rbuf_len += putbackable;
-        }
-
-        rb_econv_check_error(fptr->readconv);
-
-        if (crbuf_len0 != fptr->crbuf_len)
-            return 0;
-
-        if (res == econv_finished)
-            return -1;
-
-        if (res == econv_source_buffer_empty) {
-            if (fptr->rbuf_len == 0) {
-                rb_thread_wait_fd(fptr->fd);
-                rb_io_check_closed(fptr);
-                if (io_fillbuf(fptr) == -1) {
-                    ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
-                    de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
-                    res = rb_econv_convert(fptr->readconv, NULL, NULL, &dp, de, 0);
-                    fptr->crbuf_len += dp - ds;
-                    rb_econv_check_error(fptr->readconv);
-                }
-            }
-        }
-    }
-}
-
-static int
 appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
 {
     VALUE str = *strp;
@@ -2356,31 +2403,14 @@
 }
 
 static VALUE
-io_shift_crbuf(rb_io_t *fptr, int len)
-{
-    VALUE str;
-    str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len);
-    fptr->crbuf_off += len;
-    fptr->crbuf_len -= len;
-    OBJ_TAINT(str);
-    rb_enc_associate(str, fptr->enc);
-    /* xxx: set coderange */
-    if (fptr->crbuf_len == 0)
-        fptr->crbuf_off = 0;
-    if (fptr->crbuf_off < fptr->crbuf_capa/2) {
-        memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
-        fptr->crbuf_off = 0;
-    }
-    return str;
-}
-
-static VALUE
 io_getc(rb_io_t *fptr, rb_encoding *enc)
 {
     int r, n, cr = 0;
     VALUE str;
 
     if (fptr->enc2) {
+        VALUE str = Qnil;
+
         if (!fptr->readconv) {
             make_readconv(fptr);
         }
@@ -2401,16 +2431,16 @@
                 if (fptr->crbuf_len == 0)
                     return Qnil;
                 /* return an incomplete character just before EOF */
-                return io_shift_crbuf(fptr, fptr->crbuf_len);
+                return io_shift_crbuf(fptr, fptr->crbuf_len, &str);
             }
         }
         if (MBCLEN_INVALID_P(r)) {
             r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off,
                               fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len,
                               fptr->enc);
-            return io_shift_crbuf(fptr, r);
+            return io_shift_crbuf(fptr, r, &str);
         }
-        return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r));
+        return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r), &str);
     }
 
     if (io_fillbuf(fptr) < 0) {
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 18684)
+++ test/ruby/test_io_m17n.rb	(revision 18685)
@@ -473,6 +473,77 @@
     }
   end
 
+  def test_gets_invalid
+    with_pipe("utf-8:euc-jp") {|r, w|
+      before = "\u{3042}\u{3044}"
+      invalid = "\x80".force_encoding("utf-8")
+      after = "\u{3046}\u{3048}"
+      w << before + invalid + after
+      w.close
+      err = assert_raise(Encoding::InvalidByteSequence) { r.gets }
+      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
+      assert_equal(after.encode("euc-jp"), r.gets)
+    }
+  end
+
+  def test_getc_invalid
+    with_pipe("utf-8:euc-jp") {|r, w|
+      before1 = "\u{3042}"
+      before2 = "\u{3044}"
+      invalid = "\x80".force_encoding("utf-8")
+      after1 = "\u{3046}"
+      after2 = "\u{3048}"
+      w << before1 + before2 + invalid + after1 + after2
+      w.close
+      assert_equal(before1.encode("euc-jp"), r.getc)
+      assert_equal(before2.encode("euc-jp"), r.getc)
+      err = assert_raise(Encoding::InvalidByteSequence) { r.getc }
+      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
+      assert_equal(after1.encode("euc-jp"), r.getc)
+      assert_equal(after2.encode("euc-jp"), r.getc)
+    }
+  end
+
+  def test_getc_invalid2
+    with_pipe("utf-16le:euc-jp") {|r, w|
+      before1 = "\x42\x30".force_encoding("utf-16le")
+      before2 = "\x44\x30".force_encoding("utf-16le")
+      invalid = "\x00\xd8".force_encoding("utf-16le")
+      after1 = "\x46\x30".force_encoding("utf-16le")
+      after2 = "\x48\x30".force_encoding("utf-16le")
+      w << before1 + before2 + invalid + after1 + after2
+      w.close
+      assert_equal(before1.encode("euc-jp"), r.getc)
+      assert_equal(before2.encode("euc-jp"), r.getc)
+      err = assert_raise(Encoding::InvalidByteSequence) { r.getc }
+      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
+      assert_equal(after1.encode("euc-jp"), r.getc)
+      assert_equal(after2.encode("euc-jp"), r.getc)
+    }
+  end
+
+  def test_read_all
+    with_pipe("utf-8:euc-jp") {|r, w|
+      str = "\u3042\u3044"
+      w << str
+      w.close
+      assert_equal(str.encode("euc-jp"), r.read)
+    }
+  end
+
+  def test_read_all_invalid
+    with_pipe("utf-8:euc-jp") {|r, w|
+      before = "\u{3042}\u{3044}"
+      invalid = "\x80".force_encoding("utf-8")
+      after = "\u{3046}\u{3048}"
+      w << before + invalid + after
+      w.close
+      err = assert_raise(Encoding::InvalidByteSequence) { r.read }
+      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
+      assert_equal(after.encode("euc-jp"), r.read)
+    }
+  end
+
   def test_file_foreach
     with_tmpdir {
       generate_file('tst', 'a' * 8191 + "\xa1\xa1")

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]