[前][次][番号順一覧][スレッド一覧]

ruby-changes:3127

From: ko1@a...
Date: 25 Dec 2007 01:36:37 +0900
Subject: [ruby-changes:3127] matz - Ruby:r14619 (trunk): * include/ruby/encoding.h (rb_enc_left_char_head): new utility macro.

matz	2007-12-25 01:36:14 +0900 (Tue, 25 Dec 2007)

  New Revision: 14619

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/io.c
    trunk/string.c
    trunk/test/ruby/test_io_m17n.rb
    trunk/version.h

  Log:
    * include/ruby/encoding.h (rb_enc_left_char_head): new utility macro.
    
    * include/ruby/encoding.h (rb_enc_right_char_head): ditto.
    
    * io.c (appendline): perform the multibyte RS search inside the function.
    
    * io.c (prepare_getline_args): RS may be nil.
    
    * io.c (rb_io_getc): should process characters based on the external
      encoding when transcoding is required.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/version.h?r1=14619&r2=14618
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14619&r2=14618
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14619&r2=14618
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_io_m17n.rb?r1=14619&r2=14618
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/io.c?r1=14619&r2=14618
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=14619&r2=14618

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 14618)
+++ include/ruby/encoding.h	(revision 14619)
@@ -94,6 +94,9 @@
 
 /* ptr, ptr, encoding -> prev_char */
 #define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
+/* ptr, ptr, encoding -> next_char */
+#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)s,(UChar*)p)
+#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)s,(UChar*)p)
 
 #define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
 #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 14618)
+++ ChangeLog	(revision 14619)
@@ -1,3 +1,16 @@
+Tue Dec 25 01:19:18 2007  Yukihiro Matsumoto  <matz@r...>
+
+	* include/ruby/encoding.h (rb_enc_left_char_head): new utility macro.
+
+	* include/ruby/encoding.h (rb_enc_right_char_head): ditto.
+
+	* io.c (appendline): does multibyte RS search in the function.
+
+	* io.c (prepare_getline_args): RS may be nil.
+
+	* io.c (rb_io_getc): should process character based on external
+	  encoding, when transcoding required.
+
 Tue Dec 25 01:07:57 2007  Tanaka Akira  <akr@f...>
 
 	* lib/irb/output-method.rb: translate a comment to English to
Index: string.c
===================================================================
--- string.c	(revision 14618)
+++ string.c	(revision 14619)
@@ -1491,8 +1491,7 @@
 	char *t;
 	pos = rb_memsearch(sptr, slen, s, len);
 	if (pos < 0) return pos;
-	t = (char *)onigenc_get_right_adjust_char_head(enc, (const UChar *)s,
-						       (const UChar *)s + pos);
+	t = rb_enc_right_char_head(s, s+pos, enc);
 	if (t == s) break;
 	if ((len -= t - s) <= 0) return -1;
 	offset += t - s;
@@ -4564,7 +4563,7 @@
     if (p[len-1] == newline &&
 	(rslen <= 1 ||
 	 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
-	if (ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, (UChar *)p, (UChar *)pp) != (const UChar*)pp)
+	if (rb_enc_left_char_head(p, pp, enc) != pp)
 	    return Qnil;
 	rb_str_modify(str);
 	STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
Index: io.c
===================================================================
--- io.c	(revision 14618)
+++ io.c	(revision 14619)
@@ -363,6 +363,15 @@
 	: rb_default_external_encoding();
 }
 
+static rb_encoding*
+io_input_encoding(rb_io_t *fptr)
+{
+    if (fptr->enc2) {
+	return fptr->enc2;
+    }
+    return io_read_encoding(fptr);
+}
+
 void
 rb_io_check_writable(rb_io_t *fptr)
 {
@@ -1646,31 +1655,51 @@
     return str;
 }
 
+static void
+rscheck(const char *rsptr, long rslen, VALUE rs)
+{
+    if (!rs) return;
+    if (RSTRING_PTR(rs) != rsptr && RSTRING_LEN(rs) != rslen)
+	rb_raise(rb_eRuntimeError, "rs modified");
+}
+
 static int
-appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, int mb)
+appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE rs, VALUE *strp, long *lp)
 {
     VALUE str = *strp;
     int c = EOF;
     long limit = *lp;
-    rb_encoding *enc = io_read_encoding(fptr);
+    rb_encoding *enc = io_input_encoding(fptr);
 
     do {
 	long pending = READ_DATA_PENDING_COUNT(fptr);
 	if (pending > 0) {
 	    const char *s = READ_DATA_PENDING_PTR(fptr);
-	    const char *p, *e;
+	    const char *p, *e, *pp;
 	    long last = 0, len = (c != EOF);
 
 	    if (limit > 0 && pending > limit) pending = limit;
-	    p = s;
+	    pp = p = s;
 	  again:
 	    e = memchr(p, delim, pending);
 	    if (e) {
-		if (mb &&
-		    ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,(UChar*)s,(UChar*)e) != (UChar*)e) {
+		const char *p0 = e - rslen + 1;
+		if (p0 < s) {
 		    p = e + 1;
 		    goto again;
 		}
+		pp = rb_enc_left_char_head(pp, p0, enc);
+		if (pp != p0) {
+		    p = e + 1;
+		    goto again;
+		}
+		if (rsptr) {
+		    rscheck(rsptr, rslen, rs);
+		    if (memcmp(p0, rsptr, rslen) != 0) {
+			p = e + 1;
+			goto again;
+		    }
+		}
 		pending = e - s + 1;
 	    }
 	    len += pending;
@@ -1752,7 +1781,7 @@
     int c, nolimit = 0;
 
     for (;;) {
-	c = appendline(fptr, delim, &str, &limit, 0);
+	c = appendline(fptr, delim, 0, 0, 0, &str, &limit);
 	if (c == EOF || c == delim) break;
 	if (limit == 0) {
 	    nolimit = 1;
@@ -1770,14 +1799,6 @@
     return str;
 }
 
-static int
-rscheck(const char *rsptr, long rslen, VALUE rs)
-{
-    if (RSTRING_PTR(rs) != rsptr && RSTRING_LEN(rs) != rslen)
-	rb_raise(rb_eRuntimeError, "rs modified");
-    return 0;
-}
-
 static void
 prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io)
 {
@@ -1803,10 +1824,20 @@
 	}
     }
     GetOpenFile(io, fptr);
-    if (fptr->enc2) {
-	rs = rb_funcall(rs, id_encode, 2, 
-			rb_enc_from_encoding(fptr->enc2),
-			rb_enc_from_encoding(fptr->enc));
+    if (!NIL_P(rs)) {
+	rb_encoding *enc_rs = rb_enc_get(rs);
+	rb_encoding *enc_io = io_read_encoding(fptr);
+
+	if (enc_io != enc_rs &&
+	    (rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT ||
+	     !rb_enc_asciicompat(enc_io))) {
+	    rb_raise(rb_eArgError, "IO and RS encodings differ");
+	}
+	if (fptr->enc2) {
+	    rs = rb_funcall(rs, id_encode, 2, 
+			    rb_enc_from_encoding(fptr->enc2),
+			    rb_enc_from_encoding(fptr->enc));
+	}
     }
     *rsp = rs;
     *limit = NIL_P(lim) ? -1L : NUM2LONG(lim);
@@ -1843,6 +1874,7 @@
 	    rslen = 2;
 	    rspara = 1;
 	    swallow(fptr, '\n');
+	    rs = 0;
 	}
 	else if (rslen == 1) {
 	    return rb_io_getline_fast(fptr, (unsigned char)RSTRING_PTR(rs)[0], limit);
@@ -1852,12 +1884,9 @@
 	}
 	newline = rsptr[rslen - 1];
 
-	while ((c = appendline(fptr, newline, &str, &limit, 1)) != EOF) {
+	while ((c = appendline(fptr, newline, rsptr, rslen, rs, &str, &limit)) != EOF) {
 	    if (c == newline) {
-		if (RSTRING_LEN(str) < rslen) continue;
-		if (!rspara) rscheck(rsptr, rslen, rs);
-		if (memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - rslen,
-			   rsptr, rslen) == 0) break;
+		break;
 	    }
 	    if (limit == 0) {
 		nolimit = 1;
@@ -2201,7 +2230,7 @@
     GetOpenFile(io, fptr);
     rb_io_check_readable(fptr);
 
-    enc = io_read_encoding(fptr);
+    enc = io_input_encoding(fptr);
     READ_CHECK(fptr);
     if (io_fillbuf(fptr) < 0) {
 	return Qnil;
Index: version.h
===================================================================
--- version.h	(revision 14618)
+++ version.h	(revision 14619)
@@ -1,7 +1,7 @@
 #define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2007-12-24"
+#define RUBY_RELEASE_DATE "2007-12-25"
 #define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20071224
+#define RUBY_RELEASE_CODE 20071225
 #define RUBY_PATCHLEVEL 0
 
 #define RUBY_VERSION_MAJOR 1
@@ -9,7 +9,7 @@
 #define RUBY_VERSION_TEENY 0
 #define RUBY_RELEASE_YEAR 2007
 #define RUBY_RELEASE_MONTH 12
-#define RUBY_RELEASE_DAY 24
+#define RUBY_RELEASE_DAY 25
 
 #ifdef RUBY_EXTERN
 RUBY_EXTERN const char ruby_version[];
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 14618)
+++ test/ruby/test_io_m17n.rb	(revision 14619)
@@ -87,7 +87,7 @@
   def test_open_w
     with_tmpdir {
       open("tmp", "w") {|f|
-        assert_equal(nil, f.external_encoding)
+        assert_equal(Encoding.default_external, f.external_encoding)
         assert_equal(nil, f.internal_encoding)
       }
     }
@@ -96,7 +96,7 @@
   def test_open_wb
     with_tmpdir {
       open("tmp", "wb") {|f|
-        assert_equal(nil, f.external_encoding)
+        assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
         assert_equal(nil, f.internal_encoding)
       }
     }
@@ -135,12 +135,12 @@
   end
 
   def test_stdout
-    assert_equal(nil, STDOUT.external_encoding)
+    assert_equal(Encoding.default_external, STDOUT.external_encoding)
     assert_equal(nil, STDOUT.internal_encoding)
   end
 
   def test_stderr
-    assert_equal(nil, STDERR.external_encoding)
+    assert_equal(Encoding.default_external, STDERR.external_encoding)
     assert_equal(nil, STDERR.internal_encoding)
   end
 
@@ -181,6 +181,7 @@
     with_pipe("euc-jp:utf-8") {|r, w|
       w.write "before \xa2\xa2 after"
       rs = "\xA2\xA2".encode("utf-8", "euc-jp")
+      w.close
       timeout(1) {
         assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"),
                      r.gets(rs))

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]