ruby-changes:3062
From: ko1@a...
Date: 24 Dec 2007 04:55:30 +0900
Subject: [ruby-changes:3062] matz - Ruby:r14554 (trunk): * io.c (appendline): should do multibyte aware RS search.
matz 2007-12-24 04:55:18 +0900 (Mon, 24 Dec 2007)
New Revision: 14554
Modified files:
trunk/ChangeLog
trunk/io.c
trunk/test/ruby/test_io_m17n.rb
Log:
* io.c (appendline): should do a multibyte-aware search for the record separator (RS).
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14554&r2=14553
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_io_m17n.rb?r1=14554&r2=14553
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/io.c?r1=14554&r2=14553
Index: ChangeLog
===================================================================
--- ChangeLog (revision 14553)
+++ ChangeLog (revision 14554)
@@ -25,6 +25,8 @@
* io.c (io_enc_str): should preserve default_external encoding.
+ * io.c (appendline): should do multibyte aware RS search.
+
Mon Dec 24 02:06:35 2007 Yukihiro Matsumoto <matz@r...>
* io.c (rb_f_open): documentation update.
Index: io.c
===================================================================
--- io.c (revision 14553)
+++ io.c (revision 14554)
@@ -1647,22 +1647,32 @@
}
static int
-appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
+appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, int mb)
{
VALUE str = *strp;
int c = EOF;
long limit = *lp;
+ rb_encoding *enc = io_read_encoding(fptr);
do {
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
- const char *p = READ_DATA_PENDING_PTR(fptr);
- const char *e;
+ const char *s = READ_DATA_PENDING_PTR(fptr);
+ const char *p, *e;
long last = 0, len = (c != EOF);
if (limit > 0 && pending > limit) pending = limit;
+ p = s;
+ again:
e = memchr(p, delim, pending);
- if (e) pending = e - p + 1;
+ if (e) {
+ if (mb &&
+ ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,(UChar*)s,(UChar*)e) != (UChar*)e) {
+ p = e + 1;
+ goto again;
+ }
+ pending = e - s + 1;
+ }
len += pending;
if (!NIL_P(str)) {
last = RSTRING_LEN(str);
@@ -1742,7 +1752,7 @@
int c, nolimit = 0;
for (;;) {
- c = appendline(fptr, delim, &str, &limit);
+ c = appendline(fptr, delim, &str, &limit, 0);
if (c == EOF || c == delim) break;
if (limit == 0) {
nolimit = 1;
@@ -1842,7 +1852,7 @@
}
newline = rsptr[rslen - 1];
- while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
+ while ((c = appendline(fptr, newline, &str, &limit, 1)) != EOF) {
if (c == newline) {
if (RSTRING_LEN(str) < rslen) continue;
if (!rspara) rscheck(rsptr, rslen, rs);
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb (revision 14553)
+++ test/ruby/test_io_m17n.rb (revision 14554)
@@ -60,8 +60,8 @@
s = open("tmp", "r:euc-jp:utf-8") {|f|
f.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8"))
}
- assert_equal(Encoding.find("euc-jp"), s.encoding)
- assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("iso-8859-1"), s, '[ruby-core:14319]')
+ assert_equal(Encoding.find("utf-8"), s.encoding)
+ assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), s, '[ruby-core:14319]')
}
end
@@ -196,8 +196,6 @@
w.close
s = r.read
assert_equal(Encoding.default_external, s.encoding)
- puts encdump(s)
- puts encdump(utf8)
assert_str_equal(utf8, s)
}
@@ -226,7 +224,9 @@
}
}
- ENCS.reject {|e| e == Encoding::ASCII_8BIT }.each {|enc|
+ ENCS.each {|enc|
+ next if enc == Encoding::ASCII_8BIT
+ next if enc == Encoding::UTF_8
with_pipe("#{enc}:UTF-8") {|r, w|
w << "\xc2\xa1"
w.close
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml