ruby-changes:3710
From: ko1@a...
Date: Thu, 24 Jan 2008 03:44:13 +0900 (JST)
Subject: [ruby-changes:3710] matz - Ruby:r15199 (trunk): * string.c (rb_str_each_line): use memchr(3) for faster newline
matz 2008-01-24 03:43:51 +0900 (Thu, 24 Jan 2008)
New Revision: 15199
Modified files:
trunk/ChangeLog
trunk/include/ruby/encoding.h
trunk/io.c
trunk/string.c
Log:
* string.c (rb_str_each_line): use memchr(3) for faster newline
search.
* io.c (appendline): remove unused arguments.
* io.c (rb_io_getline_fast): make it much simpler (and faster).
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15199&r2=15198&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15199&r2=15198&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/io.c?r1=15199&r2=15198&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=15199&r2=15198&diff_format=u
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h (revision 15198)
+++ include/ruby/encoding.h (revision 15199)
@@ -134,7 +134,7 @@
#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
/* ptr, ptr, encoding -> newline_or_not */
-#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,p,end)
+#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)p,(UChar*)end)
#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15198)
+++ ChangeLog (revision 15199)
@@ -1,3 +1,12 @@
+Thu Jan 24 03:23:44 2008 Yukihiro Matsumoto <matz@r...>
+
+ * string.c (rb_str_each_line): use memchr(3) for faster newline
+ search.
+
+ * io.c (appendline): remove unused arguments
+
+ * io.c (rb_io_getline_fast): make much simpler (and faster).
+
Thu Jan 24 02:13:07 2008 Yusuke Endoh <mame@t...>
* insns.def (expandarray): fix stack inc.
Index: string.c
===================================================================
--- string.c (revision 15198)
+++ string.c (revision 15199)
@@ -4470,9 +4470,8 @@
rb_encoding *enc;
VALUE rs;
int newline;
- char *p = RSTRING_PTR(str), *pend = p + RSTRING_LEN(str), *s = p;
- char *ptr = p;
- long len = RSTRING_LEN(str), rslen;
+ char *p, *pend, *s, *ptr;
+ long len, rslen;
VALUE line;
int n;
@@ -4480,29 +4479,39 @@
rs = rb_rs;
}
RETURN_ENUMERATOR(str, argc, argv);
-
if (NIL_P(rs)) {
rb_yield(str);
return str;
}
+ str = rb_str_new4(str);
+ ptr = p = s = RSTRING_PTR(str);
+ pend = p + RSTRING_LEN(str);
+ len = RSTRING_LEN(str);
StringValue(rs);
- enc = rb_enc_check(str, rs);
if (rs == rb_default_rs) {
+ enc = rb_enc_get(str);
while (p < pend) {
- n = rb_enc_mbclen(p, pend, enc);
- if (rb_enc_is_newline(p, pend, enc)) {
- line = rb_str_new5(str, s, p - s + n);
- OBJ_INFECT(line, str);
- rb_enc_copy(line, str);
- rb_yield(line);
- str_mod_check(str, ptr, len);
- s = p + n;
+ char *p0;
+
+ p = memchr(p, '\n', pend - p);
+ if (!p) break;
+ p0 = rb_enc_left_char_head(s, p, enc);
+ if (!rb_enc_is_newline(p0, pend, enc)) {
+ p++;
+ continue;
}
- p += n;
+ p = p0 + rb_enc_mbclen(s, p0, enc);
+ line = rb_str_new5(str, s, p - s);
+ OBJ_INFECT(line, str);
+ rb_enc_copy(line, str);
+ rb_yield(line);
+ str_mod_check(str, ptr, len);
+ s = p;
}
goto finish;
}
+ enc = rb_enc_check(str, rs);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
newline = '\n';
@@ -4535,8 +4544,7 @@
finish:
if (s != pend) {
- if (p > pend) p = pend;
- line = rb_str_new5(str, s, p - s);
+ line = rb_str_new5(str, s, pend - s);
OBJ_INFECT(line, str);
rb_enc_copy(line, str);
rb_yield(line);
Index: io.c
===================================================================
--- io.c (revision 15198)
+++ io.c (revision 15199)
@@ -1686,21 +1686,19 @@
}
static int
-appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE *strp, long *lp)
+appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
{
VALUE str = *strp;
int c = EOF;
long limit = *lp;
- if (rsptr == 0)
- rslen = 1;
-
do {
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
const char *p = READ_DATA_PENDING_PTR(fptr);
const char *e;
long last = 0, len = (c != EOF);
+ rb_encoding *enc = io_read_encoding(fptr);
if (limit > 0 && pending > limit) pending = limit;
e = memchr(p, delim, pending);
@@ -1720,7 +1718,7 @@
if (limit > 0 && limit == pending) {
char *p = fptr->rbuf+fptr->rbuf_off;
char *pp = p + limit;
- char *pl = rb_enc_left_char_head(p, pp, io_read_encoding(fptr));
+ char *pl = rb_enc_left_char_head(p, pp, enc);
if (pl < pp) {
int diff = pp - pl;
@@ -1790,27 +1788,53 @@
}
static VALUE
-rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit)
+rb_io_getline_fast(rb_io_t *fptr)
{
VALUE str = Qnil;
- int c, nolimit = 0;
+ int len = 0;
+ rb_encoding *enc = io_read_encoding(fptr);
for (;;) {
- c = appendline(fptr, delim, 0, 0, &str, &limit);
- if (c == EOF || c == delim) break;
- if (limit == 0) {
- nolimit = 1;
+ long pending = READ_DATA_PENDING_COUNT(fptr);
+
+ if (pending > 0) {
+ const char *p = READ_DATA_PENDING_PTR(fptr);
+ const char *e;
+
+ e = memchr(p, '\n', pending);
+ if (e) {
+ const char *p0 = rb_enc_left_char_head(p, e, enc);
+ const char *pend = rb_enc_left_char_head(p, p+pending, enc);
+ if (rb_enc_is_newline(p0, pend, enc)) {
+ pending = p0 - p + rb_enc_mbclen(p0, pend, enc);
+ }
+ else {
+ e = 0;
+ }
+ }
+ if (NIL_P(str)) {
+ str = rb_str_new(p, pending);
+ fptr->rbuf_off += pending;
+ fptr->rbuf_len -= pending;
+ }
+ else {
+ rb_str_resize(str, len + pending);
+ read_buffered_data(RSTRING_PTR(str)+len, pending, fptr);
+ }
+ len += pending;
+ if (e) break;
+ }
+ rb_thread_wait_fd(fptr->fd);
+ rb_io_check_closed(fptr);
+ if (io_fillbuf(fptr) < 0) {
+ if (NIL_P(str)) return Qnil;
break;
}
}
- if (!NIL_P(str)) {
- str = io_enc_str(str, fptr);
- if (!nolimit) {
- fptr->lineno++;
- lineno = INT2FIX(fptr->lineno);
- }
- }
+ str = io_enc_str(str, fptr);
+ fptr->lineno++;
+ lineno = INT2FIX(fptr->lineno);
return str;
}
@@ -1838,11 +1862,12 @@
}
}
}
- GetOpenFile(io, fptr);
- if (!NIL_P(rs)) {
- rb_encoding *enc_rs = rb_enc_get(rs);
- rb_encoding *enc_io = io_read_encoding(fptr);
+ if (!NIL_P(rs) && rs != rb_default_rs) {
+ rb_encoding *enc_rs, *enc_io;
+ GetOpenFile(io, fptr);
+ enc_rs = rb_enc_get(rs);
+ enc_io = io_read_encoding(fptr);
if (enc_io != enc_rs &&
(rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT ||
!rb_enc_asciicompat(enc_io))) {
@@ -1876,8 +1901,8 @@
else if (limit == 0) {
return rb_enc_str_new(0, 0, io_read_encoding(fptr));
}
- else if (rs == rb_default_rs) {
- return rb_io_getline_fast(fptr, '\n', limit);
+ else if (rs == rb_default_rs && limit < 0) {
+ return rb_io_getline_fast(fptr);
}
else {
int c, newline;
@@ -1893,15 +1918,12 @@
swallow(fptr, '\n');
rs = 0;
}
- else if (rslen == 1) {
- return rb_io_getline_fast(fptr, (unsigned char)RSTRING_PTR(rs)[0], limit);
- }
else {
rsptr = RSTRING_PTR(rs);
}
newline = rsptr[rslen - 1];
- while ((c = appendline(fptr, newline, rsptr, rslen, &str, &limit)) != EOF) {
+ while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
if (c == newline) {
const char *s, *p, *pp;
@@ -1954,7 +1976,7 @@
GetOpenFile(io, fptr);
rb_io_check_readable(fptr);
- return rb_io_getline_fast(fptr, '\n', 0);
+ return rb_io_getline_fast(fptr);
}
/*
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/