[前][次][番号順一覧][スレッド一覧]

ruby-changes:3710

From: ko1@a...
Date: Thu, 24 Jan 2008 03:44:13 +0900 (JST)
Subject: [ruby-changes:3710] matz - Ruby:r15199 (trunk): * string.c (rb_str_each_line): use memchr(3) for faster newline

matz	2008-01-24 03:43:51 +0900 (Thu, 24 Jan 2008)

  New Revision: 15199

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/io.c
    trunk/string.c

  Log:
    * string.c (rb_str_each_line): use memchr(3) for faster newline
      search.
    
    * io.c (appendline): remove unused arguments.
    
    * io.c (rb_io_getline_fast): make it much simpler (and faster).

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15199&r2=15198&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15199&r2=15198&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/io.c?r1=15199&r2=15198&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=15199&r2=15198&diff_format=u

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 15198)
+++ include/ruby/encoding.h	(revision 15199)
@@ -134,7 +134,7 @@
 #define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
 
 /* ptr, ptr, encoding -> newline_or_not */
-#define rb_enc_is_newline(p,end,enc)  ONIGENC_IS_MBC_NEWLINE(enc,p,end)
+#define rb_enc_is_newline(p,end,enc)  ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)p,(UChar*)end)
 
 #define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
 #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15198)
+++ ChangeLog	(revision 15199)
@@ -1,3 +1,12 @@
+Thu Jan 24 03:23:44 2008  Yukihiro Matsumoto  <matz@r...>
+
+	* string.c (rb_str_each_line): use memchr(3) for faster newline
+	  search.
+
+	* io.c (appendline): remove unused arguments
+
+	* io.c (rb_io_getline_fast): make much simpler (and faster).
+
 Thu Jan 24 02:13:07 2008  Yusuke Endoh  <mame@t...>
 
 	* insns.def (expandarray): fix stack inc.
Index: string.c
===================================================================
--- string.c	(revision 15198)
+++ string.c	(revision 15199)
@@ -4470,9 +4470,8 @@
     rb_encoding *enc;
     VALUE rs;
     int newline;
-    char *p = RSTRING_PTR(str), *pend = p + RSTRING_LEN(str), *s = p;
-    char *ptr = p;
-    long len = RSTRING_LEN(str), rslen;
+    char *p, *pend, *s, *ptr;
+    long len, rslen; 
     VALUE line;
     int n;
 
@@ -4480,29 +4479,39 @@
 	rs = rb_rs;
     }
     RETURN_ENUMERATOR(str, argc, argv);
-
     if (NIL_P(rs)) {
 	rb_yield(str);
 	return str;
     }
+    str = rb_str_new4(str);
+    ptr = p = s = RSTRING_PTR(str);
+    pend = p + RSTRING_LEN(str);
+    len = RSTRING_LEN(str);
     StringValue(rs);
-    enc = rb_enc_check(str, rs);
     if (rs == rb_default_rs) {
+	enc = rb_enc_get(str);
 	while (p < pend) {
-	    n = rb_enc_mbclen(p, pend, enc);
-	    if (rb_enc_is_newline(p, pend, enc)) {
-		line = rb_str_new5(str, s, p - s + n);
-		OBJ_INFECT(line, str);
-		rb_enc_copy(line, str);
-		rb_yield(line);
-		str_mod_check(str, ptr, len);
-		s = p + n;
+	    char *p0;
+
+	    p = memchr(p, '\n', pend - p);
+	    if (!p) break;
+	    p0 = rb_enc_left_char_head(s, p, enc);
+	    if (!rb_enc_is_newline(p0, pend, enc)) {
+		p++;
+		continue;
 	    }
-	    p += n;
+	    p = p0 + rb_enc_mbclen(s, p0, enc);
+	    line = rb_str_new5(str, s, p - s);
+	    OBJ_INFECT(line, str);
+	    rb_enc_copy(line, str);
+	    rb_yield(line);
+	    str_mod_check(str, ptr, len);
+	    s = p;
 	}
 	goto finish;
     }
 
+    enc = rb_enc_check(str, rs);
     rslen = RSTRING_LEN(rs);
     if (rslen == 0) {
 	newline = '\n';
@@ -4535,8 +4544,7 @@
 
   finish:
     if (s != pend) {
-	if (p > pend) p = pend;
-	line = rb_str_new5(str, s, p - s);
+	line = rb_str_new5(str, s, pend - s);
 	OBJ_INFECT(line, str);
 	rb_enc_copy(line, str);
 	rb_yield(line);
Index: io.c
===================================================================
--- io.c	(revision 15198)
+++ io.c	(revision 15199)
@@ -1686,21 +1686,19 @@
 }
 
 static int
-appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE *strp, long *lp)
+appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
 {
     VALUE str = *strp;
     int c = EOF;
     long limit = *lp;
 
-    if (rsptr == 0)
-      rslen = 1;
-
     do {
 	long pending = READ_DATA_PENDING_COUNT(fptr);
 	if (pending > 0) {
 	    const char *p = READ_DATA_PENDING_PTR(fptr);
 	    const char *e;
 	    long last = 0, len = (c != EOF);
+	    rb_encoding *enc = io_read_encoding(fptr);
 
 	    if (limit > 0 && pending > limit) pending = limit;
 	    e = memchr(p, delim, pending);
@@ -1720,7 +1718,7 @@
 	    if (limit > 0 && limit == pending) {
 		char *p = fptr->rbuf+fptr->rbuf_off;
 		char *pp = p + limit;
-		char *pl = rb_enc_left_char_head(p, pp, io_read_encoding(fptr));
+		char *pl = rb_enc_left_char_head(p, pp, enc);
 
 		if (pl < pp) {
 		    int diff = pp - pl;
@@ -1790,27 +1788,53 @@
 }
 
 static VALUE
-rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit)
+rb_io_getline_fast(rb_io_t *fptr)
 {
     VALUE str = Qnil;
-    int c, nolimit = 0;
+    int len = 0;
+    rb_encoding *enc = io_read_encoding(fptr);
 
     for (;;) {
-	c = appendline(fptr, delim, 0, 0, &str, &limit);
-	if (c == EOF || c == delim) break;
-	if (limit == 0) {
-	    nolimit = 1;
+	long pending = READ_DATA_PENDING_COUNT(fptr);
+
+	if (pending > 0) {
+	    const char *p = READ_DATA_PENDING_PTR(fptr);
+	    const char *e;
+
+	    e = memchr(p, '\n', pending);
+	    if (e) {
+		const char *p0 = rb_enc_left_char_head(p, e, enc);
+		const char *pend = rb_enc_left_char_head(p, p+pending, enc);
+		if (rb_enc_is_newline(p0, pend, enc)) {
+		    pending = p0 - p + rb_enc_mbclen(p0, pend, enc);
+		}
+		else {
+		    e = 0;
+		}
+	    }
+	    if (NIL_P(str)) {
+		str = rb_str_new(p, pending);
+		fptr->rbuf_off += pending;
+		fptr->rbuf_len -= pending;
+	    }
+	    else {
+		rb_str_resize(str, len + pending);
+		read_buffered_data(RSTRING_PTR(str)+len, pending, fptr);
+	    }
+	    len += pending;
+	    if (e) break;
+	}
+	rb_thread_wait_fd(fptr->fd);
+	rb_io_check_closed(fptr);
+	if (io_fillbuf(fptr) < 0) {
+	    if (NIL_P(str)) return Qnil;
 	    break;
 	}
     }
 
-    if (!NIL_P(str)) {
-	str = io_enc_str(str, fptr);
-	if (!nolimit) {
-	    fptr->lineno++;
-	    lineno = INT2FIX(fptr->lineno);
-	}
-    }
+    str = io_enc_str(str, fptr);
+    fptr->lineno++;
+    lineno = INT2FIX(fptr->lineno);
     return str;
 }
 
@@ -1838,11 +1862,12 @@
 	    }
 	}
     }
-    GetOpenFile(io, fptr);
-    if (!NIL_P(rs)) {
-	rb_encoding *enc_rs = rb_enc_get(rs);
-	rb_encoding *enc_io = io_read_encoding(fptr);
+    if (!NIL_P(rs) && rs != rb_default_rs) {
+	rb_encoding *enc_rs, *enc_io;
 
+	GetOpenFile(io, fptr);
+	enc_rs = rb_enc_get(rs);
+	enc_io = io_read_encoding(fptr);
 	if (enc_io != enc_rs &&
 	    (rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT ||
 	     !rb_enc_asciicompat(enc_io))) {
@@ -1876,8 +1901,8 @@
     else if (limit == 0) {
 	return rb_enc_str_new(0, 0, io_read_encoding(fptr));
     }
-    else if (rs == rb_default_rs) {
-	return rb_io_getline_fast(fptr, '\n', limit);
+    else if (rs == rb_default_rs && limit < 0) {
+	return rb_io_getline_fast(fptr);
     }
     else {
 	int c, newline;
@@ -1893,15 +1918,12 @@
 	    swallow(fptr, '\n');
 	    rs = 0;
 	}
-	else if (rslen == 1) {
-	    return rb_io_getline_fast(fptr, (unsigned char)RSTRING_PTR(rs)[0], limit);
-	}
 	else {
 	    rsptr = RSTRING_PTR(rs);
 	}
 	newline = rsptr[rslen - 1];
 
-	while ((c = appendline(fptr, newline, rsptr, rslen, &str, &limit)) != EOF) {
+	while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
 	    if (c == newline) {
 		const char *s, *p, *pp;
 		
@@ -1954,7 +1976,7 @@
 
     GetOpenFile(io, fptr);
     rb_io_check_readable(fptr);
-    return rb_io_getline_fast(fptr, '\n', 0);
+    return rb_io_getline_fast(fptr);
 }
 
 /*

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]