[前][次][番号順一覧][スレッド一覧]

ruby-changes:25715

From: glass <ko1@a...>
Date: Wed, 21 Nov 2012 00:17:25 +0900 (JST)
Subject: [ruby-changes:25715] glass:r37772 (trunk): * marshal.c: add marshal readahead. marshalized Array, Hash and Struct

glass	2012-11-21 00:17:15 +0900 (Wed, 21 Nov 2012)

  New Revision: 37772

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=37772

  Log:
    * marshal.c: add marshal readahead. A marshaled Array, Hash or Struct
      is at least as many bytes long as it has elements, so marshal
      readahead reads up to that known-readable length into a buffer
      whenever it needs more bytes. This avoids many calls to IO#getbyte
      and IO#read, which improves performance.
      [ruby-dev:45637] [Feature #6440]

  Modified files:
    trunk/ChangeLog
    trunk/marshal.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 37771)
+++ ChangeLog	(revision 37772)
@@ -1,3 +1,12 @@
+Tue Nov 20 23:28:26 2012  Masaki Matsushita  <glass.saga@g...>
+
+	* marshal.c: add marshal readahead. marshalized Array, Hash and Struct
+	  have size at least number of its elements, marshal readahead will
+	  read the certain readable length and buffer when it needs more bytes.
+	  marshal readahead prevents many calls to IO#getbyte and IO#read,
+	  then it enables performace improvement.
+	  [ruby-dev:45637] [Feature #6440]
+
 Tue Nov 20 22:35:02 2012  NARUSE, Yui  <naruse@r...>
 
 	* Makefile.in (.d.h): replace char * to const char * because somehow
Index: marshal.c
===================================================================
--- marshal.c	(revision 37771)
+++ marshal.c	(revision 37772)
@@ -969,6 +969,9 @@
 
 struct load_arg {
     VALUE src;
+    char *buf;
+    long buflen;
+    long readable;
     long offset;
     st_table *symbols;
     st_table *data;
@@ -1022,6 +1025,13 @@
 static ID r_symbol(struct load_arg *arg);
 static VALUE path2class(VALUE path);
 
+NORETURN(static void too_short(void));
+static void
+too_short(void)
+{
+    rb_raise(rb_eArgError, "marshal data too short");
+}
+
 static st_index_t
 r_prepare(struct load_arg *arg)
 {
@@ -1031,6 +1041,27 @@
     return idx;
 }
 
+static unsigned char
+r_byte1_buffered(struct load_arg *arg)
+{
+    if (arg->buflen == 0) {
+	long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
+	VALUE str, n = LONG2NUM(readable);
+
+	str = rb_funcall2(arg->src, s_read, 1, &n);
+
+	check_load_arg(arg, s_read);
+	if (NIL_P(str)) too_short();
+	StringValue(str);
+	arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+	memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
+	arg->offset = 0;
+	arg->buflen = RSTRING_LEN(str);
+    }
+    arg->buflen--;
+    return arg->buf[arg->offset++];
+}
+
 static int
 r_byte(struct load_arg *arg)
 {
@@ -1041,15 +1072,19 @@
 	    c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
 	}
 	else {
-	    rb_raise(rb_eArgError, "marshal data too short");
+	    too_short();
 	}
     }
     else {
-	VALUE src = arg->src;
-	VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
-	check_load_arg(arg, s_getbyte);
-	if (NIL_P(v)) rb_eof_error();
-	c = (unsigned char)NUM2CHR(v);
+	if (arg->readable >0 || arg->buflen > 0) {
+	    c = r_byte1_buffered(arg);
+	}
+	else {
+	    VALUE v = rb_funcall2(arg->src, s_getbyte, 0, 0);
+	    check_load_arg(arg, s_getbyte);
+	    if (NIL_P(v)) rb_eof_error();
+	    c = (unsigned char)NUM2CHR(v);
+	}
     }
     return c;
 }
@@ -1102,6 +1137,68 @@
     return x;
 }
 
+static VALUE
+r_bytes1(long len, struct load_arg *arg)
+{
+    VALUE str, n = LONG2NUM(len);
+
+    str = rb_funcall2(arg->src, s_read, 1, &n);
+    check_load_arg(arg, s_read);
+    if (NIL_P(str)) too_short();
+    StringValue(str);
+    if (RSTRING_LEN(str) != len) too_short();
+    arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+
+    return str;
+}
+
+static VALUE
+r_bytes1_buffered(long len, struct load_arg *arg)
+{
+    VALUE str;
+
+    if (len <= arg->buflen) {
+	str = rb_str_new(arg->buf+arg->offset, len);
+	arg->offset += len;
+	arg->buflen -= len;
+    }
+    else {
+	long buflen = arg->buflen;
+	long readable = arg->readable + 1;
+	long tmp_len, read_len, need_len = len - buflen;
+	VALUE tmp, n;
+
+	readable = readable < BUFSIZ ? readable : BUFSIZ;
+	read_len = need_len > readable ? need_len : readable;
+	n = LONG2NUM(read_len);
+	tmp = rb_funcall2(arg->src, s_read, 1, &n);
+
+	check_load_arg(arg, s_read);
+	if (NIL_P(tmp)) too_short();
+	StringValue(tmp);
+
+	tmp_len = RSTRING_LEN(tmp);
+
+	if (tmp_len < need_len) too_short();
+	arg->infection |= (int)FL_TEST(tmp, MARSHAL_INFECTION);
+
+	str = rb_str_new(arg->buf+arg->offset, buflen);
+	rb_str_cat(str, RSTRING_PTR(tmp), need_len);
+
+	if (tmp_len > need_len) {
+	    buflen = tmp_len - need_len;
+	    memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
+	    arg->buflen = buflen;
+	}
+	else {
+	    arg->buflen = 0;
+	}
+	arg->offset = 0;
+    }
+
+    return str;
+}
+
 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
 
 static VALUE
@@ -1116,19 +1213,16 @@
 	    arg->offset += len;
 	}
 	else {
-	  too_short:
-	    rb_raise(rb_eArgError, "marshal data too short");
+	    too_short();
 	}
     }
     else {
-	VALUE src = arg->src;
-	VALUE n = LONG2NUM(len);
-	str = rb_funcall2(src, s_read, 1, &n);
-	check_load_arg(arg, s_read);
-	if (NIL_P(str)) goto too_short;
-	StringValue(str);
-	if (RSTRING_LEN(str) != len) goto too_short;
-	arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+	if (arg->readable > 0 || arg->buflen > 0) {
+	    str = r_bytes1_buffered(len, arg);
+	}
+	else {
+	    str = r_bytes1(len, arg);
+	}
     }
     return str;
 }
@@ -1545,10 +1639,13 @@
 
 	    v = rb_ary_new2(len);
 	    v = r_entry(v, arg);
+	    arg->readable += len - 1;
 	    while (len--) {
 		rb_ary_push(v, r_object(arg));
+		arg->readable--;
 	    }
             v = r_leave(v, arg);
+	    arg->readable++;
 	}
 	break;
 
@@ -1559,11 +1656,14 @@
 
 	    v = rb_hash_new();
 	    v = r_entry(v, arg);
+	    arg->readable += (len - 1) * 2;
 	    while (len--) {
 		VALUE key = r_object(arg);
 		VALUE value = r_object(arg);
 		rb_hash_aset(v, key, value);
+		arg->readable -= 2;
 	    }
+	    arg->readable += 2;
 	    if (type == TYPE_HASH_DEF) {
 		RHASH_IFNONE(v) = r_object(arg);
 	    }
@@ -1590,6 +1690,7 @@
                          rb_class2name(klass));
             }
 
+	    arg->readable += (len - 1) * 2;
 	    v = r_entry0(v, idx, arg);
 	    values = rb_ary_new2(len);
 	    for (i=0; i<len; i++) {
@@ -1602,9 +1703,11 @@
 			     rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
 		}
                 rb_ary_push(values, r_object(arg));
+		arg->readable -= 2;
 	    }
             rb_struct_initialize(v, values);
             v = r_leave(v, arg);
+	    arg->readable += 2;
 	}
 	break;
 
@@ -1751,6 +1854,13 @@
 static void
 clear_load_arg(struct load_arg *arg)
 {
+    if (arg->buf) {
+	xfree(arg->buf);
+	arg->buf = 0;
+    }
+    arg->buflen = 0;
+    arg->offset = 0;
+    arg->readable = 0;
     if (!arg->symbols) return;
     st_free_table(arg->symbols);
     arg->symbols = 0;
@@ -1803,7 +1913,13 @@
     arg->data    = st_init_numtable();
     arg->compat_tbl = st_init_numtable();
     arg->proc = 0;
+    arg->readable = 0;
 
+    if (NIL_P(v))
+	arg->buf = xmalloc(BUFSIZ);
+    else
+	arg->buf = 0;
+
     major = r_byte(arg);
     minor = r_byte(arg);
     if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]