[前][次][番号順一覧][スレッド一覧]

ruby-changes:45307

From: nobu <ko1@a...>
Date: Fri, 20 Jan 2017 11:39:32 +0900 (JST)
Subject: [ruby-changes:45307] nobu:r57380 (trunk): array.c: improve Array#sample

nobu	2017-01-20 11:39:27 +0900 (Fri, 20 Jan 2017)

  New Revision: 57380

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=57380

  Log:
    array.c: improve Array#sample
    
    * array.c (rb_ary_sample): improve performance when many samples
      from a large array.  based on the patch by tomoya ishida
      <tomoyapenguin AT gmail.com> in [ruby-dev:49956].  [Bug #13136]

  Modified files:
    trunk/array.c
Index: array.c
===================================================================
--- array.c	(revision 57379)
+++ array.c	(revision 57380)
@@ -4797,6 +4797,7 @@ rb_ary_sample(int argc, VALUE *argv, VAL https://github.com/ruby/ruby/blob/trunk/array.c#L4797
     VALUE opts, randgen = rb_cRandom;
     long n, len, i, j, k, idx[10];
     long rnds[numberof(idx)];
+    long memo_threshold;
 
     if (OPTHASH_GIVEN_P(opts)) {
 	VALUE rnd;
@@ -4856,6 +4857,11 @@ rb_ary_sample(int argc, VALUE *argv, VAL https://github.com/ruby/ruby/blob/trunk/array.c#L4857
 	}
 	return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), RARRAY_AREF(ary, k));
     }
+    memo_threshold =
+	len < 2560 ? len / 128 :
+	len < 5120 ? len / 64 :
+	len < 10240 ? len / 32 :
+	len / 16;
     if (n <= numberof(idx)) {
 	long sorted[numberof(idx)];
 	sorted[0] = idx[0] = rnds[0];
@@ -4875,6 +4881,38 @@ rb_ary_sample(int argc, VALUE *argv, VAL https://github.com/ruby/ruby/blob/trunk/array.c#L4881
 	    }
 	});
     }
+    else if (n <= memo_threshold / 2) {
+	long max_idx = 0;
+#undef RUBY_UNTYPED_DATA_WARNING
+#define RUBY_UNTYPED_DATA_WARNING 0
+	VALUE vmemo = Data_Wrap_Struct(0, 0, 0, st_free_table);
+	st_table *memo = st_init_numtable_with_size(n);
+	DATA_PTR(vmemo) = memo;
+	result = rb_ary_new_capa(n);
+	RARRAY_PTR_USE(result, ptr_result, {
+	    for (i=0; i<n; i++) {
+		long r = RAND_UPTO(len-i) + i;
+		ptr_result[i] = r;
+		if (r > max_idx) max_idx = r;
+	    }
+	    len = RARRAY_LEN(ary);
+	    if (len <= max_idx) n = 0;
+	    else if (n > len) n = len;
+	    RARRAY_PTR_USE(ary, ptr_ary, {
+		for (i=0; i<n; i++) {
+		    long j2 = j = ptr_result[i];
+		    long i2 = i;
+		    st_data_t value;
+		    if (st_lookup(memo, (st_data_t)i, &value)) i2 = (long)value;
+		    if (st_lookup(memo, (st_data_t)j, &value)) j2 = (long)value;
+		    st_insert(memo, (st_data_t)j, (st_data_t)i2);
+		    ptr_result[i] = ptr_ary[j2];
+		}
+	    });
+	});
+	DATA_PTR(vmemo) = 0;
+	st_free_table(memo);
+    }
     else {
 	result = rb_ary_dup(ary);
 	RBASIC_CLEAR_CLASS(result);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]