ruby-changes:47942

nobu	2017-09-29 16:43:22 +0900 (Fri, 29 Sep 2017)

  New Revision: 60057

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=60057

  Log:
    array.c: improve operations on small arrays
    
    [Feature #13884]
    
    Reduce number of memory allocations for "and", "or" and "diff"
    operations on small arrays
    
    Very often, arrays are used to filter parameters and to select
    interesting items from 2 collections and very often these
    collections are small enough, for example:
    
    ```ruby
    SAFE_COLUMNS = [:id, :title, :created_at]
    
    def columns
      @all_columns & SAFE_COLUMNS
    end
    ```
    
    In this patch, I got rid of unnecessary memory allocations for
    small arrays when "and", "or" and "diff" operations are performed.
    
    name             | HEAD  | PATCH
    -----------------+------:+------:
    array_small_and  | 0.615 | 0.263
    array_small_diff | 0.676 | 0.282
    array_small_or   | 0.953 | 0.463
    
    name             | PATCH
    -----------------+------:
    array_small_and  | 2.343
    array_small_diff | 2.392
    array_small_or   | 2.056
    
    name             | HEAD  | PATCH
    -----------------+------:+------:
    array_small_and  | 1.429 | 1.005
    array_small_diff | 1.493 | 0.878
    array_small_or   | 1.672 | 1.152
    
    name             | PATCH
    -----------------+------:
    array_small_and  | 1.422
    array_small_diff | 1.700
    array_small_or   | 1.452
    
    Author:    Dmitry Bochkarev <dimabochkarev@g...>

  Added files:
    trunk/benchmark/bm_array_small_and.rb
    trunk/benchmark/bm_array_small_diff.rb
    trunk/benchmark/bm_array_small_or.rb
  Modified files:
    trunk/array.c
    trunk/spec/ruby/core/array/intersection_spec.rb
    trunk/spec/ruby/core/array/minus_spec.rb
    trunk/spec/ruby/core/array/union_spec.rb
    trunk/test/ruby/test_array.rb
Index: benchmark/bm_array_small_or.rb
===================================================================
--- benchmark/bm_array_small_or.rb	(nonexistent)
+++ benchmark/bm_array_small_or.rb	(revision 60057)
@@ -0,0 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/bm_array_small_or.rb#L1
+MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN', 0).to_i
+MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX', 16).to_i
+ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS', 100).to_i
+
+ARRAYS = (MIN_SIZE..MAX_SIZE).map do |size1|
+  (MIN_SIZE..MAX_SIZE).map do |size2|
+    [Array.new(size1) { rand(MAX_SIZE) }, Array.new(size2) { rand(MAX_SIZE) }]
+  end
+end
+
+ITERATIONS.times do
+  ARRAYS.each do |group|
+    group.each do |arr1, arr2|
+      arr1 | arr2
+    end
+  end
+end

Property changes on: benchmark/bm_array_small_or.rb
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+LF
\ No newline at end of property
Index: benchmark/bm_array_small_and.rb
===================================================================
--- benchmark/bm_array_small_and.rb	(nonexistent)
+++ benchmark/bm_array_small_and.rb	(revision 60057)
@@ -0,0 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/bm_array_small_and.rb#L1
+MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN', 0).to_i
+MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX', 16).to_i
+ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS', 100).to_i
+
+ARRAYS = (MIN_SIZE..MAX_SIZE).map do |size1|
+  (MIN_SIZE..MAX_SIZE).map do |size2|
+    [Array.new(size1) { rand(MAX_SIZE) }, Array.new(size2) { rand(MAX_SIZE) }]
+  end
+end
+
+ITERATIONS.times do
+  ARRAYS.each do |group|
+    group.each do |arr1, arr2|
+      arr1 & arr2
+    end
+  end
+end

Property changes on: benchmark/bm_array_small_and.rb
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+LF
\ No newline at end of property
Index: benchmark/bm_array_small_diff.rb
===================================================================
--- benchmark/bm_array_small_diff.rb	(nonexistent)
+++ benchmark/bm_array_small_diff.rb	(revision 60057)
@@ -0,0 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/bm_array_small_diff.rb#L1
+MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN', 0).to_i
+MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX', 16).to_i
+ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS', 100).to_i
+
+ARRAYS = (MIN_SIZE..MAX_SIZE).map do |size1|
+  (MIN_SIZE..MAX_SIZE).map do |size2|
+    [Array.new(size1) { rand(MAX_SIZE) }, Array.new(size2) { rand(MAX_SIZE) }]
+  end
+end
+
+ITERATIONS.times do
+  ARRAYS.each do |group|
+    group.each do |arr1, arr2|
+      arr1 - arr2
+    end
+  end
+end

Property changes on: benchmark/bm_array_small_diff.rb
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+LF
\ No newline at end of property
Index: test/ruby/test_array.rb
===================================================================
--- test/ruby/test_array.rb	(revision 60056)
+++ test/ruby/test_array.rb	(revision 60057)
@@ -224,6 +224,13 @@ class TestArray < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_array.rb#L224
     assert_equal(@cls[],     @cls[ 1, 2, 3 ]    & @cls[ 4, 5, 6 ])
   end
 
+  def test_AND_big_array # '&'
+    assert_equal(@cls[1, 3], @cls[ 1, 1, 3, 5 ]*64 & @cls[ 1, 2, 3 ]*64)
+    assert_equal(@cls[],     @cls[ 1, 1, 3, 5 ]*64 & @cls[ ])
+    assert_equal(@cls[],     @cls[  ]           & @cls[ 1, 2, 3 ]*64)
+    assert_equal(@cls[],     @cls[ 1, 2, 3 ]*64 & @cls[ 4, 5, 6 ]*64)
+  end
+
   def test_MUL # '*'
     assert_equal(@cls[], @cls[]*3)
     assert_equal(@cls[1, 1, 1], @cls[1]*3)
@@ -260,6 +267,18 @@ class TestArray < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_array.rb#L267
     assert_equal(@cls[1, 2, 3], @cls[1, 2, 3] - @cls[4, 5, 6])
   end
 
+  def test_MINUS_big_array # '-'
+    assert_equal(@cls[1]*64, @cls[1, 2, 3, 4, 5]*64 - @cls[2, 3, 4, 5]*64)
+    # Ruby 1.8 feature change
+    #assert_equal(@cls[1], @cls[1, 2, 1, 3, 1, 4, 1, 5]*64 - @cls[2, 3, 4, 5]*64)
+    assert_equal(@cls[1, 1, 1, 1]*64, @cls[1, 2, 1, 3, 1, 4, 1, 5]*64 - @cls[2, 3, 4, 5]*64)
+    a = @cls[]
+    1000.times { a << 1 }
+    assert_equal(1000, a.length)
+    #assert_equal(@cls[1], a - @cls[2])
+   assert_equal(@cls[1] * 1000, a - @cls[2])
+  end
+
   def test_LSHIFT # '<<'
     a = @cls[]
     a << 1
@@ -1837,6 +1856,31 @@ class TestArray < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_array.rb#L1856
     assert_equal([obj1], [obj1]|[obj2])
   end
 
+  def test_OR_big_in_order
+    obj1, obj2 = Class.new do
+      attr_reader :name
+      def initialize(name) @name = name; end
+      def inspect; "test_OR_in_order(#{@name})"; end
+      def hash; 0; end
+      def eql?(a) true; end
+      break [new("1"), new("2")]
+    end
+    assert_equal([obj1], [obj1]*64|[obj2]*64)
+  end
+
+  def test_OR_big_array # '|'
+    assert_equal(@cls[1,2], @cls[1]*64 | @cls[2]*64)
+    assert_equal(@cls[1,2], @cls[1, 2]*64 | @cls[1, 2]*64)
+
+    a = (1..64).to_a
+    b = (1..128).to_a
+    c = a | b
+    assert_equal(c, b)
+    assert_not_same(c, b)
+    assert_equal((1..64).to_a, a)
+    assert_equal((1..128).to_a, b)
+  end
+
   def test_combination
     a = @cls[]
     assert_equal(1, a.combination(0).size)
Index: array.c
===================================================================
--- array.c	(revision 60056)
+++ array.c	(revision 60057)
@@ -30,6 +30,7 @@ VALUE rb_cArray; https://github.com/ruby/ruby/blob/trunk/array.c#L30
 
 #define ARY_DEFAULT_SIZE 16
 #define ARY_MAX_SIZE (LONG_MAX / (int)sizeof(VALUE))
+#define SMALL_ARRAY_LEN 16
 
 # define ARY_SHARED_P(ary) \
     (assert(!FL_TEST((ary), ELTS_SHARED) || !FL_TEST((ary), RARRAY_EMBED_FLAG)), \
@@ -3985,6 +3986,20 @@ rb_ary_includes(VALUE ary, VALUE item) https://github.com/ruby/ruby/blob/trunk/array.c#L3986
     return Qfalse;
 }
 
+static VALUE
+rb_ary_includes_by_eql(VALUE ary, VALUE item)
+{
+    long i;
+    VALUE e;
+
+    for (i=0; i<RARRAY_LEN(ary); i++) {
+	e = RARRAY_AREF(ary, i);
+	if (rb_eql(item, e)) {
+	    return Qtrue;
+	}
+    }
+    return Qfalse;
+}
 
 static VALUE
 recursive_cmp(VALUE ary1, VALUE ary2, int recur)
@@ -4135,9 +4150,19 @@ rb_ary_diff(VALUE ary1, VALUE ary2) https://github.com/ruby/ruby/blob/trunk/array.c#L4150
     VALUE hash;
     long i;
 
-    hash = ary_make_hash(to_ary(ary2));
+    ary2 = to_ary(ary2);
     ary3 = rb_ary_new();
 
+    if (RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
+	for (i=0; i<RARRAY_LEN(ary1); i++) {
+	    VALUE elt = rb_ary_elt(ary1, i);
+	    if (rb_ary_includes_by_eql(ary2, elt)) continue;
+	    rb_ary_push(ary3, elt);
+	}
+	return ary3;
+    }
+
+    hash = ary_make_hash(ary2);
     for (i=0; i<RARRAY_LEN(ary1); i++) {
 	if (st_lookup(rb_hash_tbl_raw(hash), RARRAY_AREF(ary1, i), 0)) continue;
 	rb_ary_push(ary3, rb_ary_elt(ary1, i));
@@ -4173,6 +4198,17 @@ rb_ary_and(VALUE ary1, VALUE ary2) https://github.com/ruby/ruby/blob/trunk/array.c#L4198
     ary2 = to_ary(ary2);
     ary3 = rb_ary_new();
     if (RARRAY_LEN(ary2) == 0) return ary3;
+
+    if (RARRAY_LEN(ary1) <= SMALL_ARRAY_LEN && RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
+	for (i=0; i<RARRAY_LEN(ary1); i++) {
+	    v = RARRAY_AREF(ary1, i);
+	    if (!rb_ary_includes_by_eql(ary2, v)) continue;
+	    if (rb_ary_includes_by_eql(ary3, v)) continue;
+	    rb_ary_push(ary3, v);
+	}
+	return ary3;
+    }
+
     hash = ary_make_hash(ary2);
     table = rb_hash_tbl_raw(hash);
 
@@ -4218,8 +4254,22 @@ rb_ary_or(VALUE ary1, VALUE ary2) https://github.com/ruby/ruby/blob/trunk/array.c#L4254
     long i;
 
     ary2 = to_ary(ary2);
-    hash = ary_make_hash(ary1);
+    if (RARRAY_LEN(ary1) + RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
+	ary3 = rb_ary_new();
+	for (i=0; i<RARRAY_LEN(ary1); i++) {
+	    VALUE elt = rb_ary_elt(ary1, i);
+	    if (rb_ary_includes_by_eql(ary3, elt)) continue;
+	    rb_ary_push(ary3, elt);
+	}
+	for (i=0; i<RARRAY_LEN(ary2); i++) {
+	    VALUE elt = rb_ary_elt(ary2, i);
+	    if (rb_ary_includes_by_eql(ary3, elt)) continue;
+	    rb_ary_push(ary3, elt);
+	}
+	return ary3;
+    }
 
+    hash = ary_make_hash(ary1);
     for (i=0; i<RARRAY_LEN(ary2); i++) {
 	VALUE elt = RARRAY_AREF(ary2, i);
 	if (!st_update(RHASH_TBL_RAW(hash), (st_data_t)elt, ary_hash_orset, (st_data_t)elt)) {
Index: spec/ruby/core/array/union_spec.rb
===================================================================
--- spec/ruby/core/array/union_spec.rb	(revision 60056)
+++ spec/ruby/core/array/union_spec.rb	(revision 60057)
@@ -45,8 +45,8 @@ describe "Array#|" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/union_spec.rb#L45
 
     obj1 = mock('1')
     obj2 = mock('2')
-    obj1.should_receive(:hash).at_least(1).and_return(0)
-    obj2.should_receive(:hash).at_least(1).and_return(0)
+    obj1.stub!(:hash).and_return(0)
+    obj2.stub!(:hash).and_return(0)
     obj2.should_receive(:eql?).at_least(1).and_return(true)
 
     ([obj1] | [obj2]).should == [obj1]
@@ -54,8 +54,8 @@ describe "Array#|" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/union_spec.rb#L54
 
     obj1 = mock('3')
     obj2 = mock('4')
-    obj1.should_receive(:hash).at_least(1).and_return(0)
-    obj2.should_receive(:hash).at_least(1).and_return(0)
+    obj1.stub!(:hash).and_return(0)
+    obj2.stub!(:hash).and_return(0)
     obj2.should_receive(:eql?).at_least(1).and_return(false)
 
     ([obj1] | [obj2]).should == [obj1, obj2]
@@ -74,7 +74,7 @@ describe "Array#|" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/union_spec.rb#L74
 
   it "properly handles an identical item even when its #eql? isn't reflexive" do
     x = mock('x')
-    x.should_receive(:hash).at_least(1).and_return(42)
+    x.stub!(:hash).and_return(42)
     x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
 
     ([x] | [x]).should == [x]
Index: spec/ruby/core/array/minus_spec.rb
===================================================================
--- spec/ruby/core/array/minus_spec.rb	(revision 60056)
+++ spec/ruby/core/array/minus_spec.rb	(revision 60057)
@@ -46,8 +46,8 @@ describe "Array#-" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/minus_spec.rb#L46
   it "removes an item identified as equivalent via #hash and #eql?" do
     obj1 = mock('1')
     obj2 = mock('2')
-    obj1.should_receive(:hash).at_least(1).and_return(0)
-    obj2.should_receive(:hash).at_least(1).and_return(0)
+    obj1.stub!(:hash).and_return(0)
+    obj2.stub!(:hash).and_return(0)
     obj1.should_receive(:eql?).at_least(1).and_return(true)
 
     ([obj1] - [obj2]).should == []
@@ -57,8 +57,8 @@ describe "Array#-" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/minus_spec.rb#L57
   it "doesn't remove an item with the same hash but not #eql?" do
     obj1 = mock('1')
     obj2 = mock('2')
-    obj1.should_receive(:hash).at_least(1).and_return(0)
-    obj2.should_receive(:hash).at_least(1).and_return(0)
+    obj1.stub!(:hash).and_return(0)
+    obj2.stub!(:hash).and_return(0)
     obj1.should_receive(:eql?).at_least(1).and_return(false)
 
     ([obj1] - [obj2]).should == [obj1]
@@ -67,7 +67,7 @@ describe "Array#-" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/minus_spec.rb#L67
 
   it "removes an identical item even when its #eql? isn't reflexive" do
     x = mock('x')
-    x.should_receive(:hash).at_least(1).and_return(42)
+    x.stub!(:hash).and_return(42)
     x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
 
     ([x] - [x]).should == []
Index: spec/ruby/core/array/intersection_spec.rb
===================================================================
--- spec/ruby/core/array/intersection_spec.rb	(revision 60056)
+++ spec/ruby/core/array/intersection_spec.rb	(revision 60057)
@@ -49,17 +49,18 @@ describe "Array#&" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/intersection_spec.rb#L49
 
     obj1 = mock('1')
     obj2 = mock('2')
-    obj1.should_receive(:hash).at_least(1).and_return(0)
-    obj2.should_receive(:hash).at_least(1).and_return(0)
+    obj1.stub!(:hash).and_return(0)
+    obj2.stub!(:hash).and_return(0)
     obj1.should_receive(:eql?).at_least(1).and_return(true)
+    obj2.should_receive(:eql?).at_least(1).and_return(true)
 
     ([obj1] & [obj2]).should == [obj1]
     ([obj1, obj1, obj2, obj2] & [obj2]).should == [obj1]
 
     obj1 = mock('3')
     obj2 = mock('4')
-    obj1.should_receive(:hash).at_least(1).and_return(0)
-    obj2.should_receive(:hash).at_least(1).and_return(0)
+    obj1.stub!(:hash).and_return(0)
+    obj2.stub!(:hash).and_return(0)
     obj1.should_receive(:eql?).at_least(1).and_return(false)
 
     ([obj1] & [obj2]).should == []
@@ -78,7 +79,7 @@ describe "Array#&" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/intersection_spec.rb#L79
 
   it "properly handles an identical item even when its #eql? isn't reflexive" do
     x = mock('x')
-    x.should_receive(:hash).at_least(1).and_return(42)
+    x.stub!(:hash).and_return(42)
     x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
 
     ([x] & [x]).should == [x]

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/