ruby-changes:47942
From: nobu <ko1@a...>
Date: Fri, 29 Sep 2017 16:43:27 +0900 (JST)
Subject: [ruby-changes:47942] nobu:r60057 (trunk): array.c: improve operations on small arrays
nobu 2017-09-29 16:43:22 +0900 (Fri, 29 Sep 2017) New Revision: 60057 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=60057 Log: array.c: improve operations on small arrays [Feature #13884] Reduce number of memory allocations for "and", "or" and "diff" operations on small arrays Very often, arrays are used to filter parameters and to select interesting items from 2 collections and very often these collections are small enough, for example: ```ruby SAFE_COLUMNS = [:id, :title, :created_at] def columns @all_columns & SAFE_COLUMNS end ``` In this patch, I got rid of unnecessary memory allocations for small arrays when "and", "or" and "diff" operations are performed. name | HEAD | PATCH -----------------+------:+------: array_small_and | 0.615 | 0.263 array_small_diff | 0.676 | 0.282 array_small_or | 0.953 | 0.463 name | PATCH -----------------+------: array_small_and | 2.343 array_small_diff | 2.392 array_small_or | 2.056 name | HEAD | PATCH -----------------+------:+------: array_small_and | 1.429 | 1.005 array_small_diff | 1.493 | 0.878 array_small_or | 1.672 | 1.152 name | PATCH -----------------+------: array_small_and | 1.422 array_small_diff | 1.700 array_small_or | 1.452 Author: Dmitry Bochkarev <dimabochkarev@g...> Added files: trunk/benchmark/bm_array_small_and.rb trunk/benchmark/bm_array_small_diff.rb trunk/benchmark/bm_array_small_or.rb Modified files: trunk/array.c trunk/spec/ruby/core/array/intersection_spec.rb trunk/spec/ruby/core/array/minus_spec.rb trunk/spec/ruby/core/array/union_spec.rb trunk/test/ruby/test_array.rb Index: benchmark/bm_array_small_or.rb =================================================================== --- benchmark/bm_array_small_or.rb (nonexistent) +++ benchmark/bm_array_small_or.rb (revision 60057) @@ -0,0 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/bm_array_small_or.rb#L1 +MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN', 0).to_i +MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX', 16).to_i +ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS', 100).to_i + +ARRAYS = (MIN_SIZE..MAX_SIZE).map do |size1| + (MIN_SIZE..MAX_SIZE).map do |size2| + [Array.new(size1) { rand(MAX_SIZE) }, Array.new(size2) { rand(MAX_SIZE) }] + end +end + +ITERATIONS.times do + ARRAYS.each do |group| + group.each do |arr1, arr2| + arr1 | arr2 + end + end +end Property changes on: benchmark/bm_array_small_or.rb ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +LF \ No newline at end of property Index: benchmark/bm_array_small_and.rb =================================================================== --- benchmark/bm_array_small_and.rb (nonexistent) +++ benchmark/bm_array_small_and.rb (revision 60057) @@ -0,0 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/bm_array_small_and.rb#L1 +MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN', 0).to_i +MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX', 16).to_i +ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS', 100).to_i + +ARRAYS = (MIN_SIZE..MAX_SIZE).map do |size1| + (MIN_SIZE..MAX_SIZE).map do |size2| + [Array.new(size1) { rand(MAX_SIZE) }, Array.new(size2) { rand(MAX_SIZE) }] + end +end + +ITERATIONS.times do + ARRAYS.each do |group| + group.each do |arr1, arr2| + arr1 & arr2 + end + end +end Property changes on: benchmark/bm_array_small_and.rb ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +LF \ No newline at end of property Index: benchmark/bm_array_small_diff.rb =================================================================== --- benchmark/bm_array_small_diff.rb (nonexistent) +++ benchmark/bm_array_small_diff.rb (revision 60057) @@ -0,0 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/bm_array_small_diff.rb#L1 +MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN', 0).to_i +MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX', 16).to_i +ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS', 100).to_i + +ARRAYS = (MIN_SIZE..MAX_SIZE).map do |size1| + (MIN_SIZE..MAX_SIZE).map do |size2| + [Array.new(size1) { rand(MAX_SIZE) }, Array.new(size2) { rand(MAX_SIZE) }] + end +end + +ITERATIONS.times do + ARRAYS.each do |group| + group.each do |arr1, arr2| + arr1 - arr2 + end + end +end Property changes on: benchmark/bm_array_small_diff.rb ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +LF \ No newline at end of property Index: test/ruby/test_array.rb =================================================================== --- test/ruby/test_array.rb (revision 60056) +++ test/ruby/test_array.rb (revision 60057) @@ -224,6 +224,13 @@ class TestArray < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_array.rb#L224 assert_equal(@cls[], @cls[ 1, 2, 3 ] & @cls[ 4, 5, 6 ]) end + def test_AND_big_array # '&' + assert_equal(@cls[1, 3], @cls[ 1, 1, 3, 5 ]*64 & @cls[ 1, 2, 3 ]*64) + assert_equal(@cls[], @cls[ 1, 1, 3, 5 ]*64 & @cls[ ]) + assert_equal(@cls[], @cls[ ] & @cls[ 1, 2, 3 ]*64) + assert_equal(@cls[], @cls[ 1, 2, 3 ]*64 & @cls[ 4, 5, 6 ]*64) + end + def test_MUL # '*' assert_equal(@cls[], @cls[]*3) assert_equal(@cls[1, 1, 1], @cls[1]*3) @@ -260,6 +267,18 @@ class TestArray < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_array.rb#L267 assert_equal(@cls[1, 2, 3], @cls[1, 2, 3] - @cls[4, 5, 6]) end + def test_MINUS_big_array # '-' + assert_equal(@cls[1]*64, @cls[1, 2, 3, 4, 5]*64 - @cls[2, 3, 4, 5]*64) + # Ruby 1.8 feature change + #assert_equal(@cls[1], @cls[1, 2, 1, 3, 1, 4, 1, 5]*64 - @cls[2, 3, 4, 5]*64) + assert_equal(@cls[1, 1, 1, 1]*64, @cls[1, 2, 1, 3, 1, 4, 1, 5]*64 - @cls[2, 3, 4, 5]*64) + a = @cls[] + 1000.times { a << 1 } + assert_equal(1000, a.length) + #assert_equal(@cls[1], a - @cls[2]) + assert_equal(@cls[1] * 1000, a - @cls[2]) + end + def test_LSHIFT # '<<' a = @cls[] a << 1 @@ -1837,6 +1856,31 @@ class TestArray < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_array.rb#L1856 assert_equal([obj1], [obj1]|[obj2]) end + def test_OR_big_in_order + obj1, obj2 = Class.new do + attr_reader :name + def initialize(name) @name = name; end + def inspect; "test_OR_in_order(#{@name})"; end + def hash; 0; end + def eql?(a) true; end + break [new("1"), new("2")] + end + assert_equal([obj1], [obj1]*64|[obj2]*64) + end + + def test_OR_big_array # '|' + assert_equal(@cls[1,2], @cls[1]*64 | @cls[2]*64) + assert_equal(@cls[1,2], @cls[1, 2]*64 | @cls[1, 2]*64) + + a = (1..64).to_a + b = (1..128).to_a + c = a | b + assert_equal(c, b) + assert_not_same(c, b) + assert_equal((1..64).to_a, a) + assert_equal((1..128).to_a, b) + end + def test_combination a = @cls[] assert_equal(1, a.combination(0).size) Index: array.c =================================================================== --- array.c (revision 60056) +++ array.c (revision 60057) @@ -30,6 +30,7 @@ VALUE rb_cArray; https://github.com/ruby/ruby/blob/trunk/array.c#L30 #define ARY_DEFAULT_SIZE 16 #define ARY_MAX_SIZE (LONG_MAX / (int)sizeof(VALUE)) +#define SMALL_ARRAY_LEN 16 # define ARY_SHARED_P(ary) \ (assert(!FL_TEST((ary), ELTS_SHARED) || !FL_TEST((ary), RARRAY_EMBED_FLAG)), \ @@ -3985,6 +3986,20 @@ rb_ary_includes(VALUE ary, VALUE item) https://github.com/ruby/ruby/blob/trunk/array.c#L3986 return Qfalse; } +static VALUE +rb_ary_includes_by_eql(VALUE ary, VALUE item) +{ + long i; + VALUE e; + + for (i=0; i<RARRAY_LEN(ary); i++) { + e = RARRAY_AREF(ary, i); + if (rb_eql(item, e)) { + return Qtrue; + } + } + return Qfalse; +} static VALUE recursive_cmp(VALUE ary1, VALUE ary2, int recur) @@ -4135,9 +4150,19 @@ rb_ary_diff(VALUE ary1, VALUE ary2) https://github.com/ruby/ruby/blob/trunk/array.c#L4150 VALUE hash; long i; - hash = ary_make_hash(to_ary(ary2)); + ary2 = to_ary(ary2); ary3 = rb_ary_new(); + if (RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) { + for (i=0; i<RARRAY_LEN(ary1); i++) { + VALUE elt = rb_ary_elt(ary1, i); + if (rb_ary_includes_by_eql(ary2, elt)) continue; + rb_ary_push(ary3, elt); + } + return ary3; + } + + hash = ary_make_hash(ary2); for (i=0; i<RARRAY_LEN(ary1); i++) { if (st_lookup(rb_hash_tbl_raw(hash), RARRAY_AREF(ary1, i), 0)) continue; rb_ary_push(ary3, rb_ary_elt(ary1, i)); @@ -4173,6 +4198,17 @@ rb_ary_and(VALUE ary1, VALUE ary2) https://github.com/ruby/ruby/blob/trunk/array.c#L4198 ary2 = to_ary(ary2); ary3 = rb_ary_new(); if (RARRAY_LEN(ary2) == 0) return ary3; + + if (RARRAY_LEN(ary1) <= SMALL_ARRAY_LEN && RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) { + for (i=0; i<RARRAY_LEN(ary1); i++) { + v = RARRAY_AREF(ary1, i); + if (!rb_ary_includes_by_eql(ary2, v)) continue; + if (rb_ary_includes_by_eql(ary3, v)) continue; + rb_ary_push(ary3, v); + } + return ary3; + } + hash = ary_make_hash(ary2); table = rb_hash_tbl_raw(hash); @@ -4218,8 +4254,22 @@ rb_ary_or(VALUE ary1, VALUE ary2) https://github.com/ruby/ruby/blob/trunk/array.c#L4254 long i; ary2 = to_ary(ary2); - hash = ary_make_hash(ary1); + if (RARRAY_LEN(ary1) + RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) { + ary3 = rb_ary_new(); + for (i=0; i<RARRAY_LEN(ary1); i++) { + VALUE elt = rb_ary_elt(ary1, i); + if (rb_ary_includes_by_eql(ary3, elt)) continue; + rb_ary_push(ary3, elt); + } + for (i=0; i<RARRAY_LEN(ary2); i++) { + VALUE elt = rb_ary_elt(ary2, i); + if (rb_ary_includes_by_eql(ary3, elt)) continue; + rb_ary_push(ary3, elt); + } + return ary3; + } + hash = ary_make_hash(ary1); for (i=0; i<RARRAY_LEN(ary2); i++) { VALUE elt = RARRAY_AREF(ary2, i); if (!st_update(RHASH_TBL_RAW(hash), (st_data_t)elt, ary_hash_orset, (st_data_t)elt)) { Index: spec/ruby/core/array/union_spec.rb =================================================================== --- spec/ruby/core/array/union_spec.rb (revision 60056) +++ spec/ruby/core/array/union_spec.rb (revision 60057) @@ -45,8 +45,8 @@ describe "Array#|" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/union_spec.rb#L45 obj1 = mock('1') obj2 = mock('2') - obj1.should_receive(:hash).at_least(1).and_return(0) - obj2.should_receive(:hash).at_least(1).and_return(0) + obj1.stub!(:hash).and_return(0) + obj2.stub!(:hash).and_return(0) obj2.should_receive(:eql?).at_least(1).and_return(true) ([obj1] | [obj2]).should == [obj1] @@ -54,8 +54,8 @@ describe "Array#|" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/union_spec.rb#L54 obj1 = mock('3') obj2 = mock('4') - obj1.should_receive(:hash).at_least(1).and_return(0) - obj2.should_receive(:hash).at_least(1).and_return(0) + obj1.stub!(:hash).and_return(0) + obj2.stub!(:hash).and_return(0) obj2.should_receive(:eql?).at_least(1).and_return(false) ([obj1] | [obj2]).should == [obj1, obj2] @@ -74,7 +74,7 @@ describe "Array#|" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/union_spec.rb#L74 it "properly handles an identical item even when its #eql? isn't reflexive" do x = mock('x') - x.should_receive(:hash).at_least(1).and_return(42) + x.stub!(:hash).and_return(42) x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI. ([x] | [x]).should == [x] Index: spec/ruby/core/array/minus_spec.rb =================================================================== --- spec/ruby/core/array/minus_spec.rb (revision 60056) +++ spec/ruby/core/array/minus_spec.rb (revision 60057) @@ -46,8 +46,8 @@ describe "Array#-" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/minus_spec.rb#L46 it "removes an item identified as equivalent via #hash and #eql?" do obj1 = mock('1') obj2 = mock('2') - obj1.should_receive(:hash).at_least(1).and_return(0) - obj2.should_receive(:hash).at_least(1).and_return(0) + obj1.stub!(:hash).and_return(0) + obj2.stub!(:hash).and_return(0) obj1.should_receive(:eql?).at_least(1).and_return(true) ([obj1] - [obj2]).should == [] @@ -57,8 +57,8 @@ describe "Array#-" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/minus_spec.rb#L57 it "doesn't remove an item with the same hash but not #eql?" do obj1 = mock('1') obj2 = mock('2') - obj1.should_receive(:hash).at_least(1).and_return(0) - obj2.should_receive(:hash).at_least(1).and_return(0) + obj1.stub!(:hash).and_return(0) + obj2.stub!(:hash).and_return(0) obj1.should_receive(:eql?).at_least(1).and_return(false) ([obj1] - [obj2]).should == [obj1] @@ -67,7 +67,7 @@ describe "Array#-" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/minus_spec.rb#L67 it "removes an identical item even when its #eql? isn't reflexive" do x = mock('x') - x.should_receive(:hash).at_least(1).and_return(42) + x.stub!(:hash).and_return(42) x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI. ([x] - [x]).should == [] Index: spec/ruby/core/array/intersection_spec.rb =================================================================== --- spec/ruby/core/array/intersection_spec.rb (revision 60056) +++ spec/ruby/core/array/intersection_spec.rb (revision 60057) @@ -49,17 +49,18 @@ describe "Array#&" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/intersection_spec.rb#L49 obj1 = mock('1') obj2 = mock('2') - obj1.should_receive(:hash).at_least(1).and_return(0) - obj2.should_receive(:hash).at_least(1).and_return(0) + obj1.stub!(:hash).and_return(0) + obj2.stub!(:hash).and_return(0) obj1.should_receive(:eql?).at_least(1).and_return(true) + obj2.should_receive(:eql?).at_least(1).and_return(true) ([obj1] & [obj2]).should == [obj1] ([obj1, obj1, obj2, obj2] & [obj2]).should == [obj1] obj1 = mock('3') obj2 = mock('4') - obj1.should_receive(:hash).at_least(1).and_return(0) - obj2.should_receive(:hash).at_least(1).and_return(0) + obj1.stub!(:hash).and_return(0) + obj2.stub!(:hash).and_return(0) obj1.should_receive(:eql?).at_least(1).and_return(false) ([obj1] & [obj2]).should == [] @@ -78,7 +79,7 @@ describe "Array#&" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/array/intersection_spec.rb#L79 it "properly handles an identical item even when its #eql? isn't reflexive" do x = mock('x') - x.should_receive(:hash).at_least(1).and_return(42) + x.stub!(:hash).and_return(42) x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI. ([x] & [x]).should == [x] -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/