ruby-changes:25598
From: mame <ko1@a...>
Date: Thu, 15 Nov 2012 00:59:58 +0900 (JST)
Subject: [ruby-changes:25598] mame:r37655 (trunk): * array.c (rb_ary_bsearch): add Array#bsearch for binary search.
mame 2012-11-15 00:53:50 +0900 (Thu, 15 Nov 2012) New Revision: 37655 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=37655 Log: * array.c (rb_ary_bsearch): add Array#bsearch for binary search. [ruby-core:36390] [Feature #4766] * test/ruby/test_array.rb: add a test for above. * range.c (range_bsearch): add Range#bsearch for binary search. [ruby-core:36390] [Feature #4766] * test/ruby/test_range.rb: add a test for above * NEWS: added the two new methods. Modified files: trunk/ChangeLog trunk/NEWS trunk/array.c trunk/range.c trunk/test/ruby/test_array.rb trunk/test/ruby/test_range.rb Index: array.c =================================================================== --- array.c (revision 37654) +++ array.c (revision 37655) @@ -2373,8 +2373,105 @@ return ary; } +/* + * call-seq: + * ary.bsearch {|x| block } -> elem + * + * By using binary search, finds a value from this array which meets + * the given condition in O(n log n) where n is the size of the array. + * + * You can use this method in two use cases: a find-minimum mode and + * a find-any mode. In either case, the elements of the array must be + * monotone (or sorted) with respect to the block. + * + * In find-minimum mode (this is a good choice for typical use case), + * the block must return true or false, and there must be an index i + * (0 <= i <= ary.size) so that: + * + * - the block returns false for any element whose index is less than + * i, and + * - the block returns true for any element whose index is greater + * than or equal to i. + * + * This method returns the i-th element. If i is equal to ary.size, + * it returns nil. + * + * ary = [0, 4, 7, 10, 12] + * ary.bsearch {|x| x >= 4 } #=> 4 + * ary.bsearch {|x| x >= 6 } #=> 7 + * ary.bsearch {|x| x >= -1 } #=> 0 + * ary.bsearch {|x| x >= 100 } #=> nil + * + * In find-any mode (this behaves like libc's bsearch(3)), the block + * must return a number, and there must be two indices i and j + * (0 <= i <= j <= ary.size) so that: + * + * - the block returns a positive number for ary[k] if 0 <= k < i, + * - the block returns zero for ary[k] if i <= k < j, and + * - the block returns a negative number for ary[k] if + * j <= k < ary.size. + * + * Under this condition, this method returns any element whose index + * is within i...j. If i is equal to j (i.e., there is no element + * that satisfies the block), this method returns nil. + * + * ary = [0, 4, 7, 10, 12] + * # try to find v such that 4 <= v < 8 + * ary.bsearch {|x| 1 - x / 4 } #=> 4 or 7 + * # try to find v such that 8 <= v < 10 + * ary.bsearch {|x| 4 - x / 2 } #=> nil + * + * You must not mix the two modes at a time; the block must always + * return either true/false, or always return a number. It is + * undefined which value is actually picked up at each iteration. + */ static VALUE +rb_ary_bsearch(VALUE ary) +{ + long low = 0, high = RARRAY_LEN(ary), mid; + int smaller = 0, satisfied = 0; + VALUE v, val; + + while (low < high) { + mid = low + ((high - low) / 2); + val = rb_ary_entry(ary, mid); + v = rb_yield(val); + if (FIXNUM_P(v)) { + if (FIX2INT(v) == 0) return val; + smaller = FIX2INT(v) < 0; + } + else if (v == Qtrue) { + satisfied = 1; + smaller = 1; + } + else if (v == Qfalse || v == Qnil) { + smaller = 0; + } + else if (rb_obj_is_kind_of(v, rb_cNumeric)) { + switch (rb_cmpint(rb_funcall(v, id_cmp, 1, INT2FIX(0)), v, INT2FIX(0))) { + case 0: return val; + case 1: smaller = 1; + case -1: smaller = 0; + } + } + else { + smaller = RTEST(v); + } + if (smaller) { + high = mid; + } + else { + low = mid + 1; + } + } + if (low == RARRAY_LEN(ary)) return Qnil; + if (!satisfied) return Qnil; + return rb_ary_entry(ary, low); +} + + +static VALUE sort_by_i(VALUE i) { return rb_yield(i); @@ -5399,6 +5496,7 @@ rb_define_method(rb_cArray, "take_while", rb_ary_take_while, 0); rb_define_method(rb_cArray, "drop", rb_ary_drop, 1); rb_define_method(rb_cArray, "drop_while", rb_ary_drop_while, 0); + rb_define_method(rb_cArray, "bsearch", rb_ary_bsearch, 0); id_cmp = rb_intern("<=>"); sym_random = ID2SYM(rb_intern("random")); Index: ChangeLog =================================================================== --- ChangeLog (revision 37654) +++ ChangeLog (revision 37655) @@ -1,3 +1,17 @@ +Thu Nov 15 00:47:20 2012 Yusuke Endoh <mame@t...> + + * array.c (rb_ary_bsearch): add Array#bsearch for binary search. + [ruby-core:36390] [Feature #4766] + + * test/ruby/test_array.rb: add a test for above. + + * range.c (range_bsearch): add Range#bsearch for binary search. + [ruby-core:36390] [Feature #4766] + + * test/ruby/test_range.rb: add a test for above + + * NEWS: added the two new methods. + Wed Nov 14 13:25:00 2012 Zachary Scott <zachary@z...> * lib/fileutils.rb (chmod): Add "X" to modes, convert format to table Index: range.c =================================================================== --- range.c (revision 37654) +++ range.c (revision 37655) @@ -13,6 +13,11 @@ #include "ruby/encoding.h" #include "internal.h" +#ifdef HAVE_FLOAT_H +#include <float.h> +#endif +#include <math.h> + VALUE rb_cRange; static ID id_cmp, id_succ, id_beg, id_end, id_excl; @@ -465,7 +470,292 @@ return range; } +/* + * call-seq: + * rng.bsearch {|obj| block } -> value + * + * By using binary search, finds a value in range which meets the given + * condition in O(n log n) where n is the size of the array. + * + * You can use this method in two use cases: a find-minimum mode and + * a find-any mode. In either case, the elements of the array must be + * monotone (or sorted) with respect to the block. + * + * In find-minimum mode (this is a good choice for typical use case), + * the block must return true or false, and there must be a value x + * so that: + * + * - the block returns false for any value which is less than x, and + * - the block returns true for any value which is greater than or + * equal to i. + * + * If x is within the range, this method returns the value x. + * Otherwise, it returns nil. + * + * ary = [0, 4, 7, 10, 12] + * (0...ary.size).bsearch {|i| ary[i] >= 4 } #=> 1 + * (0...ary.size).bsearch {|i| ary[i] >= 6 } #=> 2 + * (0...ary.size).bsearch {|i| ary[i] >= 8 } #=> 3 + * (0...ary.size).bsearch {|i| ary[i] >= 100 } #=> nil + * + * (0.0...Float::INFINITY).bsearch {|x| Math.log(x) >= 0 } #=> 1.0 + * + * In find-any mode (this behaves like libc's bsearch(3)), the block + * must return a number, and there must be two values x and y (x <= y) + * so that: + * + * - the block returns a positive number for v if v < x, + * - the block returns zero for v if x <= v < y, and + * - the block returns a negative number for v if y <= v. + * + * This method returns any value which is within the intersection of + * the given range and x...y (if any). If there is no value that + * satisfies the condition, it returns nil. + * + * ary = [0, 100, 100, 100, 200] + * (0..4).bsearch {|i| 100 - i } #=> 1, 2 or 3 + * (0..4).bsearch {|i| 300 - i } #=> nil + * (0..4).bsearch {|i| 50 - i } #=> nil + * + * You must not mix the two modes at a time; the block must always + * return either true/false, or always return a number. It is + * undefined which value is actually picked up at each iteration. + */ + static VALUE +range_bsearch(VALUE range) +{ + VALUE beg, end; + int smaller, satisfied = 0; + +#define BSEARCH_CHECK(val) \ + do { \ + VALUE v = rb_yield(val); \ + if (FIXNUM_P(v)) { \ + if (FIX2INT(v) == 0) return val; \ + smaller = FIX2INT(v) < 0; \ + } \ + else if (v == Qtrue) { \ + satisfied = 1; \ + smaller = 1; \ + } \ + else if (v == Qfalse || v == Qnil) { \ + smaller = 0; \ + } \ + else if (rb_obj_is_kind_of(v, rb_cNumeric)) { \ + switch (rb_cmpint(rb_funcall(v, id_cmp, 1, INT2FIX(0)), v, INT2FIX(0)) < 0) { \ + case 0: return val; \ + case 1: smaller = 1; \ + case -1: smaller = 0; \ + } \ + } \ + else { \ + smaller = RTEST(v); \ + } \ + } while (0) + + beg = RANGE_BEG(range); + end = RANGE_END(range); + + if (FIXNUM_P(beg) && FIXNUM_P(end)) { + long low = FIX2LONG(beg); + long high = FIX2LONG(end); + long mid, org_high; + if (EXCL(range)) high--; + org_high = high; + + while (low < high) { + mid = low + ((high - low) / 2); + BSEARCH_CHECK(INT2FIX(mid)); + if (smaller) { + high = mid; + } + else { + low = mid + 1; + } + } + if (low == org_high) { + BSEARCH_CHECK(INT2FIX(low)); + if (!smaller) return Qnil; + } + if (!satisfied) return Qnil; + return INT2FIX(low); + } + else if (TYPE(beg) == T_FLOAT || TYPE(end) == T_FLOAT) { + double low = RFLOAT_VALUE(rb_Float(beg)); + double high = RFLOAT_VALUE(rb_Float(end)); + double mid, org_high; + int count; +#ifdef FLT_RADIX +#ifdef DBL_MANT_DIG +#define BSEARCH_MAXCOUNT (((FLT_RADIX) - 1) * (DBL_MANT_DIG + DBL_MAX_EXP) + 100) +#else +#define BSEARCH_MAXCOUNT (53 + 1023 + 100) +#endif +#else +#define BSEARCH_MAXCOUNT (53 + 1023 + 100) +#endif + if (isinf(high) && high > 0) { + /* the range is (low..INFINITY) */ + double nhigh = 1.0, inc; + if (nhigh < low) nhigh = low; + count = BSEARCH_MAXCOUNT; + /* find upper bound by checking low, low*2, low*4, ... */ + while (count >= 0 && !isinf(nhigh)) { + BSEARCH_CHECK(DBL2NUM(nhigh)); + if (smaller) break; + high = nhigh; + nhigh *= 2; + count--; + } + if (isinf(nhigh) || count < 0) { + /* upper bound not found; then, search in very near INFINITY */ + /* (x..INFINITY where x is not INFINITY but x*2 is INFINITY) */ + inc = high / 2; + count = BSEARCH_MAXCOUNT; + while (count >= 0 && inc > 0) { + nhigh = high + inc; + if (!isinf(nhigh)) { + BSEARCH_CHECK(DBL2NUM(nhigh)); + if (smaller) { + /* upper bound found; */ + /* the desired value is within high..nhigh */ + low = high; + high = nhigh; + goto binsearch; + } + else { + high = nhigh; + } + } + inc /= 2; + count--; + } + /* lower bound not found; */ + /* there is no candidate except INFINITY itself */ + high *= 2; /* generate INFINITY */ + if (isinf(high) && !EXCL(range)) { + BSEARCH_CHECK(DBL2NUM(high)); + if (!satisfied) return Qnil; + if (smaller) return DBL2NUM(high); + } + return Qnil; + } + /* upper bound found; the desired value is within low..nhigh */ + high = nhigh; + } + if (isinf(low) && low < 0) { + /* the range is (-INFINITY..high) */ + double nlow = -1.0, dec; + if (nlow > high) nlow = high; + count = BSEARCH_MAXCOUNT; + /* find lower bound by checking low, low*2, low*4, ... */ + while (count >= 0 && !isinf(nlow)) { + BSEARCH_CHECK(DBL2NUM(nlow)); + if (!smaller) break; + low = nlow; + nlow *= 2; + count--; + } + if (isinf(nlow) || count < 0) { + /* lower bound not found; then, search in very near -INFINITY */ + /* (-INFINITY..x where x is not -INFINITY but x*2 is -INFINITY) */ + dec = low / 2; + count = BSEARCH_MAXCOUNT; + while (count >= 0 && dec < 0) { + nlow = low + dec; + if (!isinf(nlow)) { + BSEARCH_CHECK(DBL2NUM(nlow)); + if (!smaller) { + /* lower bound found; */ + /* the desired value is within nlow..low */ + high = low; + low = nlow; + goto binsearch; + } + else { + low = nlow; + } + } + dec /= 2; + count--; + } + /* lower bound not found; */ + /* there is no candidate except -INFINITY itself */ + nlow = low * 2; /* generate -INFINITY */ + if (isinf(nlow)) { + BSEARCH_CHECK(DBL2NUM(nlow)); + if (!satisfied) return Qnil; + if (smaller) return DBL2NUM(nlow); + } + if (!satisfied) return Qnil; + return DBL2NUM(low); + } + low = nlow; + } + + binsearch: + /* find the desired value within low..high */ + /* where low is not -INFINITY and high is not INFINITY */ + org_high = high; + count = BSEARCH_MAXCOUNT; + while (low < high && count >= 0) { + mid = low + ((high - low) / 2); + BSEARCH_CHECK(DBL2NUM(mid)); + if (smaller) { + high = mid; + } + else { + low = mid; + } + count--; + } + BSEARCH_CHECK(DBL2NUM(low)); + if (!smaller) { + BSEARCH_CHECK(DBL2NUM(high)); + if (!smaller) { + return Qnil; + } + low = high; + } + if (!satisfied) return Qnil; + if (EXCL(range) && low >= org_high) return Qnil; + return DBL2NUM(low); +#undef BSEARCH_MAXCOUNT + } + else if (!NIL_P(rb_check_to_integer(beg, "to_int")) && + !NIL_P(rb_check_to_integer(end, "to_int"))) { + VALUE low = beg; + VALUE high = end; + VALUE mid, org_high; + if (EXCL(range)) high = rb_funcall(high, '-', 1, INT2FIX(1)); + org_high = high; + + while (rb_cmpint(rb_funcall(low, id_cmp, 1, high), low, high) < 0) { + mid = rb_funcall(rb_funcall(high, '+', 1, low), '/', 1, INT2FIX(2)); + BSEARCH_CHECK(mid); + if (smaller) { + high = mid; + } + else { + low = rb_funcall(mid, '+', 1, INT2FIX(1)); + } + } + if (rb_equal(low, org_high)) { + BSEARCH_CHECK(low); + if (!smaller) return Qnil; + } + if (!satisfied) return Qnil; + return low; + } + else { + rb_raise(rb_eTypeError, "can't do binary search for %s", rb_obj_classname(beg)); + } + return range; + +} + +static VALUE each_i(VALUE v, void *arg) { rb_yield(v); @@ -1112,6 +1402,7 @@ rb_define_method(rb_cRange, "hash", range_hash, 0); rb_define_method(rb_cRange, "each", range_each, 0); rb_define_method(rb_cRange, "step", range_step, -1); + rb_define_method(rb_cRange, "bsearch", range_bsearch, 0); rb_define_method(rb_cRange, "begin", range_begin, 0); rb_define_method(rb_cRange, "end", range_end, 0); rb_define_method(rb_cRange, "first", range_first, -1); Index: NEWS =================================================================== --- NEWS (revision 37654) +++ NEWS (revision 37655) @@ -19,6 +19,8 @@ * builtin classes * Array + * added method: + * added Array#bsearch for binary search. * incompatible changes: * random parameter of Array#shuffle! and Array#sample now will be called with one argument, maximum value. @@ -98,6 +100,7 @@ * Range * added method: * added Range#size for lazy size evaluation. + * added Range#bsearch for binary search. * Signal * incompatible changes: Index: test/ruby/test_array.rb =================================================================== --- test/ruby/test_array.rb (revision 37654) +++ test/ruby/test_array.rb (revision 37655) @@ -2240,4 +2240,31 @@ a = [1,2,3] assert_raise(ArgumentError) { a.rotate!(1, 1) } end + + def test_bsearch_in_find_minimum_mode + a = [0, 4, 7, 10, 12] + assert_equal(4, a.bsearch {|x| x >= 4 }) + assert_equal(7, a.bsearch {|x| x >= 6 }) + assert_equal(0, a.bsearch {|x| x >= -1 }) + assert_equal(nil, a.bsearch {|x| x >= 100 }) + end + + def test_bsearch_in_find_any_mode + a = [0, 4, 7, 10, 12] + assert_include([4, 7], a.bsearch {|x| 1 - x / 4 }) + assert_equal(nil, a.bsearch {|x| 4 - x / 2 }) + assert_equal(nil, a.bsearch {|x| 1 }) + assert_equal(nil, a.bsearch {|x| -1 }) + + assert_include([4, 7], a.bsearch {|x| (1 - x / 4) * (2**100) }) + assert_equal(nil, a.bsearch {|x| 1 * (2**100) }) + assert_equal(nil, a.bsearch {|x| (-1) * (2**100) }) + + assert_include([4, 7], a.bsearch {|x| (2**100).coerce((1 - x / 4) * (2**100)).first }) + end + + def test_bsearch_undefined + a = [0, 4, 7, 10, 12] + assert_equal(nil, a.bsearch {|x| "foo" }) # undefined behavior + end end Index: test/ruby/test_range.rb =================================================================== --- test/ruby/test_range.rb (revision 37654) +++ test/ruby/test_range.rb (revision 37655) @@ -355,4 +355,82 @@ assert_equal 5, (1.1...6).size assert_equal 42, (1..42).each.size end + + def test_bsearch_for_fixnum + ary = [3, 4, 7, 9, 12] + assert_equal(0, (0...ary.size).bsearch {|i| ary[i] >= 2 }) + assert_equal(1, (0...ary.size).bsearch {|i| ary[i] >= 4 }) + assert_equal(2, (0...ary.size).bsearch {|i| ary[i] >= 6 }) + assert_equal(3, (0...ary.size).bsearch {|i| ary[i] >= 8 }) + assert_equal(4, (0...ary.size).bsearch {|i| ary[i] >= 10 }) + assert_equal(nil, (0...ary.size).bsearch {|i| ary[i] >= 100 }) + assert_equal(0, (0...ary.size).bsearch {|i| true }) + assert_equal(nil, (0...ary.size).bsearch {|i| false }) + + ary = [0, 100, 100, 100, 200] + assert_equal(1, (0...ary.size).bsearch {|i| ary[i] >= 100 }) + end + + def test_bsearch_for_float + inf = Float::INFINITY + assert_in_delta(10.0, (0.0...100.0).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001) + assert_in_delta(10.0, (0.0...inf).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001) + assert_in_delta(-10.0, (-inf..100.0).bsearch {|x| x >= 0 || Math.log(-x / 10) < 0 }, 0.0001) + assert_in_delta(10.0, (-inf..inf).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001) + assert_equal(nil, (-inf..5).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001) + + assert_in_delta(10.0, (-inf.. 10).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001) + assert_equal(nil, (-inf...10).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001) + + assert_equal(nil, (-inf..inf).bsearch { false }) + assert_equal(-inf, (-inf..inf).bsearch { true }) + + assert_equal(inf, (0..inf).bsearch {|x| x == inf }) + assert_equal(nil, (0...inf).bsearch {|x| x == inf }) + + v = (-inf..0).bsearch {|x| x != -inf } + assert_operator(-Float::MAX, :>=, v) + assert_operator(-inf, :<, v) + + v = (0.0..1.0).bsearch {|x| x > 0 } # the nearest positive value to 0.0 + assert_in_delta(0, v, 0.0001) + assert_operator(0, :<, v) + assert_equal(0.0, (-1.0..0.0).bsearch {|x| x >= 0 }) + assert_equal(nil, (-1.0...0.0).bsearch {|x| x >= 0 }) + + v = (0..Float::MAX).bsearch {|x| x >= Float::MAX } + assert_in_delta(Float::MAX, v) + assert_equal(nil, v.infinite?) + + v = (0..inf).bsearch {|x| x >= Float::MAX } + assert_in_delta(Float::MAX, v) + assert_equal(nil, v.infinite?) + + v = (-Float::MAX..0).bsearch {|x| x > -Float::MAX } + assert_operator(-Float::MAX, :<, v) + assert_equal(nil, v.infinite?) + + v = (-inf..0).bsearch {|x| x >= -Float::MAX } + assert_in_delta(-Float::MAX, v) + assert_equal(nil, v.infinite?) + + v = (-inf..0).bsearch {|x| x > -Float::MAX } + assert_operator(-Float::MAX, :<, v) + assert_equal(nil, v.infinite?) + end + + def test_bsearch_for_bignum + bignum = 2**100 + ary = [3, 4, 7, 9, 12] + assert_equal(bignum + 0, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 2 }) + assert_equal(bignum + 1, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 4 }) + assert_equal(bignum + 2, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 6 }) + assert_equal(bignum + 3, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 8 }) + assert_equal(bignum + 4, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 10 }) + assert_equal(nil, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 100 }) + assert_equal(bignum + 0, (bignum...bignum+ary.size).bsearch {|i| true }) + assert_equal(nil, (bignum...bignum+ary.size).bsearch {|i| false }) + + assert_raise(TypeError) { ("a".."z").bsearch {} } + end end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/