ruby-changes:39501
From: normal <ko1@a...>
Date: Sat, 15 Aug 2015 04:52:14 +0900 (JST)
Subject: [ruby-changes:39501] normal:r51582 (trunk): hash.c: improve integer/fixnum hashing
normal 2015-08-15 04:52:06 +0900 (Sat, 15 Aug 2015) New Revision: 51582 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=51582 Log: hash.c: improve integer/fixnum hashing The low bits of Ruby object IDs are rarely populated in the current implementation, so ensure the get used. Early versions of this patch redundantly shifted static symbols in any_hash, causing regresions with static symbols in hash_aref_sym * hash.c (any_hash): skip rb_objid_hash for static syms (rb_num_hash_start): extract from rb_ident_hash (rb_objid_hash): call rb_num_hash_start (rb_ident_hash): ditto [ruby-core:70181] [Feature #11405] target 0: a (ruby 2.3.0dev (2015-07-30 trunk 51437) [x86_64-linux] target 1: b (ruby 2.3.0dev (2015-07-30 patch 51437) [x86_64-linux] benchmark results from Xeon E3-1230 v3 @ 3.30GHz (turbo disabled): minimum results in each 10 measurements. Execution time (sec) name a b hash_aref_dsym 0.316 0.300 hash_aref_dsym_long 5.106 5.063 hash_aref_fix 0.304 0.297 hash_aref_flo 0.061 0.060 hash_aref_miss 0.433 0.430 hash_aref_str 0.408 0.396 hash_aref_sym 0.312 0.306 hash_aref_sym_long 0.482 0.469 hash_flatten 0.385 0.273 hash_ident_flo 0.036 0.037 hash_ident_num 0.277 0.276 hash_ident_obj 0.291 0.284 hash_ident_str 0.289 0.286 hash_ident_sym 0.285 0.281 hash_keys 0.269 0.271 hash_shift 0.020 0.016 hash_values 0.264 0.264 loop_whileloop2 0.101 0.099 vm2_bighash* 3.066 2.972 Speedup ratio: compare with the result of `a' (greater is better) name b hash_aref_dsym 1.052 hash_aref_dsym_long 1.008 hash_aref_fix 1.024 hash_aref_flo 1.015 hash_aref_miss 1.007 hash_aref_str 1.031 hash_aref_sym 1.018 hash_aref_sym_long 1.027 hash_flatten 1.410 hash_ident_flo 0.994 hash_ident_num 1.001 hash_ident_obj 1.022 hash_ident_str 1.012 hash_ident_sym 1.016 hash_keys 0.992 hash_shift 1.237 hash_values 1.001 loop_whileloop2 1.013 vm2_bighash* 1.032 Modified files: trunk/ChangeLog trunk/hash.c Index: ChangeLog =================================================================== --- ChangeLog (revision 51581) +++ ChangeLog (revision 51582) @@ -1,3 +1,11 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Sat Aug 15 04:33:39 2015 Eric Wong <e@8...> + + * hash.c (any_hash): skip rb_objid_hash for static syms + (rb_num_hash_start): extract from rb_ident_hash + (rb_objid_hash): call rb_num_hash_start + (rb_ident_hash): ditto + [ruby-core:70181] [Feature #11405] + Sat Aug 15 04:16:13 2015 Eric Wong <e@8...> * iseq.c (rb_iseq_mark): reduce NULL checks Index: hash.c =================================================================== --- hash.c (revision 51581) +++ hash.c (revision 51582) @@ -138,7 +138,8 @@ any_hash(VALUE a, st_index_t (*other_fun https://github.com/ruby/ruby/blob/trunk/hash.c#L138 if (SPECIAL_CONST_P(a)) { if (a == Qundef) return 0; if (STATIC_SYM_P(a)) { - a >>= (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT); + hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT); + goto out; } else if (FLONUM_P(a)) { /* prevent pathological behavior: [Bug #10761] */ @@ -160,6 +161,7 @@ any_hash(VALUE a, st_index_t (*other_fun https://github.com/ruby/ruby/blob/trunk/hash.c#L161 else { hnum = other_func(a); } + out: hnum <<= 1; return (st_index_t)RSHIFT(hnum, 1); } @@ -177,10 +179,31 @@ rb_any_hash(VALUE a) https://github.com/ruby/ruby/blob/trunk/hash.c#L179 return any_hash(a, obj_any_hash); } +static st_index_t +rb_num_hash_start(st_index_t n) +{ + /* + * This hash function is lightly-tuned for Ruby. Further tuning + * should be possible. Notes: + * + * - (n >> 3) alone is great for heap objects and OK for fixnum, + * however symbols perform poorly. + * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well, + * n.b.: +3 to remove most ID scope, +1 worked well initially, too + * n.b.: +1 (instead of 3) worked well initially, too + * - (n << 3) was finally added to avoid losing bits for fixnums + * - avoid expensive modulo instructions, it is currently only + * shifts and bitmask operations. + */ + return (n >> (RUBY_SPECIAL_SHIFT + 3) | (n << 3)) ^ (n >> 3); +} + long rb_objid_hash(st_index_t index) { - st_index_t hnum = rb_hash_start(index); + st_index_t hnum = rb_num_hash_start(index); + + hnum = rb_hash_start(hnum); hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash); hnum = rb_hash_end(hnum); return hnum; @@ -215,28 +238,18 @@ static const struct st_hash_type objhash https://github.com/ruby/ruby/blob/trunk/hash.c#L238 static st_index_t rb_ident_hash(st_data_t n) { +#ifdef USE_FLONUM /* RUBY */ /* - * This hash function is lightly-tuned for Ruby. Further tuning - * should be possible. Notes: - * - * - (n >> 3) alone is great for heap objects and OK for fixnum, - * however symbols perform poorly. - * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well, - * n.b.: +3 to remove ID scope, +1 worked well initially, too - * - (n << 3) was finally added to avoid losing bits for fixnums - * - avoid expensive modulo instructions, it is currently only - * shifts and bitmask operations. * - flonum (on 64-bit) is pathologically bad, mix the actual * float value in, but do not use the float value as-is since * many integers get interpreted as 2.0 or -2.0 [Bug #10761] */ -#ifdef USE_FLONUM /* RUBY */ if (FLONUM_P(n)) { n ^= (st_data_t)rb_float_value(n); } #endif - return (st_index_t)((n>>(RUBY_SPECIAL_SHIFT+3)|(n<<3)) ^ (n>>3)); + return (st_index_t)rb_num_hash_start((st_index_t)n); } static const struct st_hash_type identhash = { -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/