[前][次][番号順一覧][スレッド一覧]

ruby-changes:44919

From: nobu <ko1@a...>
Date: Tue, 6 Dec 2016 13:43:54 +0900 (JST)
Subject: [ruby-changes:44919] nobu:r56992 (trunk): switching hash removal

nobu	2016-12-06 13:43:48 +0900 (Tue, 06 Dec 2016)

  New Revision: 56992

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=56992

  Log:
    switching hash removal
    
    * st.h (struct st_hash_type): Remove strong_hash.
      (struct st_table): Remove inside_rebuild_p and curr_hash.
    * st.c (do_hash): Use type->hash instead of curr_hash.
      (make_tab_empty): Remove setting up curr_hash.
      (st_init_table_with_size): Remove setting up inside_rebuild_p.
      (rebuild_table): Remove clearing inside_rebuild_p.
      (reset_entry_hashes, HIT_THRESHOULD_FOR_STRONG_HASH): Remove code
      recognizing a denial attack and switching to strong hash.
    * hash.c (rb_dbl_long_hash, rb_objid_hash, rb_ident_hash): Use
      rb_hash_start to randomize the hash.
      (str_seed): Remove.
      (any_hash): Remove strong_p and use always rb_str_hash for
      strings.
      (any_hash_weak, rb_any_hash_weak): Remove.
      (st_hash_type objhash): Remove rb_any_hash_weak.
    
    based on the patch by Vladimir N Makarov <vmakarov@r...> at
    [ruby-core:78490].  [Bug #13002]
    
    * test/ruby/test_hash.rb (test_wrapper): objects other than special
      constants should be able to be wrapped.

  Modified files:
    trunk/hash.c
    trunk/include/ruby/st.h
    trunk/st.c
    trunk/test/ruby/test_hash.rb
    trunk/test/ruby/test_string.rb
Index: test/ruby/test_string.rb
===================================================================
--- test/ruby/test_string.rb	(revision 56991)
+++ test/ruby/test_string.rb	(revision 56992)
@@ -979,18 +979,6 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L979
     assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172)
   end
 
-  def test_hash_random
-    str = 'abc'
-    a = [str.hash.to_s]
-    3.times {
-      assert_in_out_err(["-e", "print #{str.dump}.hash"], "") do |r, e|
-        a += r
-        assert_equal([], e)
-      end
-    }
-    assert_not_equal([str.hash.to_s], a.uniq)
-  end
-
   def test_hex
     assert_equal(255,  S("0xff").hex)
     assert_equal(-255, S("-0xff").hex)
Index: test/ruby/test_hash.rb
===================================================================
--- test/ruby/test_hash.rb	(revision 56991)
+++ test/ruby/test_hash.rb	(revision 56992)
@@ -1302,7 +1302,7 @@ class TestHash < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_hash.rb#L1302
     assert_no_memory_leak([], prepare, code, bug9187)
   end
 
-  def test_wrapper_of_special_const
+  def test_wrapper
     bug9381 = '[ruby-core:59638] [Bug #9381]'
 
     wrapper = Class.new do
@@ -1323,6 +1323,7 @@ class TestHash < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_hash.rb#L1323
       5, true, false, nil,
       0.0, 1.72723e-77,
       :foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym,
+      "str",
     ].select do |x|
       hash = {x => bug9381}
       hash[wrapper.new(x)] != bug9381
@@ -1330,6 +1331,44 @@ class TestHash < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_hash.rb#L1331
     assert_empty(bad, bug9381)
   end
 
+  def assert_hash_random(obj, dump = obj.inspect)
+    a = [obj.hash.to_s]
+    3.times {
+      assert_in_out_err(["-e", "print #{dump}.hash"], "") do |r, e|
+        a += r
+        assert_equal([], e)
+      end
+    }
+    assert_not_equal([obj.hash.to_s], a.uniq)
+    assert_operator(a.uniq.size, :>, 2, proc {a.inspect})
+  end
+
+  def test_string_hash_random
+    assert_hash_random('abc')
+  end
+
+  def test_symbol_hash_random
+    assert_hash_random(:-)
+    assert_hash_random(:foo)
+    assert_hash_random("dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym)
+  end
+
+  def test_integer_hash_random
+    assert_hash_random(0)
+    assert_hash_random(+1)
+    assert_hash_random(-1)
+    assert_hash_random(+(1<<100))
+    assert_hash_random(-(1<<100))
+  end
+
+  def test_float_hash_random
+    assert_hash_random(0.0)
+    assert_hash_random(+1.0)
+    assert_hash_random(-1.0)
+    assert_hash_random(1.72723e-77)
+    assert_hash_random(Float::INFINITY, "Float::INFINITY")
+  end
+
   def test_label_syntax
     return unless @cls == Hash
 
Index: include/ruby/st.h
===================================================================
--- include/ruby/st.h	(revision 56991)
+++ include/ruby/st.h	(revision 56992)
@@ -61,11 +61,6 @@ typedef char st_check_for_sizeof_st_inde https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L61
 struct st_hash_type {
     int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
     st_index_t (*hash)(ANYARGS /*st_data_t*/);        /* st_hash_func* */
-    /* The following is an optional func for stronger hash.  When we
-       have many different keys with the same hash we can switch to
-       use it to prevent a denial attack with usage of hash table
-       collisions. */
-    st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
 };
 
 #if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
@@ -82,12 +77,8 @@ struct st_table_entry; /* defined in st. https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L77
 struct st_table {
     /* Cached features of the table -- see st.c for more details.  */
     unsigned char entry_power, bin_power, size_ind;
-    /* True when we are rebuilding the table.  */
-    unsigned char inside_rebuild_p;
     /* How many times the table was rebuilt.  */
     unsigned int rebuilds_num;
-    /* Currently used hash function.  */
-    st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
     const struct st_hash_type *type;
     /* Number of entries currently in the table.  */
     st_index_t num_entries;
Index: hash.c
===================================================================
--- hash.c	(revision 56991)
+++ hash.c	(revision 56992)
@@ -157,19 +157,13 @@ rb_dbl_long_hash(double d) https://github.com/ruby/ruby/blob/trunk/hash.c#L157
 	union {double d; uint64_t i;} u;
 
 	u.d = d;
-	return rb_objid_hash(u.i);
+	return rb_objid_hash(rb_hash_start(u.i));
     }
 #endif
 }
 
-#if SIZEOF_INT == SIZEOF_VOIDP
-static const st_index_t str_seed = 0xfa835867;
-#else
-static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
-#endif
-
 static inline st_index_t
-any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
+any_hash(VALUE a, st_index_t (*other_func)(VALUE))
 {
     VALUE hval;
     st_index_t hnum;
@@ -177,6 +171,7 @@ any_hash_general(VALUE a, int strong_p, https://github.com/ruby/ruby/blob/trunk/hash.c#L171
     if (SPECIAL_CONST_P(a)) {
 	if (STATIC_SYM_P(a)) {
 	    hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+	    hnum = rb_hash_start(hnum);
 	    goto out;
 	}
 	else if (FLONUM_P(a)) {
@@ -186,9 +181,7 @@ any_hash_general(VALUE a, int strong_p, https://github.com/ruby/ruby/blob/trunk/hash.c#L181
 	hnum = rb_objid_hash((st_index_t)a);
     }
     else if (BUILTIN_TYPE(a) == T_STRING) {
-	hnum = (strong_p
-		? rb_str_hash(a)
-		: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
+	hnum = rb_str_hash(a);
     }
     else if (BUILTIN_TYPE(a) == T_SYMBOL) {
 	hnum = RSYMBOL(a)->hashval;
@@ -216,24 +209,6 @@ obj_any_hash(VALUE obj) https://github.com/ruby/ruby/blob/trunk/hash.c#L209
     return FIX2LONG(obj);
 }
 
-static inline st_index_t
-any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE))
-{
-    return any_hash_general(a, FALSE, other_func);
-}
-
-static st_index_t
-rb_any_hash_weak(VALUE a)
-{
-    return any_hash_weak(a, obj_any_hash);
-}
-
-static inline st_index_t
-any_hash(VALUE a, st_index_t (*other_func)(VALUE))
-{
-    return any_hash_general(a, TRUE, other_func);
-}
-
 static st_index_t
 rb_any_hash(VALUE a)
 {
@@ -275,7 +250,7 @@ key64_hash(uint64_t key, uint32_t seed) https://github.com/ruby/ruby/blob/trunk/hash.c#L250
 long
 rb_objid_hash(st_index_t index)
 {
-    return (long)key64_hash(index, (uint32_t)prime2);
+    return (long)key64_hash(rb_hash_start(index), (uint32_t)prime2);
 }
 
 static st_index_t
@@ -299,7 +274,6 @@ rb_hash_iter_lev(VALUE h) https://github.com/ruby/ruby/blob/trunk/hash.c#L274
 
 static const struct st_hash_type objhash = {
     rb_any_cmp,
-    rb_any_hash_weak,
     rb_any_hash,
 };
 
@@ -319,7 +293,7 @@ rb_ident_hash(st_data_t n) https://github.com/ruby/ruby/blob/trunk/hash.c#L293
     }
 #endif
 
-    return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2);
+    return (st_index_t)key64_hash(rb_hash_start((st_index_t)n), (uint32_t)prime2);
 }
 
 static const struct st_hash_type identhash = {
Index: st.c
===================================================================
--- st.c	(revision 56991)
+++ st.c	(revision 56992)
@@ -461,7 +461,6 @@ initialize_bins(st_table *tab) https://github.com/ruby/ruby/blob/trunk/st.c#L461
 static void
 make_tab_empty(st_table *tab)
 {
-    tab->curr_hash = tab->type->hash;
     tab->num_entries = 0;
     tab->entries_start = tab->entries_bound = 0;
     if (tab->bins != NULL)
@@ -575,7 +574,6 @@ st_init_table_with_size(const struct st_ https://github.com/ruby/ruby/blob/trunk/st.c#L574
     tab->entry_power = n;
     tab->bin_power = features[n].bin_power;
     tab->size_ind = features[n].size_ind;
-    tab->inside_rebuild_p = FALSE;
     if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
         tab->bins = NULL;
     else
@@ -744,7 +742,6 @@ rebuild_table(st_table *tab) https://github.com/ruby/ruby/blob/trunk/st.c#L742
     st_assert(tab != NULL);
     bound = tab->entries_bound;
     entries = tab->entries;
-    tab->inside_rebuild_p = TRUE;
     if ((2 * tab->num_entries <= get_allocated_entries(tab)
 	 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
 	|| tab->num_entries < (1 << MINIMAL_POWER2)) {
@@ -758,8 +755,6 @@ rebuild_table(st_table *tab) https://github.com/ruby/ruby/blob/trunk/st.c#L755
     else {
         new_tab = st_init_table_with_size(tab->type,
 					  2 * tab->num_entries - 1);
-	st_assert(new_tab->curr_hash == new_tab->type->hash);
-	new_tab->curr_hash = tab->curr_hash;
 	new_entries = new_tab->entries;
     }
     ni = 0;
@@ -798,7 +793,6 @@ rebuild_table(st_table *tab) https://github.com/ruby/ruby/blob/trunk/st.c#L793
     tab->entries_start = 0;
     tab->entries_bound = tab->num_entries;
     tab->rebuilds_num++;
-    tab->inside_rebuild_p = FALSE;
 #ifdef ST_DEBUG
     st_check(tab);
 #endif
@@ -966,28 +960,6 @@ find_table_bin_ind_direct(st_table *tab, https://github.com/ruby/ruby/blob/trunk/st.c#L960
     }
 }
 
-/* Recalculate hashes of entries in table TAB.  */
-static void
-reset_entry_hashes (st_table *tab)
-{
-    st_index_t i, bound;
-    st_table_entry *entries, *curr_entry_ptr;
-
-    bound = tab->entries_bound;
-    entries = tab->entries;
-    for (i = tab->entries_start; i < bound; i++) {
-	curr_entry_ptr = &entries[i];
-	if (! DELETED_ENTRY_P(curr_entry_ptr))
-	    curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
-    }
-}
-
-/* If we have the following number of collisions with different keys
-   but with the same hash during finding a bin for new entry
-   inclusions, possibly a denial attack is going on.  Start to use a
-   stronger hash.  */
-#define HIT_THRESHOULD_FOR_STRONG_HASH 10
-
 /* Return index of table TAB bin for HASH_VALUE and KEY through
    BIN_IND and the pointed value as the function result.  Reserve the
    bin for inclusion of the corresponding entry into the table if it
@@ -1009,12 +981,10 @@ find_table_bin_ptr_and_reserve(st_table https://github.com/ruby/ruby/blob/trunk/st.c#L981
     st_index_t entry_index;
     st_index_t first_deleted_bin_ind;
     st_table_entry *entries;
-    int hit;
 
     st_assert(tab != NULL && tab->bins != NULL
 	      && tab->entries_bound <= get_allocated_entries(tab)
 	      && tab->entries_start <= tab->entries_bound);
-  repeat:
     ind = hash_bin(curr_hash_value, tab);
 #ifdef QUADRATIC_PROBE
     d = 1;
@@ -1024,7 +994,6 @@ find_table_bin_ptr_and_reserve(st_table https://github.com/ruby/ruby/blob/trunk/st.c#L994
     FOUND_BIN;
     first_deleted_bin_ind = UNDEFINED_BIN_IND;
     entries = tab->entries;
-    hit = 0;
     for (;;) {
         entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
         if (EMPTY_BIN_P(entry_index)) {
@@ -1039,19 +1008,6 @@ find_table_bin_ptr_and_reserve(st_table https://github.com/ruby/ruby/blob/trunk/st.c#L1008
         } else if (! DELETED_BIN_P(entry_index)) {
             if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
                 break;
-	    if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
-	        hit++;
-		if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
-		    && tab->curr_hash != tab->type->strong_hash
-		    && tab->type->strong_hash != NULL
-		    && ! tab->inside_rebuild_p) {
-		    tab->curr_hash = tab->type->strong_hash;
-		    *hash_value = curr_hash_value = do_hash(key, tab);
-		    reset_entry_hashes(tab);
-		    rebuild_table(tab);
-		    goto repeat;
-		}
-	    }
         } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
             first_deleted_bin_ind = ind;
 #ifdef QUADRATIC_PROBE

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]