ruby-changes:44577
From: ko1 <ko1@a...>
Date: Mon, 7 Nov 2016 09:45:06 +0900 (JST)
Subject: [ruby-changes:44577] ko1:r56650 (trunk): Introduce table improvement by Vladimir Makarov <vmakarov@r...>.
ko1	2016-11-07 09:45:00 +0900 (Mon, 07 Nov 2016)

  New Revision: 56650

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=56650

  Log:
    Introduce table improvement by Vladimir Makarov <vmakarov@r...>.
    [Feature #12142]

    See the header of st.c for improvement details.

    You can see the full code history here:
    <https://github.com/vnmakarov/ruby/tree/hash_tables_with_open_addressing>

    This improvement was discussed at <https://bugs.ruby-lang.org/issues/12142>
    with many people, especially with Yura Sokolov.

    * st.c: improve st_table.
    * include/ruby/st.h: ditto.
    * internal.h, numeric.c, hash.c (rb_dbl_long_hash): extract a function.
    * ext/-test-/st/foreach/foreach.c: catch up this change.

  Modified files:
    trunk/NEWS
    trunk/ext/-test-/st/foreach/foreach.c
    trunk/hash.c
    trunk/include/ruby/st.h
    trunk/internal.h
    trunk/numeric.c
    trunk/st.c

Index: numeric.c
===================================================================
--- numeric.c	(revision 56649)
+++ numeric.c	(revision 56650)
@@ -1421,12 +1421,7 @@ flo_hash(VALUE num) https://github.com/ruby/ruby/blob/trunk/numeric.c#L1421
 VALUE
 rb_dbl_hash(double d)
 {
-    st_index_t hash;
-
-    /* normalize -0.0 to 0.0 */
-    if (d == 0.0) d = 0.0;
-    hash = rb_memhash(&d, sizeof(d));
-    return ST2FIX(hash);
+    return LONG2FIX(rb_dbl_long_hash(d));
 }
 
 VALUE
Index: hash.c
===================================================================
--- hash.c	(revision 56649)
+++ hash.c	(revision 56650)
@@ -145,14 +145,36 @@ rb_hash(VALUE obj) https://github.com/ruby/ruby/blob/trunk/hash.c#L145
 
 long rb_objid_hash(st_index_t index);
 
-static st_index_t
-any_hash(VALUE a, st_index_t (*other_func)(VALUE))
+long
+rb_dbl_long_hash(double d)
+{
+    /* normalize -0.0 to 0.0 */
+    if (d == 0.0) d = 0.0;
+#if SIZEOF_INT == SIZEOF_VOIDP
+    return rb_memhash(&d, sizeof(d));
+#else
+    {
+        union {double d; uint64_t i;} u;
+
+        u.d = d;
+        return rb_objid_hash(u.i);
+    }
+#endif
+}
+
+#if SIZEOF_INT == SIZEOF_VOIDP
+static const st_index_t str_seed = 0xfa835867;
+#else
+static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
+#endif
+
+static inline st_index_t
+any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
 {
     VALUE hval;
     st_index_t hnum;
 
     if (SPECIAL_CONST_P(a)) {
-	if (a == Qundef) return 0;
 	if (STATIC_SYM_P(a)) {
 	    hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
 	    goto out;
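
The extracted rb_dbl_long_hash() keeps the -0.0 normalization because both of its branches hash the raw bit pattern of the double, and -0.0 == 0.0 must hash identically even though the two values differ in their sign bit. A minimal standalone illustration of why the normalization is needed (this demo program is not part of the patch):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    int
    main(void)
    {
        double pz = 0.0, nz = -0.0;
        uint64_t pbits, nbits;

        memcpy(&pbits, &pz, sizeof(pz));
        memcpy(&nbits, &nz, sizeof(nz));
        /* equal as numbers, different as bytes: hashing the raw bits
           without normalizing would give equal keys unequal hashes */
        printf("equal: %d, bits: %016llx vs %016llx\n", pz == nz,
               (unsigned long long) pbits, (unsigned long long) nbits);
        return 0;
    }
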
@@ -164,7 +186,9 @@ any_hash(VALUE a, st_index_t (*other_fun https://github.com/ruby/ruby/blob/trunk/hash.c#L186
 	hnum = rb_objid_hash((st_index_t)a);
     }
     else if (BUILTIN_TYPE(a) == T_STRING) {
-	hnum = rb_str_hash(a);
+	hnum = (strong_p
+		? rb_str_hash(a)
+		: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
     }
     else if (BUILTIN_TYPE(a) == T_SYMBOL) {
 	hnum = RSYMBOL(a)->hashval;
@@ -175,8 +199,7 @@ any_hash(VALUE a, st_index_t (*other_fun https://github.com/ruby/ruby/blob/trunk/hash.c#L199
     }
     else if (BUILTIN_TYPE(a) == T_FLOAT) {
       flt:
-	hval = rb_dbl_hash(rb_float_value(a));
-	hnum = FIX2LONG(hval);
+	hnum = rb_dbl_long_hash(rb_float_value(a));
     }
     else {
 	hnum = other_func(a);
@@ -193,40 +216,62 @@ obj_any_hash(VALUE obj) https://github.com/ruby/ruby/blob/trunk/hash.c#L216
     return FIX2LONG(obj);
 }
 
+static inline st_index_t
+any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
+    return any_hash_general(a, FALSE, other_func);
+}
+
 static st_index_t
-rb_any_hash(VALUE a)
-{
-    return any_hash(a, obj_any_hash);
+rb_any_hash_weak(VALUE a) {
+    return any_hash_weak(a, obj_any_hash);
+}
+
+static inline st_index_t
+any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
+    return any_hash_general(a, TRUE, other_func);
 }
 
 static st_index_t
-rb_num_hash_start(st_index_t n)
+rb_any_hash(VALUE a) {
+    return any_hash(a, obj_any_hash);
+}
+
+/* Here is a hash function for a 64-bit key.  It is about 5 times
+   faster (2 times faster when the uint128 type is absent) on Haswell
+   than a tailored Spooky or City hash function can be.  */
+
+/* These are two primes generated with random bits.  */
+static const uint64_t prime1 = 0x2e0bb864e9ea7df5ULL;
+static const uint64_t prime2 = 0xcdb32970830fcaa1ULL;
+
+static inline uint64_t
+mult_and_mix(uint64_t m1, uint64_t m2)
+{
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+    __uint128_t r = (__uint128_t) m1 * (__uint128_t) m2;
+    return (uint64_t) (r >> 64) ^ (uint64_t) r;
+#else
+    uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32;
+    uint64_t lm1 = m1, lm2 = m2;
+    uint64_t v64_128 = hm1 * hm2;
+    uint64_t v32_96 = hm1 * lm2 + lm1 * hm2;
+    uint64_t v1_32 = lm1 * lm2;
+
+    return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32);
+#endif
+}
+
+static inline uint64_t
+key64_hash(uint64_t key, uint32_t seed)
 {
-    /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove most ID scope, +1 worked well initially, too
-     *   n.b.: +1 (instead of 3) worked well initially, too
-     * - (n << 16) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
-     */
-    return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
+    return mult_and_mix(key + seed, prime1);
 }
 
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_num_hash_start(index);
-
-    hnum = rb_hash_start(hnum);
-    hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
-    hnum = rb_hash_end(hnum);
-    return hnum;
+    return key64_hash(index, (uint32_t) prime2);
 }
 
 static st_index_t
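
key64_hash() thus boils down to a single 64x64->128-bit multiply whose halves are folded together, which is what makes it so much cheaper than Spooky or City for fixed-width keys. A standalone sketch of the fast path (a GCC-style __uint128_t is assumed; main() and the sample keys are illustrative only):

    #include <stdio.h>
    #include <stdint.h>

    static const uint64_t prime1 = 0x2e0bb864e9ea7df5ULL;

    /* multiply into 128 bits, then fold the high and low halves
       together, as in the __uint128_t branch of mult_and_mix() above */
    static inline uint64_t
    mult_and_mix(uint64_t m1, uint64_t m2)
    {
        __uint128_t r = (__uint128_t) m1 * (__uint128_t) m2;
        return (uint64_t) (r >> 64) ^ (uint64_t) r;
    }

    static inline uint64_t
    key64_hash(uint64_t key, uint32_t seed)
    {
        return mult_and_mix(key + seed, prime1);
    }

    int
    main(void)
    {
        /* adjacent keys should spread across the whole 64-bit range */
        printf("%016llx\n%016llx\n",
               (unsigned long long) key64_hash(1, 0),
               (unsigned long long) key64_hash(2, 0));
        return 0;
    }
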
@@ -250,6 +295,7 @@ rb_hash_iter_lev(VALUE h) https://github.com/ruby/ruby/blob/trunk/hash.c#L295
 
 static const struct st_hash_type objhash = {
     rb_any_cmp,
+    rb_any_hash_weak,
     rb_any_hash,
 };
 
@@ -269,7 +315,7 @@ rb_ident_hash(st_data_t n) https://github.com/ruby/ruby/blob/trunk/hash.c#L315
     }
 #endif
 
-    return (st_index_t)rb_num_hash_start((st_index_t)n);
+    return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2);
 }
 
 static const struct st_hash_type identhash = {
Index: ext/-test-/st/foreach/foreach.c
===================================================================
--- ext/-test-/st/foreach/foreach.c	(revision 56649)
+++ ext/-test-/st/foreach/foreach.c	(revision 56650)
@@ -14,13 +14,13 @@ force_unpack_check(struct checker *c, st https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L14
     if (c->nr == 0) {
 	st_data_t i;
 
-	if (!c->tbl->entries_packed) rb_bug("should be packed\n");
+	if (c->tbl->bins != NULL) rb_bug("should be packed\n");
 
 	/* force unpacking during iteration: */
 	for (i = 1; i < expect_size; i++)
 	    st_add_direct(c->tbl, i, i);
 
-	if (c->tbl->entries_packed) rb_bug("should be unpacked\n");
+	if (c->tbl->bins == NULL) rb_bug("should be unpacked\n");
     }
 
     if (key != c->nr) {
@@ -84,7 +84,7 @@ unp_fec(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L84
 
     st_add_direct(tbl, 0, 0);
 
-    if (!tbl->entries_packed) rb_bug("should still be packed\n");
+    if (tbl->bins != NULL) rb_bug("should still be packed\n");
 
     st_foreach_check(tbl, unp_fec_i, (st_data_t)&c, -1);
 
@@ -98,7 +98,7 @@ unp_fec(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L98
 		 (VALUE)c.nr, (VALUE)expect_size);
     }
 
-    if (tbl->entries_packed) rb_bug("should be unpacked\n");
+    if (tbl->bins == NULL) rb_bug("should be unpacked\n");
 
     st_free_table(tbl);
 
@@ -145,7 +145,7 @@ unp_fe(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L145
 
     st_add_direct(tbl, 0, 0);
 
-    if (!tbl->entries_packed) rb_bug("should still be packed\n");
+    if (tbl->bins != NULL) rb_bug("should still be packed\n");
 
     st_foreach(tbl, unp_fe_i, (st_data_t)&c);
 
@@ -159,7 +159,7 @@ unp_fe(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L159
 		 (VALUE)c.nr, (VALUE)expect_size);
     }
 
-    if (tbl->entries_packed) rb_bug("should be unpacked\n");
+    if (tbl->bins == NULL) rb_bug("should be unpacked\n");
 
     st_free_table(tbl);
 
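
The test updates all encode the same idea: the entries_packed flag is gone, and a table is "packed" exactly when it has no bins array, because very small tables skip the bins and are searched linearly (see the st.c header below). A sketch of that invariant outside the test suite (the growth threshold is internal to st.c, so the loop bound here is just a safe guess):

    #include <assert.h>
    #include <stddef.h>
    #include "ruby/st.h"

    static void
    packed_invariant_demo(void)
    {
        st_table *tbl = st_init_numtable();
        st_data_t i;

        st_add_direct(tbl, 0, 0);
        assert(tbl->bins == NULL);   /* small table: no bins, linear search */

        for (i = 1; i < 100; i++)    /* grow well past the small-table size */
            st_add_direct(tbl, i, i);
        assert(tbl->bins != NULL);   /* now uses open-addressing bins */

        st_free_table(tbl);
    }
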
Index: include/ruby/st.h
===================================================================
--- include/ruby/st.h	(revision 56649)
+++ include/ruby/st.h	(revision 56650)
@@ -1,6 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L1
-/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+/* This is a public domain general purpose hash table package
+   originally written by Peter Moore @ UCB.
 
-/* @(#) st.h 5.1 89/12/14 */
+   The hash table data structures were redesigned and the package was
+   rewritten by Vladimir Makarov <vmakarov@r...>.  */
 
 #ifndef RUBY_ST_H
 #define RUBY_ST_H 1
@@ -46,6 +48,10 @@ typedef unsigned LONG_LONG st_data_t; https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L48
 typedef struct st_table st_table;
 typedef st_data_t st_index_t;
 
+/* Maximal value of the unsigned integer type st_index_t.  */
+#define MAX_ST_INDEX_VAL (~(st_index_t) 0)
+
 typedef int st_compare_func(st_data_t, st_data_t);
 typedef st_index_t st_hash_func(st_data_t);
 
@@ -55,10 +61,13 @@ typedef char st_check_for_sizeof_st_inde https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L61
 struct st_hash_type {
     int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
     st_index_t (*hash)(ANYARGS /*st_data_t*/);        /* st_hash_func* */
+    /* The following is an optional function computing a stronger
+       hash.  When many different keys share the same hash we can
+       switch to it to prevent a denial-of-service attack exploiting
+       hash table collisions.  */
+    st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
 };
 
-#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)
-
 #if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
 # define ST_DATA_COMPATIBLE_P(type) \
     __builtin_choose_expr(__builtin_types_compatible_p(type, st_data_t), 1, 0)
@@ -66,33 +75,30 @@ struct st_hash_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L75
 # define ST_DATA_COMPATIBLE_P(type) 0
 #endif
 
+typedef struct st_table_entry st_table_entry;
+
+struct st_table_entry; /* defined in st.c */
+
 struct st_table {
+    /* Cached features of the table -- see st.c for more details.  */
+    unsigned char entry_power, bin_power, size_ind;
+    /* True when we are rebuilding the table.  */
+    unsigned char inside_rebuild_p;
+    /* How many times the table was rebuilt.  */
+    unsigned int rebuilds_num;
+    /* Currently used hash function.  */
+    st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
     const struct st_hash_type *type;
-    st_index_t num_bins;
-    unsigned int entries_packed : 1;
-#ifdef __GNUC__
-    /*
-     * C spec says,
-     *   A bit-field shall have a type that is a qualified or unqualified
-     *   version of _Bool, signed int, unsigned int, or some other
-     *   implementation-defined type. It is implementation-defined whether
-     *   atomic types are permitted.
-     * In short, long and long long bit-field are implementation-defined
-     * feature. Therefore we want to suppress a warning explicitly.
-     */
-    __extension__
-#endif
-    st_index_t num_entries : ST_INDEX_BITS - 1;
-    union {
-	struct {
-	    struct st_table_entry **bins;
-	    void *private_list_head[2];
-	} big;
-	struct {
-	    struct st_packed_entry *entries;
-	    st_index_t real_entries;
-	} packed;
-    } as;
+    /* Number of entries currently in the table.  */
+    st_index_t num_entries;
+    /* Array of bins used for access by keys.  */
+    st_index_t *bins;
+    /* Start and bound index of entries in the entries array.
+       entries_start and entries_bound are in the interval
+       [0, allocated_entries].  */
+    st_index_t entries_start, entries_bound;
+    /* Array of size 2^entry_power.  */
+    st_table_entry *entries;
 };
 
 #define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
@@ -121,7 +127,6 @@ typedef int st_update_callback_func(st_d https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L127
 int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg);
 int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
 int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
-int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
 st_index_t st_keys(st_table *table, st_data_t *keys, st_index_t size);
 st_index_t st_keys_check(st_table *table, st_data_t *keys, st_index_t size, st_data_t never);
 st_index_t st_values(st_table *table, st_data_t *values, st_index_t size);
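
The new strong_hash slot works together with st_table.curr_hash above: a table normally hashes with the fast type->hash (rb_any_hash_weak in the objhash change earlier in this commit) and can be rebuilt with type->strong_hash once too many distinct keys share one hash value. A schematic of how a client fills in such a type (the hash functions here are made up for illustration; the actual switching logic lives inside st.c):

    #include <stdint.h>
    #include "ruby/st.h"

    static int
    example_cmp(st_data_t x, st_data_t y)
    {
        return x != y;               /* 0 means equal, as st.c expects */
    }

    /* fast but forgeable hash: fine until someone floods collisions */
    static st_index_t
    example_hash_weak(st_data_t key)
    {
        return (st_index_t) key;
    }

    /* slower, harder-to-forge hash used after a collision attack */
    static st_index_t
    example_hash_strong(st_data_t key)
    {
        return (st_index_t) ((uint64_t) key * 0x9e3779b97f4a7c15ULL);
    }

    /* field order as declared above: compare, hash, strong_hash */
    static const struct st_hash_type example_type = {
        example_cmp,
        example_hash_weak,
        example_hash_strong,
    };
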
Index: st.c
===================================================================
--- st.c	(revision 56649)
+++ st.c	(revision 56650)
@@ -1,6 +1,99 @@ https://github.com/ruby/ruby/blob/trunk/st.c#L1
-/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+/* This is a public domain general purpose hash table package
+   originally written by Peter Moore @ UCB.
 
-/* static	char	sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
+   The hash table data structures were redesigned and the package was
+   rewritten by Vladimir Makarov <vmakarov@r...>.  */
+
+/* The original package implemented classic bucket-based hash tables
+   with entries doubly linked for access in their insertion order.
+   To decrease pointer chasing, and as a consequence to improve data
+   locality, the current implementation is based on storing entries
+   in an array and using hash tables with open addressing.  The
+   current entries are more compact in comparison with the original
+   ones, which also improves data locality.
+
+   The hash table has two arrays called *bins* and *entries*.
+
+     bins:
+    -------
+   |       |                  entries array:
+   |-------|            --------------------------------
+   | index |           |      | entry:  |        |      |
+   |-------|           |      |         |        |      |
+   | ...   |           | ...  | hash    |  ...   | ...  |
+   |-------|           |      | key     |        |      |
+   | empty |           |      | record  |        |      |
+   |-------|            --------------------------------
+   | ...   |                   ^                  ^
+   |-------|                   |_ entries start   |_ entries bound
+   |deleted|
+    -------
+
+   o The entries array contains table entries in the same order as
+     they were inserted.
+
+     When the first entry is deleted, a variable containing the index
+     of the current first entry (*entries start*) is changed.  In all
+     other cases of deletion, we just mark the entry as deleted by
+     using a reserved hash value.
+
+     This organization of the entry storage makes the table shift
+     operation and entries traversal very fast.
+
+   o The bins provide access to the entries by their keys.  The key
+     hash is mapped to a bin containing the *index* of the
+     corresponding entry in the entries array.
+
+     The bin array size is always a power of two, which makes the
+     mapping very fast: we just use the corresponding lower bits of
+     the hash.  Generally it is not a good idea to ignore part of the
+     hash, but the alternative approach is worse.  For example, we
+     could use a modulo operation for the mapping and a prime number
+     for the size of the bin array.  Unfortunately, the modulo
+     operation on big 64-bit numbers is extremely slow (it takes more
+     than 100 cycles on modern Intel CPUs).
+
+     Still, the other bits of the hash value are used when the
+     mapping results in a collision.  In this case we use a secondary
+     hash value which is a function of the colliding bin index and
+     the original hash value.  The function choice guarantees that we
+     can traverse all bins and finally find the corresponding bin:
+     after several iterations the function becomes a full-cycle
+     linear congruential generator, because it satisfies the
+     requirements of the Hull-Dobell theorem.
+
+     When an entry is removed from the table, besides marking the
+     hash in the corresponding entry as described above, we also mark
+     the bin with a special value in order to find entries which had
+     a collision with the removed entries.
+
+     There are two reserved values for the bins.  One denotes an
+     empty bin, the other denotes a bin corresponding to a deleted
+     entry.
+
+   o The length of the bin array is always at least two times the
+     entry array length.  This keeps the table load factor healthy.
+     The trigger for rebuilding the table is always the case when we
+     cannot insert an entry anymore at the entries bound.  We could
+     also decrease the entries bound on deletion, but then we would
+     need special code to count bins corresponding to deleted entries
+     and to reset the bin values when there are too many such bins.
+
+     Table rebuilding is done by creating a new entries array and
+     bins of an appropriate size.  We also try to reuse the arrays
+     in some cases by compacting the entries array and removing
+     deleted entries.
+
+   o To save memory, very small tables have no allocated bins array;
+     we use a linear search for access by a key.
+
+   o To save more memory, we use 8-, 16-, 32- and 64-bit indexes in
+     the bins depending on the current hash table size.
+
+   This implementation speeds up the Ruby hash table benchmarks on
+   average by more than 40% on an Intel Haswell CPU.
+
+*/
 
 #ifdef NOT_RUBY
 #include "regint.h"
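
Two parts of the comment above can be made concrete. Mapping a hash to a bin is a plain bit mask because the bin array length is a power of two, and the collision probe perturbs the index with the remaining hash bits through a linear congruential step. A sketch of such a probe (the exact recurrence and constants in st.c may differ):

    #include <stdint.h>

    /* a power-of-two bin count turns the mapping into a mask,
       avoiding a slow 64-bit modulo */
    static inline uint64_t
    hash_bin(uint64_t hash, unsigned bin_power)
    {
        return hash & ((1ULL << bin_power) - 1);
    }

    /* one collision step: mix in unused hash bits via *perturb; once
       *perturb drains to zero this becomes ind = 5*ind + 1 (mod
       2^bin_power), a full-cycle linear congruential generator by
       the Hull-Dobell theorem (increment odd, multiplier - 1
       divisible by 4), so every bin is eventually visited */
    static inline uint64_t
    secondary_bin(uint64_t ind, uint64_t *perturb, unsigned bin_power)
    {
        *perturb >>= 11;
        return hash_bin(5 * ind + *perturb + 1, bin_power);
    }
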
@@ -14,46 +107,33 @@ https://github.com/ruby/ruby/blob/trunk/st.c#L107
 #include <stdlib.h>
 #endif
 #include <string.h>
-#include "ccan/list/list.h"
+#include <assert.h>
 
-typedef struct st_table_entry st_table_entry;
+#ifdef __GNUC__
+#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
+#define EXPECT(expr, val) __builtin_expect(expr, val)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define PREFETCH(addr, write_p)
+#define EXPECT(expr, val) (expr)
+#define ATTRIBUTE_UNUSED
+#endif
+
+#ifdef ST_DEBUG
+#define st_assert(cond) assert(cond)
+#else
+#define st_assert(cond) ((void)(0 && (cond)))
+#endif
+
+/* The type of hashes.  */
+typedef st_index_t st_hash_t;
 
 struct st_table_entry {
-    st_index_t hash;
+    st_hash_t hash;
     st_data_t key;
     st_data_t record;
-    st_table_entry *next;
-    struct list_node olist;
 };
 
-typedef struct st_packed_entry {
-    st_index_t hash;
-    st_data_t key, val;
-} st_packed_entry;
-
-#ifndef STATIC_ASSERT
-#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1]
-#endif
-
-#define ST_DEFAULT_MAX_DENSITY 5
-#define ST_DEFAULT_INIT_TABLE_SIZE 16
-#define ST_DEFAULT_PACKED_TABLE_SIZE 18
-#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
-#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
-
-STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]));
-STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]));
-
- /*
-  * DEFAULT_MAX_DENSITY is the default for the largest we allow the
-  * average number of items per bin before increasing the number of
-  * bins
-  *
-  * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
-  * allocated initially
-  *
-  */
-
 #define type_numhash st_hashtype_num
 const struct st_hash_type st_hashtype_num = {
     st_numcmp,
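
PREFETCH and EXPECT wrap the GCC builtins __builtin_prefetch and __builtin_expect behind no-op fallbacks, so the rest of the file can use them unconditionally. A sketch of how code inside st.c might apply them when scanning the entries array (the deleted_entry_p() predicate is made up for this illustration):

    /* count live entries between entries_start and entries_bound */
    static st_index_t
    count_live_entries(const st_table *tab)
    {
        st_index_t i, n = 0;

        for (i = tab->entries_start; i < tab->entries_bound; i++) {
            /* hint: the next entry will be read soon */
            PREFETCH(&tab->entries[i + 1], 0);
            /* hint: a deleted entry is the rare case */
            if (EXPECT(deleted_entry_p(&tab->entries[i]), 0))
                continue;
            n++;
        }
        return n;
    }
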
@@ -73,105 +153,365 @@ static const struct st_hash_type type_st https://github.com/ruby/ruby/blob/trunk/st.c#L153
     strcasehash,
 };
 
-static void rehash(st_table *);
+/* Value used to catch uninitialized entries/bins during debugging.
+   There is a possibility of a false alarm, but its probability is
+   extremely small.  */
+#define ST_INIT_VAL 0xafafafafafafafaf
+#define ST_INIT_VAL_BYTE 0xafa
 
 #ifdef RUBY
 #undef malloc
 #undef realloc
 #undef calloc
 #undef free
-#define malloc xmalloc
-#define calloc xcalloc
-#define realloc xrealloc
-#define free(x) xfree(x)
-#endif
-
-#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)
-
-#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
-#define hash_pos(h,n) ((h) & (n - 1))

(... truncated)

-- 
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/