ruby-changes:44577
From: ko1 <ko1@a...>
Date: Mon, 7 Nov 2016 09:45:06 +0900 (JST)
Subject: [ruby-changes:44577] ko1:r56650 (trunk): Introduce table improvement by Vladimir Makarov <vmakarov@r...>.
ko1	2016-11-07 09:45:00 +0900 (Mon, 07 Nov 2016)

  New Revision: 56650

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=56650

  Log:
    Introduce table improvement by Vladimir Makarov <vmakarov@r...>.
    [Feature #12142]

    See the header of st.c for improvement details.

    You can see the full code history here:
    <https://github.com/vnmakarov/ruby/tree/hash_tables_with_open_addressing>

    This improvement was discussed at <https://bugs.ruby-lang.org/issues/12142>
    with many people, especially with Yura Sokolov.

    * st.c: improve st_table.
    * include/ruby/st.h: ditto.
    * internal.h, numeric.c, hash.c (rb_dbl_long_hash): extract a function.
    * ext/-test-/st/foreach/foreach.c: catch up this change.

  Modified files:
    trunk/NEWS
    trunk/ext/-test-/st/foreach/foreach.c
    trunk/hash.c
    trunk/include/ruby/st.h
    trunk/internal.h
    trunk/numeric.c
    trunk/st.c

Index: numeric.c
===================================================================
--- numeric.c	(revision 56649)
+++ numeric.c	(revision 56650)
@@ -1421,12 +1421,7 @@ flo_hash(VALUE num) https://github.com/ruby/ruby/blob/trunk/numeric.c#L1421
 VALUE
 rb_dbl_hash(double d)
 {
-    st_index_t hash;
-
-    /* normalize -0.0 to 0.0 */
-    if (d == 0.0) d = 0.0;
-    hash = rb_memhash(&d, sizeof(d));
-    return ST2FIX(hash);
+    return LONG2FIX(rb_dbl_long_hash(d));
 }
 
 VALUE
Index: hash.c
===================================================================
--- hash.c	(revision 56649)
+++ hash.c	(revision 56650)
@@ -145,14 +145,36 @@ rb_hash(VALUE obj) https://github.com/ruby/ruby/blob/trunk/hash.c#L145
 
 long rb_objid_hash(st_index_t index);
 
-static st_index_t
-any_hash(VALUE a, st_index_t (*other_func)(VALUE))
+long
+rb_dbl_long_hash(double d)
+{
+    /* normalize -0.0 to 0.0 */
+    if (d == 0.0) d = 0.0;
+#if SIZEOF_INT == SIZEOF_VOIDP
+    return rb_memhash(&d, sizeof(d));
+#else
+    {
+        union {double d; uint64_t i;} u;
+
+        u.d = d;
+        return rb_objid_hash(u.i);
+    }
+#endif
+}
+
+#if SIZEOF_INT == SIZEOF_VOIDP
+static const st_index_t str_seed = 0xfa835867;
+#else
+static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
+#endif
+
+static inline st_index_t
+any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
 {
     VALUE hval;
     st_index_t hnum;
 
     if (SPECIAL_CONST_P(a)) {
-	if (a == Qundef) return 0;
 	if (STATIC_SYM_P(a)) {
 	    hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
 	    goto out;
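
The extracted rb_dbl_long_hash() keeps the -0.0 normalization because both of its branches hash the raw bit pattern of the double, and -0.0 == 0.0 must hash identically even though the two values differ in their sign bit. A minimal standalone illustration of why the normalization is needed (this demo program is not part of the patch):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    int
    main(void)
    {
        double pz = 0.0, nz = -0.0;
        uint64_t pbits, nbits;

        memcpy(&pbits, &pz, sizeof(pz));
        memcpy(&nbits, &nz, sizeof(nz));
        /* equal as numbers, different as bytes: hashing the raw bits
           without normalizing would give equal keys unequal hashes */
        printf("equal: %d, bits: %016llx vs %016llx\n", pz == nz,
               (unsigned long long) pbits, (unsigned long long) nbits);
        return 0;
    }
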
@@ -164,7 +186,9 @@ any_hash(VALUE a, st_index_t (*other_fun https://github.com/ruby/ruby/blob/trunk/hash.c#L186
 	hnum = rb_objid_hash((st_index_t)a);
     }
     else if (BUILTIN_TYPE(a) == T_STRING) {
-	hnum = rb_str_hash(a);
+	hnum = (strong_p
+		? rb_str_hash(a)
+		: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
     }
     else if (BUILTIN_TYPE(a) == T_SYMBOL) {
 	hnum = RSYMBOL(a)->hashval;
@@ -175,8 +199,7 @@ any_hash(VALUE a, st_index_t (*other_fun https://github.com/ruby/ruby/blob/trunk/hash.c#L199
     }
     else if (BUILTIN_TYPE(a) == T_FLOAT) {
       flt:
-	hval = rb_dbl_hash(rb_float_value(a));
-	hnum = FIX2LONG(hval);
+	hnum = rb_dbl_long_hash(rb_float_value(a));
     }
     else {
 	hnum = other_func(a);
@@ -193,40 +216,62 @@ obj_any_hash(VALUE obj) https://github.com/ruby/ruby/blob/trunk/hash.c#L216
     return FIX2LONG(obj);
 }
 
+static inline st_index_t
+any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
+    return any_hash_general(a, FALSE, other_func);
+}
+
 static st_index_t
-rb_any_hash(VALUE a)
-{
-    return any_hash(a, obj_any_hash);
+rb_any_hash_weak(VALUE a) {
+    return any_hash_weak(a, obj_any_hash);
+}
+
+static inline st_index_t
+any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
+    return any_hash_general(a, TRUE, other_func);
 }
 
 static st_index_t
-rb_num_hash_start(st_index_t n)
+rb_any_hash(VALUE a) {
+    return any_hash(a, obj_any_hash);
+}
+
+/* Here is a hash function for a 64-bit key.  It is about 5 times
+   faster (2 times faster when the uint128 type is absent) on Haswell
+   than a tailored Spooky or City hash function can be.  */
+
+/* These are two primes generated with random bits.  */
+static const uint64_t prime1 = 0x2e0bb864e9ea7df5ULL;
+static const uint64_t prime2 = 0xcdb32970830fcaa1ULL;
+
+static inline uint64_t
+mult_and_mix(uint64_t m1, uint64_t m2)
+{
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+    __uint128_t r = (__uint128_t) m1 * (__uint128_t) m2;
+    return (uint64_t) (r >> 64) ^ (uint64_t) r;
+#else
+    uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32;
+    uint64_t lm1 = m1, lm2 = m2;
+    uint64_t v64_128 = hm1 * hm2;
+    uint64_t v32_96 = hm1 * lm2 + lm1 * hm2;
+    uint64_t v1_32 = lm1 * lm2;
+
+    return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32);
+#endif
+}
+
+static inline uint64_t
+key64_hash(uint64_t key, uint32_t seed)
 {
-    /*
-     * This hash function is lightly-tuned for Ruby.  Further tuning
-     * should be possible.  Notes:
-     *
-     * - (n >> 3) alone is great for heap objects and OK for fixnum,
-     *   however symbols perform poorly.
-     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
-     *   n.b.: +3 to remove most ID scope, +1 worked well initially, too
-     *   n.b.: +1 (instead of 3) worked well initially, too
-     * - (n << 16) was finally added to avoid losing bits for fixnums
-     * - avoid expensive modulo instructions, it is currently only
-     *   shifts and bitmask operations.
-     */
-    return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
+    return mult_and_mix(key + seed, prime1);
 }
 
 long
 rb_objid_hash(st_index_t index)
 {
-    st_index_t hnum = rb_num_hash_start(index);
-
-    hnum = rb_hash_start(hnum);
-    hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
-    hnum = rb_hash_end(hnum);
-    return hnum;
+    return key64_hash(index, (uint32_t) prime2);
 }
 
 static st_index_t
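
key64_hash() thus boils down to a single 64x64->128-bit multiply whose halves are folded together, which is what makes it so much cheaper than Spooky or City for fixed-width keys. A standalone sketch of the fast path (a GCC-style __uint128_t is assumed; main() and the sample keys are illustrative only):

    #include <stdio.h>
    #include <stdint.h>

    static const uint64_t prime1 = 0x2e0bb864e9ea7df5ULL;

    /* multiply into 128 bits, then fold the high and low halves
       together, as in the __uint128_t branch of mult_and_mix() above */
    static inline uint64_t
    mult_and_mix(uint64_t m1, uint64_t m2)
    {
        __uint128_t r = (__uint128_t) m1 * (__uint128_t) m2;
        return (uint64_t) (r >> 64) ^ (uint64_t) r;
    }

    static inline uint64_t
    key64_hash(uint64_t key, uint32_t seed)
    {
        return mult_and_mix(key + seed, prime1);
    }

    int
    main(void)
    {
        /* adjacent keys should spread across the whole 64-bit range */
        printf("%016llx\n%016llx\n",
               (unsigned long long) key64_hash(1, 0),
               (unsigned long long) key64_hash(2, 0));
        return 0;
    }
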
@@ -250,6 +295,7 @@ rb_hash_iter_lev(VALUE h) https://github.com/ruby/ruby/blob/trunk/hash.c#L295
 
 static const struct st_hash_type objhash = {
     rb_any_cmp,
+    rb_any_hash_weak,
     rb_any_hash,
 };
 
@@ -269,7 +315,7 @@ rb_ident_hash(st_data_t n) https://github.com/ruby/ruby/blob/trunk/hash.c#L315
     }
 #endif
 
-    return (st_index_t)rb_num_hash_start((st_index_t)n);
+    return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2);
 }
 
 static const struct st_hash_type identhash = {
Index: ext/-test-/st/foreach/foreach.c
===================================================================
--- ext/-test-/st/foreach/foreach.c	(revision 56649)
+++ ext/-test-/st/foreach/foreach.c	(revision 56650)
@@ -14,13 +14,13 @@ force_unpack_check(struct checker *c, st https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L14
     if (c->nr == 0) {
 	st_data_t i;
 
-	if (!c->tbl->entries_packed) rb_bug("should be packed\n");
+	if (c->tbl->bins != NULL) rb_bug("should be packed\n");
 
 	/* force unpacking during iteration: */
 	for (i = 1; i < expect_size; i++)
 	    st_add_direct(c->tbl, i, i);
 
-	if (c->tbl->entries_packed) rb_bug("should be unpacked\n");
+	if (c->tbl->bins == NULL) rb_bug("should be unpacked\n");
     }
 
     if (key != c->nr) {
@@ -84,7 +84,7 @@ unp_fec(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L84
 
     st_add_direct(tbl, 0, 0);
 
-    if (!tbl->entries_packed) rb_bug("should still be packed\n");
+    if (tbl->bins != NULL) rb_bug("should still be packed\n");
 
     st_foreach_check(tbl, unp_fec_i, (st_data_t)&c, -1);
 
@@ -98,7 +98,7 @@ unp_fec(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L98
 		 (VALUE)c.nr, (VALUE)expect_size);
     }
 
-    if (tbl->entries_packed) rb_bug("should be unpacked\n");
+    if (tbl->bins == NULL) rb_bug("should be unpacked\n");
 
     st_free_table(tbl);
 
@@ -145,7 +145,7 @@ unp_fe(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L145
 
     st_add_direct(tbl, 0, 0);
 
-    if (!tbl->entries_packed) rb_bug("should still be packed\n");
+    if (tbl->bins != NULL) rb_bug("should still be packed\n");
 
     st_foreach(tbl, unp_fe_i, (st_data_t)&c);
 
@@ -159,7 +159,7 @@ unp_fe(VALUE self, VALUE test) https://github.com/ruby/ruby/blob/trunk/ext/-test-/st/foreach/foreach.c#L159
 		 (VALUE)c.nr, (VALUE)expect_size);
     }
 
-    if (tbl->entries_packed) rb_bug("should be unpacked\n");
+    if (tbl->bins == NULL) rb_bug("should be unpacked\n");
 
     st_free_table(tbl);
 
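
The test updates all encode the same idea: the entries_packed flag is gone, and a table is "packed" exactly when it has no bins array, because very small tables skip the bins and are searched linearly (see the st.c header below). A sketch of that invariant outside the test suite (the growth threshold is internal to st.c, so the loop bound here is just a safe guess):

    #include <assert.h>
    #include <stddef.h>
    #include "ruby/st.h"

    static void
    packed_invariant_demo(void)
    {
        st_table *tbl = st_init_numtable();
        st_data_t i;

        st_add_direct(tbl, 0, 0);
        assert(tbl->bins == NULL);   /* small table: no bins, linear search */

        for (i = 1; i < 100; i++)    /* grow well past the small-table size */
            st_add_direct(tbl, i, i);
        assert(tbl->bins != NULL);   /* now uses open-addressing bins */

        st_free_table(tbl);
    }
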
Index: include/ruby/st.h
===================================================================
--- include/ruby/st.h	(revision 56649)
+++ include/ruby/st.h	(revision 56650)
@@ -1,6 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L1
-/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+/* This is a public domain general purpose hash table package
+   originally written by Peter Moore @ UCB.
 
-/* @(#) st.h 5.1 89/12/14 */
+   The hash table data structures were redesigned and the package was
+   rewritten by Vladimir Makarov <vmakarov@r...>.  */
 
 #ifndef RUBY_ST_H
 #define RUBY_ST_H 1
@@ -46,6 +48,10 @@ typedef unsigned LONG_LONG st_data_t; https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L48
 typedef struct st_table st_table;
 typedef st_data_t st_index_t;
 
+/* Maximal value of the unsigned integer type st_index_t.  */
+#define MAX_ST_INDEX_VAL (~(st_index_t) 0)
+
 typedef int st_compare_func(st_data_t, st_data_t);
 typedef st_index_t st_hash_func(st_data_t);
 
@@ -55,10 +61,13 @@ typedef char st_check_for_sizeof_st_inde https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L61
 struct st_hash_type {
     int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
     st_index_t (*hash)(ANYARGS /*st_data_t*/);        /* st_hash_func* */
+    /* The following is an optional function computing a stronger
+       hash.  When many different keys share the same hash we can
+       switch to it to prevent a denial-of-service attack exploiting
+       hash table collisions.  */
+    st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
 };
 
-#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)
-
 #if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
 # define ST_DATA_COMPATIBLE_P(type) \
     __builtin_choose_expr(__builtin_types_compatible_p(type, st_data_t), 1, 0)
@@ -66,33 +75,30 @@ struct st_hash_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L75
 # define ST_DATA_COMPATIBLE_P(type) 0
 #endif
 
+typedef struct st_table_entry st_table_entry;
+
+struct st_table_entry; /* defined in st.c */
+
 struct st_table {
+    /* Cached features of the table -- see st.c for more details.  */
+    unsigned char entry_power, bin_power, size_ind;
+    /* True when we are rebuilding the table.  */
+    unsigned char inside_rebuild_p;
+    /* How many times the table was rebuilt.  */
+    unsigned int rebuilds_num;
+    /* Currently used hash function.  */
+    st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
     const struct st_hash_type *type;
-    st_index_t num_bins;
-    unsigned int entries_packed : 1;
-#ifdef __GNUC__
-    /*
-     * C spec says,
-     *   A bit-field shall have a type that is a qualified or unqualified
-     *   version of _Bool, signed int, unsigned int, or some other
-     *   implementation-defined type. It is implementation-defined whether
-     *   atomic types are permitted.
-     * In short, long and long long bit-field are implementation-defined
-     * feature. Therefore we want to suppress a warning explicitly.
-     */
-    __extension__
-#endif
-    st_index_t num_entries : ST_INDEX_BITS - 1;
-    union {
-	struct {
-	    struct st_table_entry **bins;
-	    void *private_list_head[2];
-	} big;
-	struct {
-	    struct st_packed_entry *entries;
-	    st_index_t real_entries;
-	} packed;
-    } as;
+    /* Number of entries currently in the table.  */
+    st_index_t num_entries;
+    /* Array of bins used for access by keys.  */
+    st_index_t *bins;
+    /* Start and bound index of entries in the entries array.
+       entries_start and entries_bound are in the interval
+       [0, allocated_entries].  */
+    st_index_t entries_start, entries_bound;
+    /* Array of size 2^entry_power.  */
+    st_table_entry *entries;
 };
 
 #define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
@@ -121,7 +127,6 @@ typedef int st_update_callback_func(st_d https://github.com/ruby/ruby/blob/trunk/include/ruby/st.h#L127
 int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg);
 int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
 int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
-int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
 st_index_t st_keys(st_table *table, st_data_t *keys, st_index_t size);
 st_index_t st_keys_check(st_table *table, st_data_t *keys, st_index_t size, st_data_t never);
 st_index_t st_values(st_table *table, st_data_t *values, st_index_t size);
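
The new strong_hash slot works together with st_table.curr_hash above: a table normally hashes with the fast type->hash (rb_any_hash_weak in the objhash change earlier in this commit) and can be rebuilt with type->strong_hash once too many distinct keys share one hash value. A schematic of how a client fills in such a type (the hash functions here are made up for illustration; the actual switching logic lives inside st.c):

    #include <stdint.h>
    #include "ruby/st.h"

    static int
    example_cmp(st_data_t x, st_data_t y)
    {
        return x != y;               /* 0 means equal, as st.c expects */
    }

    /* fast but forgeable hash: fine until someone floods collisions */
    static st_index_t
    example_hash_weak(st_data_t key)
    {
        return (st_index_t) key;
    }

    /* slower, harder-to-forge hash used after a collision attack */
    static st_index_t
    example_hash_strong(st_data_t key)
    {
        return (st_index_t) ((uint64_t) key * 0x9e3779b97f4a7c15ULL);
    }

    /* field order as declared above: compare, hash, strong_hash */
    static const struct st_hash_type example_type = {
        example_cmp,
        example_hash_weak,
        example_hash_strong,
    };
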
Index: st.c
===================================================================
--- st.c	(revision 56649)
+++ st.c	(revision 56650)
@@ -1,6 +1,99 @@ https://github.com/ruby/ruby/blob/trunk/st.c#L1
-/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+/* This is a public domain general purpose hash table package
+   originally written by Peter Moore @ UCB.
 
-/* static	char	sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
+   The hash table data structures were redesigned and the package was
+   rewritten by Vladimir Makarov <vmakarov@r...>.  */
+
+/* The original package implemented classic bucket-based hash tables
+   with entries doubly linked for access in their insertion order.
+   To decrease pointer chasing, and as a consequence to improve data
+   locality, the current implementation is based on storing entries
+   in an array and using hash tables with open addressing.  The
+   current entries are more compact in comparison with the original
+   ones, which also improves data locality.
+
+   The hash table has two arrays called *bins* and *entries*.
+
+     bins:
+    -------
+   |       |                  entries array:
+   |-------|            --------------------------------
+   | index |           |      | entry:  |        |      |
+   |-------|           |      |         |        |      |
+   | ...   |           | ...  | hash    |  ...   | ...  |
+   |-------|           |      | key     |        |      |
+   | empty |           |      | record  |        |      |
+   |-------|            --------------------------------
+   | ...   |                   ^                  ^
+   |-------|                   |_ entries start   |_ entries bound
+   |deleted|
+    -------
+
+   o The entries array contains table entries in the same order as
+     they were inserted.
+
+     When the first entry is deleted, a variable containing the index
+     of the current first entry (*entries start*) is changed.  In all
+     other cases of deletion, we just mark the entry as deleted by
+     using a reserved hash value.
+
+     This organization of the entry storage makes the table shift
+     operation and entries traversal very fast.
+
+   o The bins provide access to the entries by their keys.  The key
+     hash is mapped to a bin containing the *index* of the
+     corresponding entry in the entries array.
+
+     The bin array size is always a power of two, which makes the
+     mapping very fast: we just use the corresponding lower bits of
+     the hash.  Generally it is not a good idea to ignore part of the
+     hash, but the alternative approach is worse.  For example, we
+     could use a modulo operation for the mapping and a prime number
+     for the size of the bin array.  Unfortunately, the modulo
+     operation on big 64-bit numbers is extremely slow (it takes more
+     than 100 cycles on modern Intel CPUs).
+
+     Still, the other bits of the hash value are used when the
+     mapping results in a collision.  In this case we use a secondary
+     hash value which is a function of the colliding bin index and
+     the original hash value.  The function choice guarantees that we
+     can traverse all bins and finally find the corresponding bin:
+     after several iterations the function becomes a full-cycle
+     linear congruential generator, because it satisfies the
+     requirements of the Hull-Dobell theorem.
+
+     When an entry is removed from the table, besides marking the
+     hash in the corresponding entry as described above, we also mark
+     the bin with a special value in order to find entries which had
+     a collision with the removed entries.
+
+     There are two reserved values for the bins.  One denotes an
+     empty bin, the other denotes a bin corresponding to a deleted
+     entry.
+
+   o The length of the bin array is always at least two times the
+     entry array length.  This keeps the table load factor healthy.
+     The trigger for rebuilding the table is always the case when we
+     cannot insert an entry anymore at the entries bound.  We could
+     also decrease the entries bound on deletion, but then we would
+     need special code to count bins corresponding to deleted entries
+     and to reset the bin values when there are too many such bins.
+
+     Table rebuilding is done by creating a new entries array and
+     bins of an appropriate size.  We also try to reuse the arrays
+     in some cases by compacting the entries array and removing
+     deleted entries.
+
+   o To save memory, very small tables have no allocated bins array;
+     we use a linear search for access by a key.
+
+   o To save more memory, we use 8-, 16-, 32- and 64-bit indexes in
+     the bins depending on the current hash table size.
+
+   This implementation speeds up the Ruby hash table benchmarks on
+   average by more than 40% on an Intel Haswell CPU.
+
+*/
 
 #ifdef NOT_RUBY
 #include "regint.h"
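
Two parts of the comment above can be made concrete. Mapping a hash to a bin is a plain bit mask because the bin array length is a power of two, and the collision probe perturbs the index with the remaining hash bits through a linear congruential step. A sketch of such a probe (the exact recurrence and constants in st.c may differ):

    #include <stdint.h>

    /* a power-of-two bin count turns the mapping into a mask,
       avoiding a slow 64-bit modulo */
    static inline uint64_t
    hash_bin(uint64_t hash, unsigned bin_power)
    {
        return hash & ((1ULL << bin_power) - 1);
    }

    /* one collision step: mix in unused hash bits via *perturb; once
       *perturb drains to zero this becomes ind = 5*ind + 1 (mod
       2^bin_power), a full-cycle linear congruential generator by
       the Hull-Dobell theorem (increment odd, multiplier - 1
       divisible by 4), so every bin is eventually visited */
    static inline uint64_t
    secondary_bin(uint64_t ind, uint64_t *perturb, unsigned bin_power)
    {
        *perturb >>= 11;
        return hash_bin(5 * ind + *perturb + 1, bin_power);
    }
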
@@ -14,46 +107,33 @@ https://github.com/ruby/ruby/blob/trunk/st.c#L107
 #include <stdlib.h>
 #endif
 #include <string.h>
-#include "ccan/list/list.h"
+#include <assert.h>
 
-typedef struct st_table_entry st_table_entry;
+#ifdef __GNUC__
+#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
+#define EXPECT(expr, val) __builtin_expect(expr, val)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define PREFETCH(addr, write_p)
+#define EXPECT(expr, val) (expr)
+#define ATTRIBUTE_UNUSED
+#endif
+
+#ifdef ST_DEBUG
+#define st_assert(cond) assert(cond)
+#else
+#define st_assert(cond) ((void)(0 && (cond)))
+#endif
+
+/* The type of hashes.  */
+typedef st_index_t st_hash_t;
 
 struct st_table_entry {
-    st_index_t hash;
+    st_hash_t hash;
     st_data_t key;
     st_data_t record;
-    st_table_entry *next;
-    struct list_node olist;
 };
 
-typedef struct st_packed_entry {
-    st_index_t hash;
-    st_data_t key, val;
-} st_packed_entry;
-
-#ifndef STATIC_ASSERT
-#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1]
-#endif
-
-#define ST_DEFAULT_MAX_DENSITY 5
-#define ST_DEFAULT_INIT_TABLE_SIZE 16
-#define ST_DEFAULT_PACKED_TABLE_SIZE 18
-#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
-#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
-
-STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]));
-STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]));
-
- /*
-  * DEFAULT_MAX_DENSITY is the default for the largest we allow the
-  * average number of items per bin before increasing the number of
-  * bins
-  *
-  * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
-  * allocated initially
-  *
-  */
-
 #define type_numhash st_hashtype_num
 const struct st_hash_type st_hashtype_num = {
     st_numcmp,
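
PREFETCH and EXPECT wrap the GCC builtins __builtin_prefetch and __builtin_expect behind no-op fallbacks, so the rest of the file can use them unconditionally. A sketch of how code inside st.c might apply them when scanning the entries array (the deleted_entry_p() predicate is made up for this illustration):

    /* count live entries between entries_start and entries_bound */
    static st_index_t
    count_live_entries(const st_table *tab)
    {
        st_index_t i, n = 0;

        for (i = tab->entries_start; i < tab->entries_bound; i++) {
            /* hint: the next entry will be read soon */
            PREFETCH(&tab->entries[i + 1], 0);
            /* hint: a deleted entry is the rare case */
            if (EXPECT(deleted_entry_p(&tab->entries[i]), 0))
                continue;
            n++;
        }
        return n;
    }
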
@@ -73,105 +153,365 @@ static const struct st_hash_type type_st https://github.com/ruby/ruby/blob/trunk/st.c#L153
     strcasehash,
 };
 
-static void rehash(st_table *);
+/* Value used to catch uninitialized entries/bins during debugging.
+   There is a possibility of a false alarm, but its probability is
+   extremely small.  */
+#define ST_INIT_VAL 0xafafafafafafafaf
+#define ST_INIT_VAL_BYTE 0xafa
 
 #ifdef RUBY
 #undef malloc
 #undef realloc
 #undef calloc
 #undef free
-#define malloc xmalloc
-#define calloc xcalloc
-#define realloc xrealloc
-#define free(x) xfree(x)
-#endif
-
-#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)
-
-#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
-#define hash_pos(h,n) ((h) & (n - 1))

(... truncated)

-- 
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/