ruby-changes:58337
From: Dylan <ko1@a...>
Date: Mon, 21 Oct 2019 17:29:40 +0900 (JST)
Subject: [ruby-changes:58337] b970259044 (master): Stop making a redundant hash copy in Hash#dup (#2489)
https://git.ruby-lang.org/ruby.git/commit/?id=b970259044 From b9702590445dfea62d271d0a5c942b7adfaaacdd Mon Sep 17 00:00:00 2001 From: Dylan Thacker-Smith <dylan.smith@s...> Date: Mon, 21 Oct 2019 04:29:21 -0400 Subject: Stop making a redundant hash copy in Hash#dup (#2489) * Stop making a redundant hash copy in Hash#dup It was making a copy of the hash without rehashing, then created an extra copy of the hash to do the rehashing. Since rehashing creates a new copy already, this change just uses that rehashing to make the copy. [Bug #16121] * Remove redundant Check_Type after to_hash * Fix freeing and clearing destination hash in Hash#initialize_copy The code was assuming the state of the destination hash based on the source hash for clearing any existing table on it. If these don't match, then that can cause the old table to be leaked. This can be seen by compiling hash.c with `#define HASH_DEBUG 1` and running the following script, which will crash from a debug assertion. ```ruby h = 9.times.map { |i| [i, i] }.to_h h.send(:initialize_copy, {}) ``` * Remove dead code paths in rb_hash_initialize_copy Given that `RHASH_ST_TABLE_P(h)` is defined as `(!RHASH_AR_TABLE_P(h))` it shouldn't be possible for a hash to be neither of these, so there is no need for the removed `else if` blocks. * Share implementation between Hash#replace and Hash#initialize_copy This also fixes key rehashing for small hashes backed by an array table for Hash#replace. This used to be done consistently in ruby 2.5.x, but stopped being done for small arrays in ruby 2.6.x. This also bring optimization improvements that were done for Hash#initialize_copy to Hash#replace. * Add the Hash#dup benchmark diff --git a/benchmark/hash_dup.yml b/benchmark/hash_dup.yml new file mode 100644 index 0000000..65f521e --- /dev/null +++ b/benchmark/hash_dup.yml @@ -0,0 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/hash_dup.yml#L1 +prelude: | + small_hash = { a: 1 } + larger_hash = 20.times.map { |i| [('a'.ord + i).chr.to_sym, i] }.to_h + +benchmark: + dup_small: small_hash.dup + dup_larger: larger_hash.dup +loop_count: 10000 diff --git a/hash.c b/hash.c index b73ade7..705ec31 100644 --- a/hash.c +++ b/hash.c @@ -2808,49 +2808,6 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val) https://github.com/ruby/ruby/blob/trunk/hash.c#L2808 return val; } -static int -replace_i(VALUE key, VALUE val, VALUE hash) -{ - rb_hash_aset(hash, key, val); - - return ST_CONTINUE; -} - -/* :nodoc: */ -static VALUE -rb_hash_initialize_copy(VALUE hash, VALUE hash2) -{ - rb_hash_modify_check(hash); - hash2 = to_hash(hash2); - - Check_Type(hash2, T_HASH); - - if (hash == hash2) return hash; - - if (RHASH_AR_TABLE_P(hash2)) { - if (RHASH_AR_TABLE_P(hash)) ar_free_and_clear_table(hash); - ar_copy(hash, hash2); - if (RHASH_AR_TABLE_SIZE(hash)) - rb_hash_rehash(hash); - } - else if (RHASH_ST_TABLE_P(hash2)) { - if (RHASH_ST_TABLE_P(hash)) st_free_table(RHASH_ST_TABLE(hash)); - RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(hash2))); - if (RHASH_ST_TABLE(hash)->num_entries) - rb_hash_rehash(hash); - } - else if (RHASH_AR_TABLE_P(hash)) { - ar_clear(hash); - } - else if (RHASH_ST_TABLE_P(hash)) { - st_clear(RHASH_ST_TABLE(hash)); - } - - COPY_DEFAULT(hash, hash2); - - return hash; -} - /* * call-seq: * hsh.replace(other_hash) -> hsh @@ -2868,26 +2825,35 @@ rb_hash_replace(VALUE hash, VALUE hash2) https://github.com/ruby/ruby/blob/trunk/hash.c#L2825 { rb_hash_modify_check(hash); if (hash == hash2) return hash; + if (RHASH_ITER_LEV(hash) > 0) { + rb_raise(rb_eRuntimeError, "can't replace hash during iteration"); + } hash2 = to_hash(hash2); COPY_DEFAULT(hash, hash2); - rb_hash_clear(hash); - if (RHASH_AR_TABLE_P(hash)) { - if (RHASH_AR_TABLE_P(hash2)) { - ar_copy(hash, hash2); - } - else { - goto st_to_st; - } + if (RHASH_AR_TABLE_P(hash2)) { + ar_clear(hash); + } + else { + ar_free_and_clear_table(hash); + RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), RHASH_SIZE(hash2))); + } } else { - if (RHASH_AR_TABLE_P(hash2)) ar_force_convert_table(hash2, __FILE__, __LINE__); - st_to_st: + if (RHASH_AR_TABLE_P(hash2)) { + st_free_table(RHASH_ST_TABLE(hash)); + RHASH_ST_CLEAR(hash); + } + else { + st_clear(RHASH_ST_TABLE(hash)); RHASH_TBL_RAW(hash)->type = RHASH_ST_TABLE(hash2)->type; - rb_hash_foreach(hash2, replace_i, hash); + } } + rb_hash_foreach(hash2, rb_hash_rehash_i, (VALUE)hash); + + rb_gc_writebarrier_remember(hash); return hash; } @@ -6143,7 +6109,7 @@ Init_Hash(void) https://github.com/ruby/ruby/blob/trunk/hash.c#L6109 rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1); rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1); rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1); - rb_define_method(rb_cHash, "initialize_copy", rb_hash_initialize_copy, 1); + rb_define_method(rb_cHash, "initialize_copy", rb_hash_replace, 1); rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0); rb_define_method(rb_cHash, "to_hash", rb_hash_to_hash, 0); -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/