[前][次][番号順一覧][スレッド一覧]

ruby-changes:58337

From: Dylan <ko1@a...>
Date: Mon, 21 Oct 2019 17:29:40 +0900 (JST)
Subject: [ruby-changes:58337] b970259044 (master): Stop making a redundant hash copy in Hash#dup (#2489)

https://git.ruby-lang.org/ruby.git/commit/?id=b970259044

From b9702590445dfea62d271d0a5c942b7adfaaacdd Mon Sep 17 00:00:00 2001
From: Dylan Thacker-Smith <dylan.smith@s...>
Date: Mon, 21 Oct 2019 04:29:21 -0400
Subject: Stop making a redundant hash copy in Hash#dup (#2489)

* Stop making a redundant hash copy in Hash#dup

It was making a copy of the hash without rehashing, then created an
extra copy of the hash to do the rehashing.  Since rehashing creates
a new copy already, this change just uses that rehashing to make
the copy.

[Bug #16121]

* Remove redundant Check_Type after to_hash

* Fix freeing and clearing destination hash in Hash#initialize_copy

The code was assuming the state of the destination hash based on the
source hash for clearing any existing table on it. If these don't match,
then that can cause the old table to be leaked. This can be seen by
compiling hash.c with `#define HASH_DEBUG 1` and running the following
script, which will crash from a debug assertion.

```ruby
h = 9.times.map { |i| [i, i] }.to_h
h.send(:initialize_copy, {})
```

* Remove dead code paths in rb_hash_initialize_copy

Given that `RHASH_ST_TABLE_P(h)` is defined as `(!RHASH_AR_TABLE_P(h))`
it shouldn't be possible for a hash to be neither of these, so there
is no need for the removed `else if` blocks.

* Share implementation between Hash#replace and Hash#initialize_copy

This also fixes key rehashing for small hashes backed by an array
table for Hash#replace.  This used to be done consistently in ruby
2.5.x, but stopped being done for small arrays in ruby 2.6.x.

This also bring optimization improvements that were done for
Hash#initialize_copy to Hash#replace.

* Add the Hash#dup benchmark

diff --git a/benchmark/hash_dup.yml b/benchmark/hash_dup.yml
new file mode 100644
index 0000000..65f521e
--- /dev/null
+++ b/benchmark/hash_dup.yml
@@ -0,0 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/hash_dup.yml#L1
+prelude: |
+  small_hash = { a: 1 }
+  larger_hash = 20.times.map { |i| [('a'.ord + i).chr.to_sym, i] }.to_h
+
+benchmark:
+  dup_small: small_hash.dup
+  dup_larger: larger_hash.dup
+loop_count: 10000
diff --git a/hash.c b/hash.c
index b73ade7..705ec31 100644
--- a/hash.c
+++ b/hash.c
@@ -2808,49 +2808,6 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val) https://github.com/ruby/ruby/blob/trunk/hash.c#L2808
     return val;
 }
 
-static int
-replace_i(VALUE key, VALUE val, VALUE hash)
-{
-    rb_hash_aset(hash, key, val);
-
-    return ST_CONTINUE;
-}
-
-/* :nodoc: */
-static VALUE
-rb_hash_initialize_copy(VALUE hash, VALUE hash2)
-{
-    rb_hash_modify_check(hash);
-    hash2 = to_hash(hash2);
-
-    Check_Type(hash2, T_HASH);
-
-    if (hash == hash2) return hash;
-
-    if (RHASH_AR_TABLE_P(hash2)) {
-        if (RHASH_AR_TABLE_P(hash)) ar_free_and_clear_table(hash);
-        ar_copy(hash, hash2);
-        if (RHASH_AR_TABLE_SIZE(hash))
-	    rb_hash_rehash(hash);
-    }
-    else if (RHASH_ST_TABLE_P(hash2)) {
-        if (RHASH_ST_TABLE_P(hash)) st_free_table(RHASH_ST_TABLE(hash));
-        RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(hash2)));
-        if (RHASH_ST_TABLE(hash)->num_entries)
-            rb_hash_rehash(hash);
-    }
-    else if (RHASH_AR_TABLE_P(hash)) {
-        ar_clear(hash);
-    }
-    else if (RHASH_ST_TABLE_P(hash)) {
-        st_clear(RHASH_ST_TABLE(hash));
-    }
-
-    COPY_DEFAULT(hash, hash2);
-
-    return hash;
-}
-
 /*
  *  call-seq:
  *     hsh.replace(other_hash) -> hsh
@@ -2868,26 +2825,35 @@ rb_hash_replace(VALUE hash, VALUE hash2) https://github.com/ruby/ruby/blob/trunk/hash.c#L2825
 {
     rb_hash_modify_check(hash);
     if (hash == hash2) return hash;
+    if (RHASH_ITER_LEV(hash) > 0) {
+        rb_raise(rb_eRuntimeError, "can't replace hash during iteration");
+    }
     hash2 = to_hash(hash2);
 
     COPY_DEFAULT(hash, hash2);
 
-    rb_hash_clear(hash);
-
     if (RHASH_AR_TABLE_P(hash)) {
-        if (RHASH_AR_TABLE_P(hash2)) {
-            ar_copy(hash, hash2);
-        }
-        else {
-            goto st_to_st;
-        }
+      if (RHASH_AR_TABLE_P(hash2)) {
+        ar_clear(hash);
+      }
+      else {
+        ar_free_and_clear_table(hash);
+        RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), RHASH_SIZE(hash2)));
+      }
     }
     else {
-        if (RHASH_AR_TABLE_P(hash2)) ar_force_convert_table(hash2, __FILE__, __LINE__);
-      st_to_st:
+      if (RHASH_AR_TABLE_P(hash2)) {
+        st_free_table(RHASH_ST_TABLE(hash));
+        RHASH_ST_CLEAR(hash);
+      }
+      else {
+        st_clear(RHASH_ST_TABLE(hash));
         RHASH_TBL_RAW(hash)->type = RHASH_ST_TABLE(hash2)->type;
-        rb_hash_foreach(hash2, replace_i, hash);
+      }
     }
+    rb_hash_foreach(hash2, rb_hash_rehash_i, (VALUE)hash);
+
+    rb_gc_writebarrier_remember(hash);
 
     return hash;
 }
@@ -6143,7 +6109,7 @@ Init_Hash(void) https://github.com/ruby/ruby/blob/trunk/hash.c#L6109
     rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1);
     rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1);
     rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1);
-    rb_define_method(rb_cHash, "initialize_copy", rb_hash_initialize_copy, 1);
+    rb_define_method(rb_cHash, "initialize_copy", rb_hash_replace, 1);
     rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0);
 
     rb_define_method(rb_cHash, "to_hash", rb_hash_to_hash, 0);
-- 
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]