ruby-changes:58571
From: John <ko1@a...>
Date: Tue, 5 Nov 2019 08:27:35 +0900 (JST)
Subject: [ruby-changes:58571] ebbe396d3c (master): Use ident hash for top-level recursion check
https://git.ruby-lang.org/ruby.git/commit/?id=ebbe396d3c From ebbe396d3c89345a1c36c0b5154e314cc33e19b7 Mon Sep 17 00:00:00 2001 From: John Hawthorn <john@h...> Date: Sun, 3 Nov 2019 20:51:30 -0800 Subject: Use ident hash for top-level recursion check We track recursion in order to not infinite loop in ==, inspect, and similar methods by keeping a thread-local 1 or 2 level hash. This allows us to track when we have seen the same object (ex. using inspect) or same pair of objects (ex. using ==) in this stack before and to treat that differently. Previously both levels of this Hash used the object's memory_id as a key (using object_id would be slow and wasteful). Unfortunately, prettyprint (pp.rb) uses this thread local variable to "pretend" to be inspect and inherit its same recursion behaviour. This commit changes the top-level hash to be an identity hash and to use objects as keys instead of their object_ids. I'd like to have also converted the 2nd level hash to an ident hash, but it would have prevented an optimization which avoids allocating a 2nd level hash for only a single element, which we want to keep because it's by far the most common case. So the new format of this hash is: { object => true } (not paired) { lhs_object => rhs_object_memory_id } (paired, single object) { lhs_object => { rhs_object_memory_id => true, ... } } (paired, many objects) We must also update pp.rb to match this (using identity hashes). diff --git a/lib/pp.rb b/lib/pp.rb index 3e17c7f..de4b79c 100644 --- a/lib/pp.rb +++ b/lib/pp.rb @@ -106,17 +106,17 @@ class PP < PrettyPrint https://github.com/ruby/ruby/blob/trunk/lib/pp.rb#L106 # and preserves the previous set of objects being printed. def guard_inspect_key if Thread.current[:__recursive_key__] == nil - Thread.current[:__recursive_key__] = {}.taint + Thread.current[:__recursive_key__] = {}.compare_by_identity.taint end if Thread.current[:__recursive_key__][:inspect] == nil - Thread.current[:__recursive_key__][:inspect] = {}.taint + Thread.current[:__recursive_key__][:inspect] = {}.compare_by_identity.taint end save = Thread.current[:__recursive_key__][:inspect] begin - Thread.current[:__recursive_key__][:inspect] = {}.taint + Thread.current[:__recursive_key__][:inspect] = {}.compare_by_identity.taint yield ensure Thread.current[:__recursive_key__][:inspect] = save @@ -149,18 +149,16 @@ class PP < PrettyPrint https://github.com/ruby/ruby/blob/trunk/lib/pp.rb#L149 # Object#pretty_print_cycle is used when +obj+ is already # printed, a.k.a the object reference chain has a cycle. def pp(obj) - id = obj.object_id - - if check_inspect_key(id) + if check_inspect_key(obj) group {obj.pretty_print_cycle self} return end begin - push_inspect_key(id) + push_inspect_key(obj) group {obj.pretty_print self} ensure - pop_inspect_key(id) unless PP.sharing_detection + pop_inspect_key(obj) unless PP.sharing_detection end end diff --git a/thread.c b/thread.c index eff5d39..15dea98 100644 --- a/thread.c +++ b/thread.c @@ -4880,20 +4880,20 @@ recursive_list_access(VALUE sym) https://github.com/ruby/ruby/blob/trunk/thread.c#L4880 list = rb_hash_aref(hash, sym); } if (NIL_P(list) || !RB_TYPE_P(list, T_HASH)) { - list = rb_hash_new(); + list = rb_ident_hash_new(); rb_hash_aset(hash, sym, list); } return list; } /* - * Returns Qtrue iff obj_id (or the pair <obj, paired_obj>) is already + * Returns Qtrue iff obj (or the pair <obj, paired_obj>) is already * in the recursion list. * Assumes the recursion list is valid. */ static VALUE -recursive_check(VALUE list, VALUE obj_id, VALUE paired_obj_id) +recursive_check(VALUE list, VALUE obj, VALUE paired_obj_id) { #if SIZEOF_LONG == SIZEOF_VOIDP #define OBJ_ID_EQL(obj_id, other) ((obj_id) == (other)) @@ -4902,7 +4902,7 @@ recursive_check(VALUE list, VALUE obj_id, VALUE paired_obj_id) https://github.com/ruby/ruby/blob/trunk/thread.c#L4902 rb_big_eql((obj_id), (other)) : ((obj_id) == (other))) #endif - VALUE pair_list = rb_hash_lookup2(list, obj_id, Qundef); + VALUE pair_list = rb_hash_lookup2(list, obj, Qundef); if (pair_list == Qundef) return Qfalse; if (paired_obj_id) { @@ -4919,10 +4919,10 @@ recursive_check(VALUE list, VALUE obj_id, VALUE paired_obj_id) https://github.com/ruby/ruby/blob/trunk/thread.c#L4919 } /* - * Pushes obj_id (or the pair <obj_id, paired_obj_id>) in the recursion list. - * For a single obj_id, it sets list[obj_id] to Qtrue. - * For a pair, it sets list[obj_id] to paired_obj_id if possible, - * otherwise list[obj_id] becomes a hash like: + * Pushes obj (or the pair <obj, paired_obj>) in the recursion list. + * For a single obj, it sets list[obj] to Qtrue. + * For a pair, it sets list[obj] to paired_obj_id if possible, + * otherwise list[obj] becomes a hash like: * {paired_obj_id_1 => true, paired_obj_id_2 => true, ... } * Assumes the recursion list is valid. */ @@ -4950,10 +4950,10 @@ recursive_push(VALUE list, VALUE obj, VALUE paired_obj) https://github.com/ruby/ruby/blob/trunk/thread.c#L4950 } /* - * Pops obj_id (or the pair <obj_id, paired_obj_id>) from the recursion list. - * For a pair, if list[obj_id] is a hash, then paired_obj_id is + * Pops obj (or the pair <obj, paired_obj>) from the recursion list. + * For a pair, if list[obj] is a hash, then paired_obj_id is * removed from the hash and no attempt is made to simplify - * list[obj_id] from {only_one_paired_id => true} to only_one_paired_id + * list[obj] from {only_one_paired_id => true} to only_one_paired_id * Assumes the recursion list is valid. */ @@ -4980,7 +4980,6 @@ struct exec_recursive_params { https://github.com/ruby/ruby/blob/trunk/thread.c#L4980 VALUE (*func) (VALUE, VALUE, int); VALUE list; VALUE obj; - VALUE objid; VALUE pairid; VALUE arg; }; @@ -5012,13 +5011,12 @@ exec_recursive(VALUE (*func) (VALUE, VALUE, int), VALUE obj, VALUE pairid, VALUE https://github.com/ruby/ruby/blob/trunk/thread.c#L5011 struct exec_recursive_params p; int outermost; p.list = recursive_list_access(sym); - p.objid = rb_memory_id(obj); p.obj = obj; p.pairid = pairid; p.arg = arg; outermost = outer && !recursive_check(p.list, ID2SYM(recursive_key), 0); - if (recursive_check(p.list, p.objid, pairid)) { + if (recursive_check(p.list, p.obj, pairid)) { if (outer && !outermost) { rb_throw_obj(p.list, p.list); } @@ -5031,9 +5029,9 @@ exec_recursive(VALUE (*func) (VALUE, VALUE, int), VALUE obj, VALUE pairid, VALUE https://github.com/ruby/ruby/blob/trunk/thread.c#L5029 if (outermost) { recursive_push(p.list, ID2SYM(recursive_key), 0); - recursive_push(p.list, p.objid, p.pairid); + recursive_push(p.list, p.obj, p.pairid); result = rb_catch_protect(p.list, exec_recursive_i, (VALUE)&p, &state); - if (!recursive_pop(p.list, p.objid, p.pairid)) goto invalid; + if (!recursive_pop(p.list, p.obj, p.pairid)) goto invalid; if (!recursive_pop(p.list, ID2SYM(recursive_key), 0)) goto invalid; if (state != TAG_NONE) EC_JUMP_TAG(GET_EC(), state); if (result == p.list) { @@ -5042,13 +5040,13 @@ exec_recursive(VALUE (*func) (VALUE, VALUE, int), VALUE obj, VALUE pairid, VALUE https://github.com/ruby/ruby/blob/trunk/thread.c#L5040 } else { volatile VALUE ret = Qundef; - recursive_push(p.list, p.objid, p.pairid); + recursive_push(p.list, p.obj, p.pairid); EC_PUSH_TAG(GET_EC()); if ((state = EC_EXEC_TAG()) == TAG_NONE) { ret = (*func)(obj, arg, FALSE); } EC_POP_TAG(); - if (!recursive_pop(p.list, p.objid, p.pairid)) { + if (!recursive_pop(p.list, p.obj, p.pairid)) { invalid: rb_raise(rb_eTypeError, "invalid inspect_tbl pair_list " "for %+"PRIsVALUE" in %+"PRIsVALUE, -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/