ruby-changes:55411
From: tenderlove <ko1@a...>
Date: Sat, 20 Apr 2019 10:19:56 +0900 (JST)
Subject: [ruby-changes:55411] tenderlove:r67620 (trunk): Add `GC.compact` again.
tenderlove 2019-04-20 10:19:47 +0900 (Sat, 20 Apr 2019) New Revision: 67620 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=67620 Log: Add `GC.compact` again. 🙏 Added files: trunk/test/ruby/test_gc_compact.rb Modified files: trunk/NEWS trunk/class.c trunk/constant.h trunk/ext/json/generator/generator.c trunk/ext/json/parser/parser.c trunk/gc.c trunk/gc.h trunk/hash.c trunk/id_table.c trunk/id_table.h trunk/include/ruby/intern.h trunk/include/ruby/ruby.h trunk/include/ruby/st.h trunk/internal.h trunk/iseq.c trunk/iseq.h trunk/method.h trunk/st.c trunk/symbol.c trunk/symbol.h trunk/test/ruby/test_gc.rb trunk/thread.c trunk/transient_heap.c trunk/transient_heap.h trunk/variable.c trunk/vm.c trunk/vm_core.h trunk/vm_eval.c Index: NEWS =================================================================== --- NEWS (revision 67619) +++ NEWS (revision 67620) @@ -122,6 +122,15 @@ JIT:: https://github.com/ruby/ruby/blob/trunk/NEWS#L122 * Default value of +--jit-min-calls+ is changed from 5 to 10,000 +GC:: + + * New `GC.compact` method for compacting the heap. + This function compacts live objects in the heap so that fewer pages may + be used, and the heap may be more CoW friendly. 
[Feature #15626] + + Details on the algorithm and caveats can be found here: + https://bugs.ruby-lang.org/issues/15626 + === Miscellaneous changes * Require compilers to support C99 [Misc #15347] Index: id_table.c =================================================================== --- id_table.c (revision 67619) +++ id_table.c (revision 67620) @@ -267,6 +267,28 @@ rb_id_table_delete(struct rb_id_table *t https://github.com/ruby/ruby/blob/trunk/id_table.c#L267 } void +rb_id_table_foreach_with_replace(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, rb_id_table_update_callback_func_t *replace, void *data) +{ + int i, capa = tbl->capa; + + for (i=0; i<capa; i++) { + if (ITEM_KEY_ISSET(tbl, i)) { + const id_key_t key = ITEM_GET_KEY(tbl, i); + enum rb_id_table_iterator_result ret = (*func)(Qundef, tbl->items[i].val, data); + assert(key != 0); + + if (ret == ID_TABLE_REPLACE) { + VALUE val = tbl->items[i].val; + ret = (*replace)(NULL, &val, data, TRUE); + tbl->items[i].val = val; + } + else if (ret == ID_TABLE_STOP) + return; + } + } +} + +void rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, void *data) { int i, capa = tbl->capa; Index: id_table.h =================================================================== --- id_table.h (revision 67619) +++ id_table.h (revision 67620) @@ -9,6 +9,7 @@ enum rb_id_table_iterator_result { https://github.com/ruby/ruby/blob/trunk/id_table.h#L9 ID_TABLE_CONTINUE = ST_CONTINUE, ID_TABLE_STOP = ST_STOP, ID_TABLE_DELETE = ST_DELETE, + ID_TABLE_REPLACE = ST_REPLACE, ID_TABLE_ITERATOR_RESULT_END }; @@ -23,9 +24,11 @@ int rb_id_table_insert(struct rb_id_tabl https://github.com/ruby/ruby/blob/trunk/id_table.h#L24 int rb_id_table_lookup(struct rb_id_table *tbl, ID id, VALUE *valp); int rb_id_table_delete(struct rb_id_table *tbl, ID id); +typedef enum rb_id_table_iterator_result rb_id_table_update_callback_func_t(ID *id, VALUE *val, void *data, int existing); typedef enum rb_id_table_iterator_result 
rb_id_table_foreach_func_t(ID id, VALUE val, void *data); typedef enum rb_id_table_iterator_result rb_id_table_foreach_values_func_t(VALUE val, void *data); void rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, void *data); +void rb_id_table_foreach_with_replace(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, rb_id_table_update_callback_func_t *replace, void *data); void rb_id_table_foreach_values(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, void *data); #endif /* RUBY_ID_TABLE_H */ Index: vm.c =================================================================== --- vm.c (revision 67619) +++ vm.c (revision 67620) @@ -2203,6 +2203,15 @@ rb_vm_call_cfunc(VALUE recv, VALUE (*fun https://github.com/ruby/ruby/blob/trunk/vm.c#L2203 /* vm */ void +rb_vm_update_references(void *ptr) +{ + if (ptr) { + rb_vm_t *vm = ptr; + rb_update_st_references(vm->frozen_strings); + } +} + +void rb_vm_mark(void *ptr) { RUBY_MARK_ENTER("vm"); @@ -2210,12 +2219,30 @@ rb_vm_mark(void *ptr) https://github.com/ruby/ruby/blob/trunk/vm.c#L2219 if (ptr) { rb_vm_t *vm = ptr; rb_thread_t *th = 0; + long i, len; + const VALUE *obj_ary; list_for_each(&vm->living_threads, th, vmlt_node) { rb_gc_mark(th->self); } rb_gc_mark(vm->thgroup_default); rb_gc_mark(vm->mark_object_ary); + + len = RARRAY_LEN(vm->mark_object_ary); + obj_ary = RARRAY_CONST_PTR(vm->mark_object_ary); + for (i=0; i < len; i++) { + const VALUE *ptr; + long j, jlen; + + rb_gc_mark(*obj_ary); + jlen = RARRAY_LEN(*obj_ary); + ptr = RARRAY_CONST_PTR(*obj_ary); + for (j=0; j < jlen; j++) { + rb_gc_mark(*ptr++); + } + obj_ary++; + } + rb_gc_mark(vm->load_path); rb_gc_mark(vm->load_path_snapshot); RUBY_MARK_UNLESS_NULL(vm->load_path_check_cache); @@ -2225,6 +2252,8 @@ rb_vm_mark(void *ptr) https://github.com/ruby/ruby/blob/trunk/vm.c#L2252 rb_gc_mark(vm->top_self); RUBY_MARK_UNLESS_NULL(vm->coverages); rb_gc_mark(vm->defined_module_hash); + /* Prevent classes from moving */ + 
rb_mark_tbl(rb_hash_tbl(vm->defined_module_hash, __FILE__, __LINE__)); if (vm->loading_table) { rb_mark_tbl(vm->loading_table); @@ -2463,7 +2492,7 @@ rb_execution_context_mark(const rb_execu https://github.com/ruby/ruby/blob/trunk/vm.c#L2492 rb_control_frame_t *cfp = ec->cfp; rb_control_frame_t *limit_cfp = (void *)(ec->vm_stack + ec->vm_stack_size); - rb_gc_mark_values((long)(sp - p), p); + rb_gc_mark_stack_values((long)(sp - p), p); while (cfp != limit_cfp) { const VALUE *ep = cfp->ep; Index: variable.c =================================================================== --- variable.c (revision 67619) +++ variable.c (revision 67620) @@ -1201,6 +1201,16 @@ rb_mark_generic_ivar(VALUE obj) https://github.com/ruby/ruby/blob/trunk/variable.c#L1201 } void +rb_mv_generic_ivar(VALUE rsrc, VALUE dst) +{ + st_data_t key = (st_data_t)rsrc; + struct gen_ivtbl *ivtbl; + + if (st_delete(generic_iv_tbl, &key, (st_data_t *)&ivtbl)) + st_insert(generic_iv_tbl, (st_data_t)dst, (st_data_t)ivtbl); +} + +void rb_free_generic_ivar(VALUE obj) { st_data_t key = (st_data_t)obj; @@ -1950,7 +1960,7 @@ rb_mod_const_missing(VALUE klass, VALUE https://github.com/ruby/ruby/blob/trunk/variable.c#L1960 static void autoload_mark(void *ptr) { - rb_mark_tbl((st_table *)ptr); + rb_mark_tbl_no_pin((st_table *)ptr); } static void @@ -1966,9 +1976,15 @@ autoload_memsize(const void *ptr) https://github.com/ruby/ruby/blob/trunk/variable.c#L1976 return st_memsize(tbl); } +static void +autoload_compact(void *ptr) +{ + rb_gc_update_tbl_refs((st_table *)ptr); +} + static const rb_data_type_t autoload_data_type = { "autoload", - {autoload_mark, autoload_free, autoload_memsize,}, + {autoload_mark, autoload_free, autoload_memsize, autoload_compact,}, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; @@ -2015,11 +2031,18 @@ struct autoload_data_i { https://github.com/ruby/ruby/blob/trunk/variable.c#L2031 }; static void +autoload_i_compact(void *ptr) +{ + struct autoload_data_i *p = ptr; + p->feature = 
rb_gc_new_location(p->feature); +} + +static void autoload_i_mark(void *ptr) { struct autoload_data_i *p = ptr; - rb_gc_mark(p->feature); + rb_gc_mark_no_pin(p->feature); /* allow GC to free us if no modules refer to this via autoload_const.ad */ if (list_empty(&p->constants)) { @@ -2046,7 +2069,7 @@ autoload_i_memsize(const void *ptr) https://github.com/ruby/ruby/blob/trunk/variable.c#L2069 static const rb_data_type_t autoload_data_i_type = { "autoload_i", - {autoload_i_mark, autoload_i_free, autoload_i_memsize,}, + {autoload_i_mark, autoload_i_free, autoload_i_memsize, autoload_i_compact}, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; @@ -2971,6 +2994,7 @@ rb_define_const(VALUE klass, const char https://github.com/ruby/ruby/blob/trunk/variable.c#L2994 if (!rb_is_const_id(id)) { rb_warn("rb_define_const: invalid name `%s' for constant", name); } + rb_gc_register_mark_object(val); rb_const_set(klass, id, val); } Index: iseq.c =================================================================== --- iseq.c (revision 67619) +++ iseq.c (revision 67620) @@ -137,11 +137,11 @@ rb_vm_insn_null_translator(const void *a https://github.com/ruby/ruby/blob/trunk/iseq.c#L137 return (VALUE)addr; } -typedef void iseq_value_itr_t(void *ctx, VALUE obj); +typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj); typedef VALUE rb_vm_insns_translator_t(const void *addr); static int -iseq_extract_values(const VALUE *code, size_t pos, iseq_value_itr_t * func, void *data, rb_vm_insns_translator_t * translator) +iseq_extract_values(VALUE *code, size_t pos, iseq_value_itr_t * func, void *data, rb_vm_insns_translator_t * translator) { VALUE insn = translator((void *)code[pos]); int len = insn_len(insn); @@ -157,7 +157,10 @@ iseq_extract_values(const VALUE *code, s https://github.com/ruby/ruby/blob/trunk/iseq.c#L157 { VALUE op = code[pos + op_no + 1]; if (!SPECIAL_CONST_P(op)) { - func(data, op); + VALUE newop = func(data, op); + if (newop != op) { + code[pos + op_no + 1] = newop; + } } break; } @@ -165,7 
+168,10 @@ iseq_extract_values(const VALUE *code, s https://github.com/ruby/ruby/blob/trunk/iseq.c#L168 { union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)code[pos + op_no + 1]; if (is->once.value) { - func(data, is->once.value); + VALUE nv = func(data, is->once.value); + if (is->once.value != nv) { + is->once.value = nv; + } } break; } @@ -181,7 +187,7 @@ static void https://github.com/ruby/ruby/blob/trunk/iseq.c#L187 rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data) { unsigned int size; - const VALUE *code; + VALUE *code; size_t n; rb_vm_insns_translator_t * translator; const struct rb_iseq_constant_body *const body = iseq->body; @@ -205,10 +211,65 @@ rb_iseq_each_value(const rb_iseq_t *iseq https://github.com/ruby/ruby/blob/trunk/iseq.c#L211 } } -static void +static VALUE +update_each_insn_value(void *ctx, VALUE obj) +{ + return rb_gc_new_location(obj); +} + +void +rb_iseq_update_references(rb_iseq_t *iseq) +{ + if (iseq->body) { + struct rb_iseq_constant_body *body = iseq->body; + + body->variable.coverage = rb_gc_new_location(body->variable.coverage); + body->variable.pc2branchindex = rb_gc_new_location(body->variable.pc2branchindex); + body->location.label = rb_gc_new_location(body->location.label); + body->location.base_label = rb_gc_new_location(body->location.base_label); + body->location.pathobj = rb_gc_new_location(body->location.pathobj); + if (body->local_iseq) { + body->local_iseq = (struct rb_iseq_struct *)rb_gc_new_location((VALUE)body->local_iseq); + } + if (body->parent_iseq) { + body->parent_iseq = (struct rb_iseq_struct *)rb_gc_new_location((VALUE)body->parent_iseq); + } + if (FL_TEST(iseq, ISEQ_MARKABLE_ISEQ)) { + rb_iseq_each_value(iseq, update_each_insn_value, NULL); + } + + if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { + int i, j; + + i = body->param.keyword->required_num; + + for (j = 0; i < body->param.keyword->num; i++, j++) { + VALUE obj = 
body->param.keyword->default_values[j]; + if (obj != Qundef) { + body->param.keyword->default_values[j] = rb_gc_new_location(obj); + } + } + } + + if (body->catch_table) { + struct iseq_catch_table *table = body->catch_table; + unsigned int i; + for(i = 0; i < table->size; i++) { + struct iseq_catch_table_entry *entry; + entry = &table->entries[i]; + if (entry->iseq) { + entry->iseq = (rb_iseq_t *)rb_gc_new_location((VALUE)entry->iseq); + } + } + } + } +} + +static VALUE each_insn_value(void *ctx, VALUE obj) { - rb_gc_mark(obj); + rb_gc_mark_no_pin(obj); + return obj; } void @@ -225,12 +286,12 @@ rb_iseq_mark(const rb_iseq_t *iseq) https://github.com/ruby/ruby/blob/trunk/iseq.c#L286 rb_iseq_each_value(iseq, each_insn_value, NULL); } - rb_gc_mark(body->variable.coverage); - rb_gc_mark(body->variable.pc2branchindex); - rb_gc_mark(body->location.label); - rb_gc_mark(body->location.base_label); - rb_gc_mark(body->location.pathobj); - RUBY_MARK_UNLESS_NULL((VALUE)body->parent_iseq); + rb_gc_mark_no_pin(body->variable.coverage); + rb_gc_mark_no_pin(body->variable.pc2branchindex); + rb_gc_mark_no_pin(body->location.label); + rb_gc_mark_no_pin(body->location.base_label); + rb_gc_mark_no_pin(body->location.pathobj); + RUBY_MARK_NO_PIN_UNLESS_NULL((VALUE)body->parent_iseq); if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { const struct rb_iseq_param_keyword *const keyword = body->param.keyword; @@ -253,7 +314,7 @@ rb_iseq_mark(const rb_iseq_t *iseq) https://github.com/ruby/ruby/blob/trunk/iseq.c#L314 const struct iseq_catch_table_entry *entry; entry = &table->entries[i]; if (entry->iseq) { - rb_gc_mark((VALUE)entry->iseq); + rb_gc_mark_no_pin((VALUE)entry->iseq); } } } @@ -264,11 +325,14 @@ rb_iseq_mark(const rb_iseq_t *iseq) https://github.com/ruby/ruby/blob/trunk/iseq.c#L325 } else if (FL_TEST_RAW(iseq, ISEQ_USE_COMPILE_DATA)) { const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); - VM_ASSERT(compile_data != NULL); - - 
RUBY_MARK_UNLESS_NULL(compile_data->mark_ary); + if (RTEST(compile_data->mark_ary)) { + rb_gc_mark(compile_data->mark_ary); + } RUBY_MARK_UNLESS_NULL(compile_data->err_info); - RUBY_MARK_UNLESS_NULL(compile_data->catch_table_ary); + if (RTEST(compile_data->catch_table_ary)) { + rb_gc_mark(compile_data->catch_table_ary); + } + VM_ASSERT(compile_data != NULL); } else { /* executable */ Index: class.c =================================================================== --- class.c (revision 67619) +++ class.c (revision 67620) @@ -539,6 +539,7 @@ boot_defclass(const char *name, VALUE su https://github.com/ruby/ruby/blob/trunk/class.c#L539 rb_name_class(obj, id); rb_const_set((rb_cObject ? rb_cObject : obj), id, obj); + rb_vm_add_root_module(id, obj); return obj; } @@ -730,6 +731,9 @@ rb_define_class_id_under(VALUE outer, ID https://github.com/ruby/ruby/blob/trunk/class.c#L731 " (%"PRIsVALUE" is given but was %"PRIsVALUE")", outer, rb_id2str(id), RCLASS_SUPER(klass), super); } + /* Class may have been defined in Ruby and not pin-rooted */ + rb_vm_add_root_module(id, klass); + return klass; } if (!super) { @@ -740,6 +744,7 @@ rb_define_class_id_under(VALUE outer, ID https://github.com/ruby/ruby/blob/trunk/class.c#L744 rb_set_class_path_string(klass, outer, rb_id2str(id)); rb_const_set(outer, id, klass); rb_class_inherited(super, klass); + rb_vm_add_root_module(id, klass); rb_gc_register_mark_object(klass); return klass; @@ -777,10 +782,13 @@ rb_define_module(const char *name) https://github.com/ruby/ruby/blob/trunk/class.c#L782 rb_raise(rb_eTypeError, "%s is not a module (%"PRIsVALUE")", name, rb_obj_class(module)); } + /* Module may have been defined in Ruby and not pin-rooted */ + rb_vm_add_root_module(id, module); return module; } module = rb_define_module_id(id); rb_vm_add_root_module(id, module); + rb_gc_register_mark_object(module); rb_const_set(rb_cObject, id, module); return module; Index: iseq.h =================================================================== 
--- iseq.h (revision 67619) +++ iseq.h (revision 67620) @@ -236,7 +236,7 @@ struct iseq_catch_table_entry { https://github.com/ruby/ruby/blob/trunk/iseq.h#L236 * CATCH_TYPE_REDO, CATCH_TYPE_NEXT: * NULL. */ - const rb_iseq_t *iseq; + rb_iseq_t *iseq; unsigned int start; unsigned int end; Index: ext/json/parser/parser.c =================================================================== --- ext/json/parser/parser.c (revision 67619) +++ ext/json/parser/parser.c (revision 67620) @@ -2091,6 +2091,8 @@ void Init_parser(void) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2091 cParser = rb_define_class_under(mExt, "Parser", rb_cObject); eParserError = rb_path2class("JSON::ParserError"); eNestingError = rb_path2class("JSON::NestingError"); + rb_gc_register_mark_object(eParserError); + rb_gc_register_mark_object(eNestingError); rb_define_alloc_func(cParser, cJSON_parser_s_allocate); rb_define_method(cParser, "initialize", cParser_initialize, -1); rb_define_method(cParser, "parse", cParser_parse, 0); Index: ext/json/generator/generator.c =================================================================== --- ext/json/generator/generator.c (revision 67619) +++ ext/json/generator/generator.c (revision 67620) @@ -1344,6 +1344,8 @@ void Init_generator(void) https://github.com/ruby/ruby/blob/trunk/ext/json/generator/generator.c#L1344 eGeneratorError = rb_path2class("JSON::GeneratorError"); eNestingError = rb_path2class("JSON::NestingError"); + rb_gc_register_mark_object(eGeneratorError); + rb_gc_register_mark_object(eNestingError); cState = rb_define_class_under(mGenerator, "State", rb_cObject); rb_define_alloc_func(cState, cState_s_allocate); Index: test/ruby/test_gc_compact.rb =================================================================== --- test/ruby/test_gc_compact.rb (nonexistent) +++ test/ruby/test_gc_compact.rb (revision 67620) @@ -0,0 +1,97 @@ https://github.com/ruby/ruby/blob/trunk/test/ruby/test_gc_compact.rb#L1 +# frozen_string_literal: true 
+require 'test/unit' +require 'fiddle' + +class TestGCCompact < Test::Unit::TestCase + def memory_location(obj) + (Fiddle.dlwrap(obj) >> 1) + end + + def assert_object_ids(list) + same_count = list.find_all { |obj| + memory_location(obj) == obj.object_id + }.count + list.count - same_count + end + + def big_list + 1000.times.map { + # try to make some empty slots by allocating an object and discarding + Object.new + Object.new + } # likely next to each other + end + + # Find an object that's allocated in a slot that had a previous + # tenant, and that tenant moved and is still alive + def find_object_in_recycled_slot(addresses) + new_object = nil + + 100_000.times do + new_object = Object.new + if addresses.include? memory_location(new_object) + break + end + end + + new_object + end + + def test_find_collided_object + list_of_objects = big_list + + ids = list_of_objects.map(&:object_id) # store id in map + addresses = list_of_objects.map(&self.:memory_location) + + assert_equal ids, addresses + + # All object ids should be equal + assert_equal 0, assert_object_ids(list_of_objects) # should be 0 + + GC.verify_compaction_references + + # Some should have moved + id_count = assert_object_ids(list_of_objects) + skip "couldn't get objects to move" if id_count == 0 + assert_operator id_count, :>, 0 + + new_ids = list_of_objects.map(&:object_id) + + # Object ids should not change after compaction + assert_equal ids, new_ids + + new_tenant = find_object_in_recycled_slot(addresses) + assert new_tenant + + (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/