ruby-changes:69443
From: Peter <ko1@a...>
Date: Tue, 26 Oct 2021 02:26:41 +0900 (JST)
Subject: [ruby-changes:69443] a5b6598192 (master): [Feature #18239] Implement VWA for strings
https://git.ruby-lang.org/ruby.git/commit/?id=a5b6598192 From a5b6598192c30187b19b892af3110a46f6a70d76 Mon Sep 17 00:00:00 2001 From: Peter Zhu <peter@p...> Date: Thu, 26 Aug 2021 10:06:32 -0400 Subject: [Feature #18239] Implement VWA for strings This commit adds support for embedded strings with variable capacity and uses Variable Width Allocation to allocate strings. --- debug.c | 2 + ext/-test-/string/capacity.c | 9 +- ext/-test-/string/cstr.c | 10 +- gc.c | 145 +++++++++------ gc.rb | 10 + include/ruby/internal/config.h | 4 + include/ruby/internal/core/rstring.h | 20 ++ internal/gc.h | 27 ++- misc/lldb_cruby.py | 3 +- ruby.c | 7 +- spec/ruby/optional/capi/string_spec.rb | 12 +- string.c | 323 +++++++++++++++++++++++++-------- test/-ext-/string/test_capacity.rb | 37 +++- test/-ext-/string/test_rb_str_dup.rb | 6 +- test/objspace/test_objspace.rb | 4 +- transcode.c | 4 + 16 files changed, 452 insertions(+), 171 deletions(-) diff --git a/debug.c b/debug.c index 52bd0f7fb74..a5e6ce475a1 100644 --- a/debug.c +++ b/debug.c @@ -56,7 +56,9 @@ const union { https://github.com/ruby/ruby/blob/trunk/debug.c#L56 enum ruby_robject_consts robject_consts; enum ruby_rmodule_flags rmodule_flags; enum ruby_rstring_flags rstring_flags; +#if !USE_RVARGC enum ruby_rstring_consts rstring_consts; +#endif enum ruby_rarray_flags rarray_flags; enum ruby_rarray_consts rarray_consts; enum { diff --git a/ext/-test-/string/capacity.c b/ext/-test-/string/capacity.c index cb8d2c2b3ae..33b2023fd3b 100644 --- a/ext/-test-/string/capacity.c +++ b/ext/-test-/string/capacity.c @@ -4,10 +4,11 @@ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/capacity.c#L4 static VALUE bug_str_capacity(VALUE klass, VALUE str) { - return - STR_EMBED_P(str) ? INT2FIX(RSTRING_EMBED_LEN_MAX) : \ - STR_SHARED_P(str) ? INT2FIX(0) : \ - LONG2FIX(RSTRING(str)->as.heap.aux.capa); + if (!STR_EMBED_P(str) && STR_SHARED_P(str)) { + return INT2FIX(0); + } + + return LONG2FIX(rb_str_capacity(str)); } void diff --git a/ext/-test-/string/cstr.c b/ext/-test-/string/cstr.c index 4f837998d7c..1eadb8b4fd3 100644 --- a/ext/-test-/string/cstr.c +++ b/ext/-test-/string/cstr.c @@ -62,9 +62,13 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen) https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/cstr.c#L62 if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len); str = rb_str_new_shared(str); if (STR_EMBED_P(str)) { +#if USE_RVARGC + RSTRING(str)->as.embed.len = (short)len; +#else RSTRING(str)->basic.flags &= ~RSTRING_EMBED_LEN_MASK; RSTRING(str)->basic.flags |= len << RSTRING_EMBED_LEN_SHIFT; - memmove(RSTRING(str)->as.ary, RSTRING(str)->as.ary + beg, len); +#endif + memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len); } else { RSTRING(str)->as.heap.ptr += beg; @@ -112,7 +116,11 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str) https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/cstr.c#L116 Check_Type(str, T_STRING); FL_SET((str2), STR_NOEMBED); memcpy(buf, RSTRING_PTR(str), capacity); +#if USE_RVARGC + RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6); +#else RBASIC(str2)->flags &= ~RSTRING_EMBED_LEN_MASK; +#endif RSTRING(str2)->as.heap.aux.capa = capacity; RSTRING(str2)->as.heap.ptr = buf; RSTRING(str2)->as.heap.len = RSTRING_LEN(str); diff --git a/gc.c b/gc.c index 04337e44400..0c739ba709b 100644 --- a/gc.c +++ b/gc.c @@ -888,6 +888,7 @@ static const bool USE_MMAP_ALIGNED_ALLOC = false; https://github.com/ruby/ruby/blob/trunk/gc.c#L888 #endif struct heap_page { + short slot_size; short total_slots; short free_slots; short pinned_slots; @@ -1849,7 +1850,7 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj https://github.com/ruby/ruby/blob/trunk/gc.c#L1850 if (RGENGC_CHECK_MODE && /* obj should belong to page */ !(&page->start[0] <= (RVALUE *)obj && - (uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size)) && + (uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->slot_size)) && obj % sizeof(RVALUE) == 0)) { rb_bug("heap_page_add_freeobj: %p is not rvalue.", (void *)p); } @@ -1938,7 +1939,7 @@ heap_pages_free_unused_pages(rb_objspace_t *objspace) https://github.com/ruby/ruby/blob/trunk/gc.c#L1939 } struct heap_page *hipage = heap_pages_sorted[heap_allocated_pages - 1]; - uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->size_pool->slot_size); + uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->slot_size); GC_ASSERT(himem <= (uintptr_t)heap_pages_himem); heap_pages_himem = (RVALUE *)himem; @@ -2034,6 +2035,7 @@ heap_page_allocate(rb_objspace_t *objspace, rb_size_pool_t *size_pool) https://github.com/ruby/ruby/blob/trunk/gc.c#L2035 page->start = (RVALUE *)start; page->total_slots = limit; + page->slot_size = size_pool->slot_size; page->size_pool = size_pool; page_body->header.page = page; @@ -2091,7 +2093,6 @@ heap_add_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea https://github.com/ruby/ruby/blob/trunk/gc.c#L2093 { /* Adding to eden heap during incremental sweeping is forbidden */ GC_ASSERT(!(heap == SIZE_POOL_EDEN_HEAP(size_pool) && heap->sweeping_page)); - GC_ASSERT(page->size_pool == size_pool); page->flags.in_tomb = (heap == SIZE_POOL_TOMB_HEAP(size_pool)); list_add_tail(&heap->pages, &page->page_node); heap->total_pages++; @@ -2324,18 +2325,37 @@ static inline void heap_add_freepage(rb_heap_t *heap, struct heap_page *page); https://github.com/ruby/ruby/blob/trunk/gc.c#L2325 static struct heap_page *heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap); static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page); -#if USE_RVARGC -void * -rb_gc_rvargc_object_data(VALUE obj) +size_t +rb_gc_obj_slot_size(VALUE obj) { - return (void *)(obj + sizeof(RVALUE)); + return GET_HEAP_PAGE(obj)->slot_size; } + +static inline size_t +size_pool_slot_size(char pool_id) +{ + GC_ASSERT(pool_id < SIZE_POOL_COUNT); + + size_t slot_size = (1 << pool_id) * sizeof(RVALUE); + +#if RGENGC_CHECK_MODE + rb_objspace_t *objspace = &rb_objspace; + GC_ASSERT(size_pools[pool_id].slot_size == slot_size); #endif + return slot_size; +} + +bool +rb_gc_size_allocatable_p(size_t size) +{ + return size <= size_pool_slot_size(SIZE_POOL_COUNT - 1); +} + static inline VALUE ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size) { - if (size != sizeof(RVALUE)) { + if (size > sizeof(RVALUE)) { return Qfalse; } @@ -2409,6 +2429,25 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3) https://github.com/ruby/ruby/blob/trunk/gc.c#L2429 } #if USE_RVARGC +static inline rb_size_pool_t * +size_pool_for_size(rb_objspace_t *objspace, size_t size) +{ + size_t slot_count = CEILDIV(size, sizeof(RVALUE)); + + /* size_pool_idx is ceil(log2(slot_count)) */ + size_t size_pool_idx = 64 - nlz_int64(slot_count - 1); + if (size_pool_idx >= SIZE_POOL_COUNT) { + rb_bug("size_pool_for_size: allocation size too large"); + } + + rb_size_pool_t *size_pool = &size_pools[size_pool_idx]; + GC_ASSERT(size_pool->slot_size >= (short)size); + GC_ASSERT(size_pool_idx == 0 || size_pools[size_pool_idx - 1].slot_size < (short)size); + + return size_pool; +} + + static inline VALUE heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap) { @@ -2430,25 +2469,6 @@ heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t * https://github.com/ruby/ruby/blob/trunk/gc.c#L2469 return (VALUE)p; } - -static inline rb_size_pool_t * -size_pool_for_size(rb_objspace_t *objspace, size_t size) -{ - size_t slot_count = CEILDIV(size, sizeof(RVALUE)); - - /* size_pool_idx is ceil(log2(slot_count)) */ - size_t size_pool_idx = 64 - nlz_int64(slot_count - 1); - GC_ASSERT(size_pool_idx > 0); - if (size_pool_idx >= SIZE_POOL_COUNT) { - rb_bug("size_pool_for_size: allocation size too large"); - } - - rb_size_pool_t *size_pool = &size_pools[size_pool_idx]; - GC_ASSERT(size_pool->slot_size >= (short)size); - GC_ASSERT(size_pools[size_pool_idx - 1].slot_size < (short)size); - - return size_pool; -} #endif ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size)); @@ -2574,7 +2594,6 @@ VALUE https://github.com/ruby/ruby/blob/trunk/gc.c#L2594 rb_wb_unprotected_newobj_of(VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of(klass, flags, 0, 0, 0, FALSE, size); } @@ -2582,7 +2601,6 @@ VALUE https://github.com/ruby/ruby/blob/trunk/gc.c#L2601 rb_wb_protected_newobj_of(VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of(klass, flags, 0, 0, 0, TRUE, size); } @@ -2590,7 +2608,6 @@ VALUE https://github.com/ruby/ruby/blob/trunk/gc.c#L2608 rb_ec_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of_cr(rb_ec_ractor_ptr(e (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/