ruby-changes:63342
From: Koichi <ko1@a...>
Date: Wed, 14 Oct 2020 14:02:49 +0900 (JST)
Subject: [ruby-changes:63342] 11c2f0f36c (master): sync enc_table and rb_encoding_list
https://git.ruby-lang.org/ruby.git/commit/?id=11c2f0f36c From 11c2f0f36ccc50899a8dd69a260e85451f68b5ba Mon Sep 17 00:00:00 2001 From: Koichi Sasada <ko1@a...> Date: Tue, 13 Oct 2020 13:16:08 +0900 Subject: sync enc_table and rb_encoding_list enc_table manages Encoding information, and rb_encoding_list manages Encoding objects. Both can be accessed/modified by multiple ractors simultaneously, so they must be synchronized. For enc_table, this patch introduces GLOBAL_ENC_TABLE_ENTER/LEAVE/EVAL to access the table while holding the VM lock. As a shortcut, three new global variables, global_enc_ascii, global_enc_utf_8 and global_enc_us_ascii, are also introduced. For rb_encoding_list, we split it into rb_default_encoding_list (DEFAULT_ENCODING_LIST_CAPA = 128 entries) and rb_additional_encoding_list. rb_default_encoding_list is a fixed-size Array, so it does not need to be synchronized (and most apps only need it). Encoding objects beyond the first 128 are stored in rb_additional_encoding_list. To access rb_additional_encoding_list, the VM lock is needed. 
diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index 9e90d95..05139e2 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -780,4 +780,18 @@ assert_equal "#{N}#{N}", %Q{ https://github.com/ruby/ruby/blob/trunk/bootstraptest/test_ractor.rb#L780 }.map{|r| r.take}.join } +assert_equal "#{N/10}", %Q{ + Ractor.new do + loop do + Encoding.find("test-enc-#{rand(5_000)}").inspect + rescue ArgumentError => e + end + end + + src = Encoding.find("UTF-8") + #{N/10}.times{|i| + src.replicate("test-enc-\#{i}") + } +} + end # if !ENV['GITHUB_WORKFLOW'] diff --git a/common.mk b/common.mk index 509f09c..e7b8f53 100644 --- a/common.mk +++ b/common.mk @@ -4617,13 +4617,17 @@ encoding.$(OBJEXT): {$(VPATH)}internal/variable.h https://github.com/ruby/ruby/blob/trunk/common.mk#L4617 encoding.$(OBJEXT): {$(VPATH)}internal/warning_push.h encoding.$(OBJEXT): {$(VPATH)}internal/xmalloc.h encoding.$(OBJEXT): {$(VPATH)}missing.h +encoding.$(OBJEXT): {$(VPATH)}node.h encoding.$(OBJEXT): {$(VPATH)}onigmo.h encoding.$(OBJEXT): {$(VPATH)}oniguruma.h +encoding.$(OBJEXT): {$(VPATH)}ractor_pub.h encoding.$(OBJEXT): {$(VPATH)}regenc.h encoding.$(OBJEXT): {$(VPATH)}ruby_assert.h encoding.$(OBJEXT): {$(VPATH)}st.h encoding.$(OBJEXT): {$(VPATH)}subst.h encoding.$(OBJEXT): {$(VPATH)}util.h +encoding.$(OBJEXT): {$(VPATH)}vm_debug.h +encoding.$(OBJEXT): {$(VPATH)}vm_sync.h enum.$(OBJEXT): $(hdrdir)/ruby.h enum.$(OBJEXT): $(hdrdir)/ruby/ruby.h enum.$(OBJEXT): $(top_srcdir)/internal/array.h diff --git a/encoding.c b/encoding.c index 7dab544..20bc257 100644 --- a/encoding.c +++ b/encoding.c @@ -26,6 +26,8 @@ https://github.com/ruby/ruby/blob/trunk/encoding.c#L26 #include "ruby/encoding.h" #include "ruby/util.h" #include "ruby_assert.h" +#include "ractor_pub.h" +#include "vm_sync.h" #ifndef ENC_DEBUG #define ENC_DEBUG 0 @@ -54,7 +56,10 @@ void rb_encdb_set_unicode(int index); https://github.com/ruby/ruby/blob/trunk/encoding.c#L56 static ID id_encoding; VALUE 
rb_cEncoding; -static VALUE rb_encoding_list; + +#define DEFAULT_ENCODING_LIST_CAPA 128 +static VALUE rb_default_encoding_list; +static VALUE rb_additional_encoding_list; struct rb_encoding_entry { const char *name; @@ -62,12 +67,27 @@ struct rb_encoding_entry { https://github.com/ruby/ruby/blob/trunk/encoding.c#L67 rb_encoding *base; }; -static struct { +static struct enc_table { struct rb_encoding_entry *list; int count; int size; st_table *names; -} enc_table; +} global_enc_table; + +static rb_encoding *global_enc_ascii, + *global_enc_utf_8, + *global_enc_us_ascii; + +#define GLOBAL_ENC_TABLE_ENTER(enc_table) struct enc_table *enc_table = &global_enc_table; RB_VM_LOCK_ENTER() +#define GLOBAL_ENC_TABLE_LEAVE() RB_VM_LOCK_LEAVE() +#define GLOBAL_ENC_TABLE_EVAL(enc_table, expr) do { \ + GLOBAL_ENC_TABLE_ENTER(enc_table); \ + { \ + expr; \ + } \ + GLOBAL_ENC_TABLE_LEAVE(); \ +} while (0) + #define ENC_DUMMY_FLAG (1<<24) #define ENC_INDEX_MASK (~(~0U<<24)) @@ -84,8 +104,6 @@ static struct { https://github.com/ruby/ruby/blob/trunk/encoding.c#L104 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc)) -static int load_encoding(const char *name); - static const rb_data_type_t encoding_data_type = { "encoding", {0, 0, 0,}, @@ -107,19 +125,63 @@ enc_new(rb_encoding *encoding) https://github.com/ruby/ruby/blob/trunk/encoding.c#L125 return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, (void *)encoding); } +static void +enc_list_update(int index, rb_raw_encoding *encoding) +{ + if (index < DEFAULT_ENCODING_LIST_CAPA) { + VALUE list = rb_default_encoding_list; + if (list && NIL_P(rb_ary_entry(list, index))) { + /* initialize encoding data */ + rb_ary_store(list, index, enc_new(encoding)); + } + } + else { + RB_VM_LOCK_ENTER(); + { + VALUE list = rb_additional_encoding_list; + if (list && NIL_P(rb_ary_entry(list, index))) { + /* initialize encoding data */ + rb_ary_store(list, index - DEFAULT_ENCODING_LIST_CAPA, enc_new(encoding)); + } + } + RB_VM_LOCK_LEAVE(); + } +} 
+ static VALUE -rb_enc_from_encoding_index(int idx) +enc_list_lookup(int idx) { VALUE list, enc; - if (!(list = rb_encoding_list)) { - rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx); + if (idx < DEFAULT_ENCODING_LIST_CAPA) { + if (!(list = rb_default_encoding_list)) { + rb_bug("rb_enc_from_encoding_index(%d): no rb_default_encoding_list", idx); + } + enc = rb_ary_entry(list, idx); } - enc = rb_ary_entry(list, idx); + else { + RB_VM_LOCK_ENTER(); + { + if (!(list = rb_additional_encoding_list)) { + rb_bug("rb_enc_from_encoding_index(%d): no rb_additional_encoding_list", idx); + } + enc = rb_ary_entry(list, idx - DEFAULT_ENCODING_LIST_CAPA); + } + RB_VM_LOCK_LEAVE(); + } + if (NIL_P(enc)) { - rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx); + rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx); + } + else { + return enc; } - return enc; +} + +static VALUE +rb_enc_from_encoding_index(int idx) +{ + return enc_list_lookup(idx); } VALUE @@ -152,7 +214,7 @@ check_encoding(rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L214 if (rb_enc_from_index(index) != enc) return -1; if (enc_autoload_p(enc)) { - index = enc_autoload(enc); + index = enc_autoload(enc); } return index; } @@ -269,26 +331,25 @@ rb_find_encoding(VALUE enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L331 } static int -enc_table_expand(int newsize) +enc_table_expand(struct enc_table *enc_table, int newsize) { struct rb_encoding_entry *ent; int count = newsize; - if (enc_table.size >= newsize) return newsize; + if (enc_table->size >= newsize) return newsize; newsize = (newsize + 7) / 8 * 8; - ent = REALLOC_N(enc_table.list, struct rb_encoding_entry, newsize); - memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size)); - enc_table.list = ent; - enc_table.size = newsize; + ent = REALLOC_N(enc_table->list, struct rb_encoding_entry, newsize); + memset(ent + enc_table->size, 0, sizeof(*ent)*(newsize - enc_table->size)); + 
enc_table->list = ent; + enc_table->size = newsize; return count; } static int -enc_register_at(int index, const char *name, rb_encoding *base_encoding) +enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding) { - struct rb_encoding_entry *ent = &enc_table.list[index]; + struct rb_encoding_entry *ent = &enc_table->list[index]; rb_raw_encoding *encoding; - VALUE list; if (!valid_encoding_name_p(name)) return -1; if (!ent->name) { @@ -310,76 +371,114 @@ enc_register_at(int index, const char *name, rb_encoding *base_encoding) https://github.com/ruby/ruby/blob/trunk/encoding.c#L371 encoding->name = name; encoding->ruby_encoding_index = index; ent->enc = encoding; - st_insert(enc_table.names, (st_data_t)name, (st_data_t)index); - list = rb_encoding_list; - if (list && NIL_P(rb_ary_entry(list, index))) { - /* initialize encoding data */ - rb_ary_store(list, index, enc_new(encoding)); - } + st_insert(enc_table->names, (st_data_t)name, (st_data_t)index); + + enc_list_update(index, encoding); return index; } static int -enc_register(const char *name, rb_encoding *encoding) +enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encoding) { - int index = enc_table.count; + int index = enc_table->count; - if ((index = enc_table_expand(index + 1)) < 0) return -1; - enc_table.count = index; - return enc_register_at(index - 1, name, encoding); + if ((index = enc_table_expand(enc_table, index + 1)) < 0) return -1; + enc_table->count = index; + return enc_register_at(enc_table, index - 1, name, encoding); } static void set_encoding_const(const char *, rb_encoding *); -int rb_enc_registered(const char *name); +static int enc_registered(struct enc_table *enc_table, const char *name); + +static rb_encoding * +enc_from_index(struct enc_table *enc_table, int index) +{ + if (UNLIKELY(index < 0 || enc_table->count <= (index &= ENC_INDEX_MASK))) { + return 0; + } + return enc_table->list[index].enc; +} + +rb_encoding * 
+rb_enc_from_index(int index) +{ + rb_encoding *enc; + GLOBAL_ENC_TABLE_EVAL(enc_table, + enc = enc_from_index(enc_table, index)); + return enc; +} int rb_enc_register(const char *name, rb_encoding *encoding) { - int index = rb_enc_registere (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/