[前][次][番号順一覧][スレッド一覧]

ruby-changes:63342

From: Koichi <ko1@a...>
Date: Wed, 14 Oct 2020 14:02:49 +0900 (JST)
Subject: [ruby-changes:63342] 11c2f0f36c (master): sync enc_table and rb_encoding_list

https://git.ruby-lang.org/ruby.git/commit/?id=11c2f0f36c

From 11c2f0f36ccc50899a8dd69a260e85451f68b5ba Mon Sep 17 00:00:00 2001
From: Koichi Sasada <ko1@a...>
Date: Tue, 13 Oct 2020 13:16:08 +0900
Subject: sync enc_table and rb_encoding_list

enc_table manages Encoding information, and rb_encoding_list manages
Encoding objects. Both can be accessed and modified by multiple ractors
simultaneously, so access to them must be synchronized.

For enc_table, this patch introduces GLOBAL_ENC_TABLE_ENTER/LEAVE/EVAL
to access the table while holding the VM lock. As a shortcut, three new
global variables (global_enc_ascii, global_enc_utf_8, and
global_enc_us_ascii) are also introduced.

For rb_encoding_list, we split it into rb_default_encoding_list
(DEFAULT_ENCODING_LIST_CAPA = 128 entries) and rb_additional_encoding_list.
rb_default_encoding_list is a fixed-size Array, so it does not need to be
synchronized (and most applications only need it). Encoding objects beyond
the first 128 are stored in rb_additional_encoding_list. To access
rb_additional_encoding_list, the VM lock is needed.

diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb
index 9e90d95..05139e2 100644
--- a/bootstraptest/test_ractor.rb
+++ b/bootstraptest/test_ractor.rb
@@ -780,4 +780,18 @@ assert_equal "#{N}#{N}", %Q{ https://github.com/ruby/ruby/blob/trunk/bootstraptest/test_ractor.rb#L780
   }.map{|r| r.take}.join
 }
 
+assert_equal "#{N/10}", %Q{
+  Ractor.new do
+    loop do
+      Encoding.find("test-enc-#{rand(5_000)}").inspect
+    rescue ArgumentError => e
+    end
+  end
+
+  src = Encoding.find("UTF-8")
+  #{N/10}.times{|i|
+    src.replicate("test-enc-\#{i}")
+  }
+}
+
 end # if !ENV['GITHUB_WORKFLOW']
diff --git a/common.mk b/common.mk
index 509f09c..e7b8f53 100644
--- a/common.mk
+++ b/common.mk
@@ -4617,13 +4617,17 @@ encoding.$(OBJEXT): {$(VPATH)}internal/variable.h https://github.com/ruby/ruby/blob/trunk/common.mk#L4617
 encoding.$(OBJEXT): {$(VPATH)}internal/warning_push.h
 encoding.$(OBJEXT): {$(VPATH)}internal/xmalloc.h
 encoding.$(OBJEXT): {$(VPATH)}missing.h
+encoding.$(OBJEXT): {$(VPATH)}node.h
 encoding.$(OBJEXT): {$(VPATH)}onigmo.h
 encoding.$(OBJEXT): {$(VPATH)}oniguruma.h
+encoding.$(OBJEXT): {$(VPATH)}ractor_pub.h
 encoding.$(OBJEXT): {$(VPATH)}regenc.h
 encoding.$(OBJEXT): {$(VPATH)}ruby_assert.h
 encoding.$(OBJEXT): {$(VPATH)}st.h
 encoding.$(OBJEXT): {$(VPATH)}subst.h
 encoding.$(OBJEXT): {$(VPATH)}util.h
+encoding.$(OBJEXT): {$(VPATH)}vm_debug.h
+encoding.$(OBJEXT): {$(VPATH)}vm_sync.h
 enum.$(OBJEXT): $(hdrdir)/ruby.h
 enum.$(OBJEXT): $(hdrdir)/ruby/ruby.h
 enum.$(OBJEXT): $(top_srcdir)/internal/array.h
diff --git a/encoding.c b/encoding.c
index 7dab544..20bc257 100644
--- a/encoding.c
+++ b/encoding.c
@@ -26,6 +26,8 @@ https://github.com/ruby/ruby/blob/trunk/encoding.c#L26
 #include "ruby/encoding.h"
 #include "ruby/util.h"
 #include "ruby_assert.h"
+#include "ractor_pub.h"
+#include "vm_sync.h"
 
 #ifndef ENC_DEBUG
 #define ENC_DEBUG 0
@@ -54,7 +56,10 @@ void rb_encdb_set_unicode(int index); https://github.com/ruby/ruby/blob/trunk/encoding.c#L56
 
 static ID id_encoding;
 VALUE rb_cEncoding;
-static VALUE rb_encoding_list;
+
+#define DEFAULT_ENCODING_LIST_CAPA 128
+static VALUE rb_default_encoding_list;
+static VALUE rb_additional_encoding_list;
 
 struct rb_encoding_entry {
     const char *name;
@@ -62,12 +67,27 @@ struct rb_encoding_entry { https://github.com/ruby/ruby/blob/trunk/encoding.c#L67
     rb_encoding *base;
 };
 
-static struct {
+static struct enc_table {
     struct rb_encoding_entry *list;
     int count;
     int size;
     st_table *names;
-} enc_table;
+} global_enc_table;
+
+static rb_encoding *global_enc_ascii,
+                   *global_enc_utf_8,
+                   *global_enc_us_ascii;
+
+#define GLOBAL_ENC_TABLE_ENTER(enc_table) struct enc_table *enc_table = &global_enc_table; RB_VM_LOCK_ENTER()
+#define GLOBAL_ENC_TABLE_LEAVE()                                                           RB_VM_LOCK_LEAVE()
+#define GLOBAL_ENC_TABLE_EVAL(enc_table, expr) do { \
+    GLOBAL_ENC_TABLE_ENTER(enc_table); \
+    { \
+        expr; \
+    } \
+    GLOBAL_ENC_TABLE_LEAVE(); \
+} while (0)
+
 
 #define ENC_DUMMY_FLAG (1<<24)
 #define ENC_INDEX_MASK (~(~0U<<24))
@@ -84,8 +104,6 @@ static struct { https://github.com/ruby/ruby/blob/trunk/encoding.c#L104
 
 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
 
-static int load_encoding(const char *name);
-
 static const rb_data_type_t encoding_data_type = {
     "encoding",
     {0, 0, 0,},
@@ -107,19 +125,63 @@ enc_new(rb_encoding *encoding) https://github.com/ruby/ruby/blob/trunk/encoding.c#L125
     return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, (void *)encoding);
 }
 
+static void
+enc_list_update(int index, rb_raw_encoding *encoding)
+{
+    if (index < DEFAULT_ENCODING_LIST_CAPA) {
+        VALUE list = rb_default_encoding_list;
+        if (list && NIL_P(rb_ary_entry(list, index))) {
+            /* initialize encoding data */
+            rb_ary_store(list, index, enc_new(encoding));
+        }
+    }
+    else {
+        RB_VM_LOCK_ENTER();
+        {
+            VALUE list = rb_additional_encoding_list;
+            if (list && NIL_P(rb_ary_entry(list, index))) {
+                /* initialize encoding data */
+                rb_ary_store(list, index - DEFAULT_ENCODING_LIST_CAPA, enc_new(encoding));
+            }
+        }
+        RB_VM_LOCK_LEAVE();
+    }
+}
+
 static VALUE
-rb_enc_from_encoding_index(int idx)
+enc_list_lookup(int idx)
 {
     VALUE list, enc;
 
-    if (!(list = rb_encoding_list)) {
-	rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
+    if (idx < DEFAULT_ENCODING_LIST_CAPA) {
+        if (!(list = rb_default_encoding_list)) {
+            rb_bug("rb_enc_from_encoding_index(%d): no rb_default_encoding_list", idx);
+        }
+        enc = rb_ary_entry(list, idx);
     }
-    enc = rb_ary_entry(list, idx);
+    else {
+        RB_VM_LOCK_ENTER();
+        {
+            if (!(list = rb_additional_encoding_list)) {
+                rb_bug("rb_enc_from_encoding_index(%d): no rb_additional_encoding_list", idx);
+            }
+            enc = rb_ary_entry(list, idx - DEFAULT_ENCODING_LIST_CAPA);
+        }
+        RB_VM_LOCK_LEAVE();
+    }
+
     if (NIL_P(enc)) {
-	rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
+        rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
+    }
+    else {
+        return enc;
     }
-    return enc;
+}
+
+static VALUE
+rb_enc_from_encoding_index(int idx)
+{
+    return enc_list_lookup(idx);
 }
 
 VALUE
@@ -152,7 +214,7 @@ check_encoding(rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L214
     if (rb_enc_from_index(index) != enc)
 	return -1;
     if (enc_autoload_p(enc)) {
-	index = enc_autoload(enc);
+        index = enc_autoload(enc);
     }
     return index;
 }
@@ -269,26 +331,25 @@ rb_find_encoding(VALUE enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L331
 }
 
 static int
-enc_table_expand(int newsize)
+enc_table_expand(struct enc_table *enc_table, int newsize)
 {
     struct rb_encoding_entry *ent;
     int count = newsize;
 
-    if (enc_table.size >= newsize) return newsize;
+    if (enc_table->size >= newsize) return newsize;
     newsize = (newsize + 7) / 8 * 8;
-    ent = REALLOC_N(enc_table.list, struct rb_encoding_entry, newsize);
-    memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
-    enc_table.list = ent;
-    enc_table.size = newsize;
+    ent = REALLOC_N(enc_table->list, struct rb_encoding_entry, newsize);
+    memset(ent + enc_table->size, 0, sizeof(*ent)*(newsize - enc_table->size));
+    enc_table->list = ent;
+    enc_table->size = newsize;
     return count;
 }
 
 static int
-enc_register_at(int index, const char *name, rb_encoding *base_encoding)
+enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding)
 {
-    struct rb_encoding_entry *ent = &enc_table.list[index];
+    struct rb_encoding_entry *ent = &enc_table->list[index];
     rb_raw_encoding *encoding;
-    VALUE list;
 
     if (!valid_encoding_name_p(name)) return -1;
     if (!ent->name) {
@@ -310,76 +371,114 @@ enc_register_at(int index, const char *name, rb_encoding *base_encoding) https://github.com/ruby/ruby/blob/trunk/encoding.c#L371
     encoding->name = name;
     encoding->ruby_encoding_index = index;
     ent->enc = encoding;
-    st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
-    list = rb_encoding_list;
-    if (list && NIL_P(rb_ary_entry(list, index))) {
-	/* initialize encoding data */
-	rb_ary_store(list, index, enc_new(encoding));
-    }
+    st_insert(enc_table->names, (st_data_t)name, (st_data_t)index);
+
+    enc_list_update(index, encoding);
     return index;
 }
 
 static int
-enc_register(const char *name, rb_encoding *encoding)
+enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encoding)
 {
-    int index = enc_table.count;
+    int index = enc_table->count;
 
-    if ((index = enc_table_expand(index + 1)) < 0) return -1;
-    enc_table.count = index;
-    return enc_register_at(index - 1, name, encoding);
+    if ((index = enc_table_expand(enc_table, index + 1)) < 0) return -1;
+    enc_table->count = index;
+    return enc_register_at(enc_table, index - 1, name, encoding);
 }
 
 static void set_encoding_const(const char *, rb_encoding *);
-int rb_enc_registered(const char *name);
+static int enc_registered(struct enc_table *enc_table, const char *name);
+
+static rb_encoding *
+enc_from_index(struct enc_table *enc_table, int index)
+{
+    if (UNLIKELY(index < 0 || enc_table->count <= (index &= ENC_INDEX_MASK))) {
+	return 0;
+    }
+    return enc_table->list[index].enc;
+}
+
+rb_encoding *
+rb_enc_from_index(int index)
+{
+    rb_encoding *enc;
+    GLOBAL_ENC_TABLE_EVAL(enc_table,
+                          enc = enc_from_index(enc_table, index));
+    return enc;
+}
 
 int
 rb_enc_register(const char *name, rb_encoding *encoding)
 {
-    int index = rb_enc_registere (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]