[前][次][番号順一覧][スレッド一覧]

ruby-changes:65620

From: Jean <ko1@a...>
Date: Mon, 22 Mar 2021 21:38:04 +0900 (JST)
Subject: [ruby-changes:65620] 7e8a9af9db (master): rb_enc_interned_str: handle autoloaded encodings

https://git.ruby-lang.org/ruby.git/commit/?id=7e8a9af9db

From 7e8a9af9db42a21f6a1125a29e98c45ff9d5833b Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@g...>
Date: Fri, 19 Mar 2021 11:29:06 +0100
Subject: rb_enc_interned_str: handle autoloaded encodings

If called with an autoloaded encoding that was not yet
initialized, `rb_enc_interned_str` would crash with
a NULL pointer dereference.

See: https://github.com/ruby/ruby/pull/4119#issuecomment-800189841
---
 encoding.c                        | 28 ++++++++++++----------------
 ext/-test-/string/depend          |  3 +++
 ext/-test-/string/fstring.c       | 15 +++++++++++++++
 internal/encoding.h               |  3 +++
 string.c                          |  4 ++++
 test/-ext-/string/test_fstring.rb | 16 ++++++++++++++++
 6 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/encoding.c b/encoding.c
index 330be29..32d5a34 100644
--- a/encoding.c
+++ b/encoding.c
@@ -101,8 +101,6 @@ static rb_encoding *global_enc_ascii, https://github.com/ruby/ruby/blob/trunk/encoding.c#L101
 #define ENCODING_NAMELEN_MAX 63
 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
 
-#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
-
 static const rb_data_type_t encoding_data_type = {
     "encoding",
     {0, 0, 0,},
@@ -207,16 +205,14 @@ rb_enc_dummy_p(rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L205
     return ENC_DUMMY_P(enc) != 0;
 }
 
-static int enc_autoload(rb_encoding *);
-
 static int
 check_encoding(rb_encoding *enc)
 {
     int index = rb_enc_to_index(enc);
     if (rb_enc_from_index(index) != enc)
 	return -1;
-    if (enc_autoload_p(enc)) {
-        index = enc_autoload(enc);
+    if (rb_enc_autoload_p(enc)) {
+        index = rb_enc_autoload(enc);
     }
     return index;
 }
@@ -260,7 +256,7 @@ must_encindex(int index) https://github.com/ruby/ruby/blob/trunk/encoding.c#L256
 	rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)",
 		 index, rb_enc_name(enc), ENC_TO_ENCINDEX(enc));
     }
-    if (enc_autoload_p(enc) && enc_autoload(enc) == -1) {
+    if (rb_enc_autoload_p(enc) && rb_enc_autoload(enc) == -1) {
 	rb_loaderror("failed to load encoding (%s)",
 		     rb_enc_name(enc));
     }
@@ -444,7 +440,7 @@ rb_enc_register(const char *name, rb_encoding *encoding) https://github.com/ruby/ruby/blob/trunk/encoding.c#L440
             if (STRCASECMP(name, rb_enc_name(oldenc))) {
                 index = enc_register(enc_table, name, encoding);
             }
-            else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
+            else if (rb_enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
                 enc_register_at(enc_table, index, name, encoding);
             }
             else {
@@ -834,7 +830,7 @@ load_encoding(const char *name) https://github.com/ruby/ruby/blob/trunk/encoding.c#L830
         else if ((idx = enc_registered(enc_table, name)) < 0) {
             idx = -1;
         }
-        else if (enc_autoload_p(enc_table->list[idx].enc)) {
+        else if (rb_enc_autoload_p(enc_table->list[idx].enc)) {
             idx = -1;
         }
     }
@@ -853,8 +849,8 @@ enc_autoload_body(struct enc_table *enc_table, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L849
 	do {
 	    if (i >= enc_table->count) return -1;
 	} while (enc_table->list[i].enc != base && (++i, 1));
-	if (enc_autoload_p(base)) {
-	    if (enc_autoload(base) < 0) return -1;
+	if (rb_enc_autoload_p(base)) {
+	    if (rb_enc_autoload(base) < 0) return -1;
 	}
 	i = enc->ruby_encoding_index;
 	enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base);
@@ -867,8 +863,8 @@ enc_autoload_body(struct enc_table *enc_table, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L863
     }
 }
 
-static int
-enc_autoload(rb_encoding *enc)
+int
+rb_enc_autoload(rb_encoding *enc)
 {
     int i;
     GLOBAL_ENC_TABLE_EVAL(enc_table, i = enc_autoload_body(enc_table, enc));
@@ -895,8 +891,8 @@ rb_enc_find_index(const char *name) https://github.com/ruby/ruby/blob/trunk/encoding.c#L891
 	    rb_raise(rb_eArgError, "encoding %s is not registered", name);
 	}
     }
-    else if (enc_autoload_p(enc)) {
-	if (enc_autoload(enc) < 0) {
+    else if (rb_enc_autoload_p(enc)) {
+	if (rb_enc_autoload(enc) < 0) {
 	    rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
 		    name);
 	    return 0;
@@ -1340,7 +1336,7 @@ enc_inspect(VALUE self) https://github.com/ruby/ruby/blob/trunk/encoding.c#L1336
 			  "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
 			  rb_enc_name(enc),
 			  (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
-			  enc_autoload_p(enc) ? " (autoload)" : "");
+			  rb_enc_autoload_p(enc) ? " (autoload)" : "");
 }
 
 /*
diff --git a/ext/-test-/string/depend b/ext/-test-/string/depend
index 67dfd22..7db4465 100644
--- a/ext/-test-/string/depend
+++ b/ext/-test-/string/depend
@@ -1000,6 +1000,7 @@ fstring.o: $(hdrdir)/ruby/backward/2/long_long.h https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/depend#L1000
 fstring.o: $(hdrdir)/ruby/backward/2/stdalign.h
 fstring.o: $(hdrdir)/ruby/backward/2/stdarg.h
 fstring.o: $(hdrdir)/ruby/defines.h
+fstring.o: $(hdrdir)/ruby/encoding.h
 fstring.o: $(hdrdir)/ruby/intern.h
 fstring.o: $(hdrdir)/ruby/internal/anyargs.h
 fstring.o: $(hdrdir)/ruby/internal/arithmetic.h
@@ -1142,6 +1143,8 @@ fstring.o: $(hdrdir)/ruby/internal/variable.h https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/depend#L1143
 fstring.o: $(hdrdir)/ruby/internal/warning_push.h
 fstring.o: $(hdrdir)/ruby/internal/xmalloc.h
 fstring.o: $(hdrdir)/ruby/missing.h
+fstring.o: $(hdrdir)/ruby/onigmo.h
+fstring.o: $(hdrdir)/ruby/oniguruma.h
 fstring.o: $(hdrdir)/ruby/ruby.h
 fstring.o: $(hdrdir)/ruby/st.h
 fstring.o: $(hdrdir)/ruby/subst.h
diff --git a/ext/-test-/string/fstring.c b/ext/-test-/string/fstring.c
index 30120b4..2374319 100644
--- a/ext/-test-/string/fstring.c
+++ b/ext/-test-/string/fstring.c
@@ -1,4 +1,5 @@ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/fstring.c#L1
 #include "ruby.h"
+#include "ruby/encoding.h"
 
 VALUE rb_fstring(VALUE str);
 
@@ -8,8 +9,22 @@ bug_s_fstring(VALUE self, VALUE str) https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/fstring.c#L9
     return rb_fstring(str);
 }
 
+VALUE
+bug_s_rb_enc_interned_str(VALUE self, VALUE encoding)
+{
+    return rb_enc_interned_str("foo", 3, RDATA(encoding)->data);
+}
+
+VALUE
+bug_s_rb_enc_str_new(VALUE self, VALUE encoding)
+{
+    return rb_enc_str_new("foo", 3, RDATA(encoding)->data);
+}
+
 void
 Init_string_fstring(VALUE klass)
 {
     rb_define_singleton_method(klass, "fstring", bug_s_fstring, 1);
+    rb_define_singleton_method(klass, "rb_enc_interned_str", bug_s_rb_enc_interned_str, 1);
+    rb_define_singleton_method(klass, "rb_enc_str_new", bug_s_rb_enc_str_new, 1);
 }
diff --git a/internal/encoding.h b/internal/encoding.h
index af236da..c0cf061 100644
--- a/internal/encoding.h
+++ b/internal/encoding.h
@@ -12,12 +12,15 @@ https://github.com/ruby/ruby/blob/trunk/internal/encoding.h#L12
 #include "ruby/ruby.h"          /* for ID */
 #include "ruby/encoding.h"      /* for rb_encoding */
 
+#define rb_enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
+
 /* encoding.c */
 ID rb_id_encoding(void);
 rb_encoding *rb_enc_get_from_index(int index);
 rb_encoding *rb_enc_check_str(VALUE str1, VALUE str2);
 int rb_encdb_replicate(const char *alias, const char *orig);
 int rb_encdb_alias(const char *alias, const char *orig);
+int rb_enc_autoload(rb_encoding *enc);
 int rb_encdb_dummy(const char *name);
 void rb_encdb_declare(const char *name);
 void rb_enc_set_base(const char *name, const char *orig);
diff --git a/string.c b/string.c
index 7605c22..90f6bca 100644
--- a/string.c
+++ b/string.c
@@ -11498,6 +11498,10 @@ rb_interned_str_cstr(const char *ptr) https://github.com/ruby/ruby/blob/trunk/string.c#L11498
 VALUE
 rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
 {
+    if (UNLIKELY(rb_enc_autoload_p(enc))) {
+        rb_enc_autoload(enc);
+    }
+
     struct RString fake_str;
     return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc), TRUE);
 }
diff --git a/test/-ext-/string/test_fstring.rb b/test/-ext-/string/test_fstring.rb
index 76afa30..9b4956e 100644
--- a/test/-ext-/string/test_fstring.rb
+++ b/test/-ext-/string/test_fstring.rb
@@ -12,6 +12,22 @@ class Test_String_Fstring < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/-ext-/string/test_fstring.rb#L12
     yield fstr
   end
 
+  def test_rb_enc_interned_str_autoloaded_encoding
+    assert_separately([], <<~RUBY)
+      require '-test-/string'
+      assert_include(Encoding::Windows_31J.inspect, 'autoload')
+      Bug::String.rb_enc_interned_str(Encoding::Windows_31J)
+    RUBY
+  end
+
+  def test_rb_enc_str_new_autoloaded_encoding
+    assert_separately([], <<~RUBY)
+      require '-test-/string'
+      assert_include(Encoding::Windows_31J.inspect, 'autoload')
+      Bug::String.rb_enc_str_new(Encoding::Windows_31J)
+    RUBY
+  end
+
   def test_instance_variable
     str = __method__.to_s * 3
     str.instance_variable_set(:@test, 42)
-- 
cgit v1.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]