ruby-changes:65620
From: Jean <ko1@a...>
Date: Mon, 22 Mar 2021 21:38:04 +0900 (JST)
Subject: [ruby-changes:65620] 7e8a9af9db (master): rb_enc_interned_str: handle autoloaded encodings
https://git.ruby-lang.org/ruby.git/commit/?id=7e8a9af9db From 7e8a9af9db42a21f6a1125a29e98c45ff9d5833b Mon Sep 17 00:00:00 2001 From: Jean Boussier <jean.boussier@g...> Date: Fri, 19 Mar 2021 11:29:06 +0100 Subject: rb_enc_interned_str: handle autoloaded encodings If called with an autoloaded encoding that was not yet initialized, `rb_enc_interned_str` would crash with a NULL pointer dereference. See: https://github.com/ruby/ruby/pull/4119#issuecomment-800189841 --- encoding.c | 28 ++++++++++++---------------- ext/-test-/string/depend | 3 +++ ext/-test-/string/fstring.c | 15 +++++++++++++++ internal/encoding.h | 3 +++ string.c | 4 ++++ test/-ext-/string/test_fstring.rb | 16 ++++++++++++++++ 6 files changed, 53 insertions(+), 16 deletions(-) diff --git a/encoding.c b/encoding.c index 330be29..32d5a34 100644 --- a/encoding.c +++ b/encoding.c @@ -101,8 +101,6 @@ static rb_encoding *global_enc_ascii, https://github.com/ruby/ruby/blob/trunk/encoding.c#L101 #define ENCODING_NAMELEN_MAX 63 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX) -#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc)) - static const rb_data_type_t encoding_data_type = { "encoding", {0, 0, 0,}, @@ -207,16 +205,14 @@ rb_enc_dummy_p(rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L205 return ENC_DUMMY_P(enc) != 0; } -static int enc_autoload(rb_encoding *); - static int check_encoding(rb_encoding *enc) { int index = rb_enc_to_index(enc); if (rb_enc_from_index(index) != enc) return -1; - if (enc_autoload_p(enc)) { - index = enc_autoload(enc); + if (rb_enc_autoload_p(enc)) { + index = rb_enc_autoload(enc); } return index; } @@ -260,7 +256,7 @@ must_encindex(int index) https://github.com/ruby/ruby/blob/trunk/encoding.c#L256 rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)", index, rb_enc_name(enc), ENC_TO_ENCINDEX(enc)); } - if (enc_autoload_p(enc) && enc_autoload(enc) == -1) { + if (rb_enc_autoload_p(enc) && 
rb_enc_autoload(enc) == -1) { rb_loaderror("failed to load encoding (%s)", rb_enc_name(enc)); } @@ -444,7 +440,7 @@ rb_enc_register(const char *name, rb_encoding *encoding) https://github.com/ruby/ruby/blob/trunk/encoding.c#L440 if (STRCASECMP(name, rb_enc_name(oldenc))) { index = enc_register(enc_table, name, encoding); } - else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) { + else if (rb_enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) { enc_register_at(enc_table, index, name, encoding); } else { @@ -834,7 +830,7 @@ load_encoding(const char *name) https://github.com/ruby/ruby/blob/trunk/encoding.c#L830 else if ((idx = enc_registered(enc_table, name)) < 0) { idx = -1; } - else if (enc_autoload_p(enc_table->list[idx].enc)) { + else if (rb_enc_autoload_p(enc_table->list[idx].enc)) { idx = -1; } } @@ -853,8 +849,8 @@ enc_autoload_body(struct enc_table *enc_table, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L849 do { if (i >= enc_table->count) return -1; } while (enc_table->list[i].enc != base && (++i, 1)); - if (enc_autoload_p(base)) { - if (enc_autoload(base) < 0) return -1; + if (rb_enc_autoload_p(base)) { + if (rb_enc_autoload(base) < 0) return -1; } i = enc->ruby_encoding_index; enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base); @@ -867,8 +863,8 @@ enc_autoload_body(struct enc_table *enc_table, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/encoding.c#L863 } } -static int -enc_autoload(rb_encoding *enc) +int +rb_enc_autoload(rb_encoding *enc) { int i; GLOBAL_ENC_TABLE_EVAL(enc_table, i = enc_autoload_body(enc_table, enc)); @@ -895,8 +891,8 @@ rb_enc_find_index(const char *name) https://github.com/ruby/ruby/blob/trunk/encoding.c#L891 rb_raise(rb_eArgError, "encoding %s is not registered", name); } } - else if (enc_autoload_p(enc)) { - if (enc_autoload(enc) < 0) { + else if (rb_enc_autoload_p(enc)) { + if (rb_enc_autoload(enc) < 0) { rb_warn("failed to load encoding (%s); use ASCII-8BIT instead", name); 
return 0; @@ -1340,7 +1336,7 @@ enc_inspect(VALUE self) https://github.com/ruby/ruby/blob/trunk/encoding.c#L1336 "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self), rb_enc_name(enc), (ENC_DUMMY_P(enc) ? " (dummy)" : ""), - enc_autoload_p(enc) ? " (autoload)" : ""); + rb_enc_autoload_p(enc) ? " (autoload)" : ""); } /* diff --git a/ext/-test-/string/depend b/ext/-test-/string/depend index 67dfd22..7db4465 100644 --- a/ext/-test-/string/depend +++ b/ext/-test-/string/depend @@ -1000,6 +1000,7 @@ fstring.o: $(hdrdir)/ruby/backward/2/long_long.h https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/depend#L1000 fstring.o: $(hdrdir)/ruby/backward/2/stdalign.h fstring.o: $(hdrdir)/ruby/backward/2/stdarg.h fstring.o: $(hdrdir)/ruby/defines.h +fstring.o: $(hdrdir)/ruby/encoding.h fstring.o: $(hdrdir)/ruby/intern.h fstring.o: $(hdrdir)/ruby/internal/anyargs.h fstring.o: $(hdrdir)/ruby/internal/arithmetic.h @@ -1142,6 +1143,8 @@ fstring.o: $(hdrdir)/ruby/internal/variable.h https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/depend#L1143 fstring.o: $(hdrdir)/ruby/internal/warning_push.h fstring.o: $(hdrdir)/ruby/internal/xmalloc.h fstring.o: $(hdrdir)/ruby/missing.h +fstring.o: $(hdrdir)/ruby/onigmo.h +fstring.o: $(hdrdir)/ruby/oniguruma.h fstring.o: $(hdrdir)/ruby/ruby.h fstring.o: $(hdrdir)/ruby/st.h fstring.o: $(hdrdir)/ruby/subst.h diff --git a/ext/-test-/string/fstring.c b/ext/-test-/string/fstring.c index 30120b4..2374319 100644 --- a/ext/-test-/string/fstring.c +++ b/ext/-test-/string/fstring.c @@ -1,4 +1,5 @@ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/fstring.c#L1 #include "ruby.h" +#include "ruby/encoding.h" VALUE rb_fstring(VALUE str); @@ -8,8 +9,22 @@ bug_s_fstring(VALUE self, VALUE str) https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/fstring.c#L9 return rb_fstring(str); } +VALUE +bug_s_rb_enc_interned_str(VALUE self, VALUE encoding) +{ + return rb_enc_interned_str("foo", 3, RDATA(encoding)->data); +} + +VALUE 
+bug_s_rb_enc_str_new(VALUE self, VALUE encoding) +{ + return rb_enc_str_new("foo", 3, RDATA(encoding)->data); +} + void Init_string_fstring(VALUE klass) { rb_define_singleton_method(klass, "fstring", bug_s_fstring, 1); + rb_define_singleton_method(klass, "rb_enc_interned_str", bug_s_rb_enc_interned_str, 1); + rb_define_singleton_method(klass, "rb_enc_str_new", bug_s_rb_enc_str_new, 1); } diff --git a/internal/encoding.h b/internal/encoding.h index af236da..c0cf061 100644 --- a/internal/encoding.h +++ b/internal/encoding.h @@ -12,12 +12,15 @@ https://github.com/ruby/ruby/blob/trunk/internal/encoding.h#L12 #include "ruby/ruby.h" /* for ID */ #include "ruby/encoding.h" /* for rb_encoding */ +#define rb_enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc)) + /* encoding.c */ ID rb_id_encoding(void); rb_encoding *rb_enc_get_from_index(int index); rb_encoding *rb_enc_check_str(VALUE str1, VALUE str2); int rb_encdb_replicate(const char *alias, const char *orig); int rb_encdb_alias(const char *alias, const char *orig); +int rb_enc_autoload(rb_encoding *enc); int rb_encdb_dummy(const char *name); void rb_encdb_declare(const char *name); void rb_enc_set_base(const char *name, const char *orig); diff --git a/string.c b/string.c index 7605c22..90f6bca 100644 --- a/string.c +++ b/string.c @@ -11498,6 +11498,10 @@ rb_interned_str_cstr(const char *ptr) https://github.com/ruby/ruby/blob/trunk/string.c#L11498 VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc) { + if (UNLIKELY(rb_enc_autoload_p(enc))) { + rb_enc_autoload(enc); + } + struct RString fake_str; return register_fstring(rb_setup_fake_str(&fake_str, ptr, len, enc), TRUE); } diff --git a/test/-ext-/string/test_fstring.rb b/test/-ext-/string/test_fstring.rb index 76afa30..9b4956e 100644 --- a/test/-ext-/string/test_fstring.rb +++ b/test/-ext-/string/test_fstring.rb @@ -12,6 +12,22 @@ class Test_String_Fstring < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/-ext-/string/test_fstring.rb#L12 yield 
fstr end + def test_rb_enc_interned_str_autoloaded_encoding + assert_separately([], <<~RUBY) + require '-test-/string' + assert_include(Encoding::Windows_31J.inspect, 'autoload') + Bug::String.rb_enc_interned_str(Encoding::Windows_31J) + RUBY + end + + def test_rb_enc_str_new_autoloaded_encoding + assert_separately([], <<~RUBY) + require '-test-/string' + assert_include(Encoding::Windows_31J.inspect, 'autoload') + Bug::String.rb_enc_str_new(Encoding::Windows_31J) + RUBY + end + def test_instance_variable str = __method__.to_s * 3 str.instance_variable_set(:@test, 42) -- cgit v1.1 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/