ruby-changes:68186
From: Nobuyoshi <ko1@a...>
Date: Fri, 1 Oct 2021 20:29:06 +0900 (JST)
Subject: [ruby-changes:68186] 842b0008c1 (master): Skip broken strings as the locale encoding
https://git.ruby-lang.org/ruby.git/commit/?id=842b0008c1

From 842b0008c132dd587f09766a228041afb7fed24f Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Wed, 29 Sep 2021 19:59:31 +0900
Subject: Skip broken strings as the locale encoding

---
 internal/string.h |  1 +
 ruby.c            | 11 +++++++----
 string.c          |  6 ++++++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/internal/string.h b/internal/string.h
index 546a0ac9a7..d010669ca8 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -42,6 +42,7 @@ size_t rb_str_memsize(VALUE); https://github.com/ruby/ruby/blob/trunk/internal/string.h#L42
 char *rb_str_to_cstr(VALUE str);
 const char *ruby_escaped_char(int c);
 void rb_str_make_independent(VALUE str);
+int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
 
 static inline bool STR_EMBED_P(VALUE str);
 static inline bool STR_SHARED_P(VALUE str);
diff --git a/ruby.c b/ruby.c
index 3358068bbb..818161710c 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1680,7 +1680,11 @@ tty_enabled(void) https://github.com/ruby/ruby/blob/trunk/ruby.c#L1680
 static VALUE
 copy_str(VALUE str, rb_encoding *enc, bool intern)
 {
-    if (!intern) return rb_enc_associate(rb_str_dup(str), enc);
+    if (!intern) {
+        if (rb_enc_str_coderange_scan(str, enc) == ENC_CODERANGE_BROKEN)
+            return 0;
+        return rb_enc_associate(rb_str_dup(str), enc);
+    }
     return rb_enc_interned_str(RSTRING_PTR(str), RSTRING_LEN(str), enc);
 }
 
@@ -1916,7 +1920,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) https://github.com/ruby/ruby/blob/trunk/ruby.c#L1920
                 if (newpath == path) continue;
                 path = newpath;
 #else
-                path = copy_str(path, lenc, !mark);
+                if (!(path = copy_str(path, lenc, !mark))) continue;
 #endif
                 if (mark) rb_ivar_set(path, id_initial_load_path_mark, path);
                 if (!modifiable) {
@@ -1934,8 +1938,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) https://github.com/ruby/ruby/blob/trunk/ruby.c#L1938
         bool modified = false;
         for (long i = loaded_before_enc; i < RARRAY_LEN(loaded_features); ++i) {
             VALUE path = RARRAY_AREF(loaded_features, i);
-            if (rb_enc_get(path) == IF_UTF8_PATH(uenc, lenc)) continue;
-            path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true);
+            if (!(path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true))) continue;
             modified = true;
             RARRAY_ASET(loaded_features, i, path);
         }
diff --git a/string.c b/string.c
index 299d506004..78e2ba923f 100644
--- a/string.c
+++ b/string.c
@@ -724,6 +724,12 @@ enc_coderange_scan(VALUE str, rb_encoding *enc, int encidx) https://github.com/ruby/ruby/blob/trunk/string.c#L724
     }
 }
 
+int
+rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc)
+{
+    return enc_coderange_scan(str, enc, rb_enc_to_index(enc));
+}
+
 int
 rb_enc_str_coderange(VALUE str)
 {
--
cgit v1.2.1

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/