[前][次][番号順一覧][スレッド一覧]

ruby-changes:73342

From: Nobuyoshi <ko1@a...>
Date: Wed, 31 Aug 2022 17:28:26 +0900 (JST)
Subject: [ruby-changes:73342] 576bdec03f (master): [Bug #18973] Promote US-ASCII to ASCII-8BIT when adding 8-bit char

https://git.ruby-lang.org/ruby.git/commit/?id=576bdec03f

From 576bdec03f0d58847690a0607c788ada433ce60f Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Tue, 30 Aug 2022 18:12:08 +0900
Subject: [Bug #18973] Promote US-ASCII to ASCII-8BIT when adding 8-bit char

---
 internal/string.h         |  1 +
 sprintf.c                 | 13 +++++++++----
 string.c                  | 32 ++++++++++++++++++++++++--------
 test/ruby/test_sprintf.rb |  3 +++
 4 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/internal/string.h b/internal/string.h
index 8fb9553d03..46862d77f5 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -43,6 +43,7 @@ char *rb_str_to_cstr(VALUE str); https://github.com/ruby/ruby/blob/trunk/internal/string.h#L43
 const char *ruby_escaped_char(int c);
 void rb_str_make_independent(VALUE str);
 int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
+int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code);
 
 static inline bool STR_EMBED_P(VALUE str);
 static inline bool STR_SHARED_P(VALUE str);
diff --git a/sprintf.c b/sprintf.c
index b2bdd4a072..bfe25e1d3c 100644
--- a/sprintf.c
+++ b/sprintf.c
@@ -454,13 +454,18 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) https://github.com/ruby/ruby/blob/trunk/sprintf.c#L454
                     str = tmp;
                     goto format_s1;
                 }
-                else {
-                    n = NUM2INT(val);
-                    if (n >= 0) n = rb_enc_codelen((c = n), enc);
-                }
+                n = NUM2INT(val);
+                if (n >= 0) n = rb_enc_codelen((c = n), enc);
                 if (n <= 0) {
                     rb_raise(rb_eArgError, "invalid character");
                 }
+                int encidx = rb_ascii8bit_appendable_encoding_index(enc, c);
+                if (encidx >= 0 && encidx != rb_enc_to_index(enc)) {
+                    /* special case */
+                    rb_enc_associate_index(result, encidx);
+                    enc = rb_enc_from_index(encidx);
+                    coderange = ENC_CODERANGE_VALID;
+                }
                 if (!(flags & FWIDTH)) {
                     CHECK(n);
                     rb_enc_mbcput(c, &buf[blen], enc);
diff --git a/string.c b/string.c
index 564812ae51..951aeca6dd 100644
--- a/string.c
+++ b/string.c
@@ -3481,17 +3481,13 @@ rb_str_concat(VALUE str1, VALUE str2) https://github.com/ruby/ruby/blob/trunk/string.c#L3481
         return rb_str_append(str1, str2);
     }
 
-    encidx = rb_enc_to_index(enc);
-    if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
-        /* US-ASCII automatically extended to ASCII-8BIT */
+    encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
+    if (encidx >= 0) {
         char buf[1];
         buf[0] = (char)code;
-        if (code > 0xFF) {
-            rb_raise(rb_eRangeError, "%u out of char range", code);
-        }
         rb_str_cat(str1, buf, 1);
-        if (encidx == ENCINDEX_US_ASCII && code > 127) {
-            rb_enc_associate_index(str1, ENCINDEX_ASCII_8BIT);
+        if (encidx != rb_enc_to_index(enc)) {
+            rb_enc_associate_index(str1, encidx);
             ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
         }
     }
@@ -3524,6 +3520,26 @@ rb_str_concat(VALUE str1, VALUE str2) https://github.com/ruby/ruby/blob/trunk/string.c#L3520
     return str1;
 }
 
+int /* encoding index to use after appending `code` as one byte, or -1 if enc is not ASCII-8BIT/US-ASCII */
+rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code)
+{
+    int encidx = rb_enc_to_index(enc);
+
+    if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
+        /* US-ASCII is automatically extended to ASCII-8BIT when code is above 127 */
+        if (code > 0xFF) { /* larger than a single byte: raises RangeError */
+            rb_raise(rb_eRangeError, "%u out of char range", code);
+        }
+        if (encidx == ENCINDEX_US_ASCII && code > 127) {
+            return ENCINDEX_ASCII_8BIT; /* promoted index; differs from enc's own index */
+        }
+        return encidx; /* encoding stays as it is */
+    }
+    else {
+        return -1; /* callers in this patch test `encidx >= 0` before using the result */
+    }
+}
+
 /*
  *  call-seq:
  *    prepend(*other_strings)  -> string
diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb
index 803399fdb3..c453ecd350 100644
--- a/test/ruby/test_sprintf.rb
+++ b/test/ruby/test_sprintf.rb
@@ -369,6 +369,9 @@ class TestSprintf < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_sprintf.rb#L369
     assert_equal(" " * BSIZ + "a", sprintf("%#{ BSIZ + 1 }c", ?a))
     assert_equal("a" + " " * BSIZ, sprintf("%-#{ BSIZ + 1 }c", ?a))
     assert_raise(ArgumentError) { sprintf("%c", -1) }
+    s = sprintf("%c".encode(Encoding::US_ASCII), 0x80)
+    assert_equal("\x80".b, s)
+    assert_predicate(s, :valid_encoding?)
   end
 
   def test_string
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]