[前][次][番号順一覧][スレッド一覧]

ruby-changes:66606

From: Nobuyoshi <ko1@a...>
Date: Sat, 26 Jun 2021 16:53:49 +0900 (JST)
Subject: [ruby-changes:66606] 391abc543c (master): Scan the coderange in the given encoding

https://git.ruby-lang.org/ruby.git/commit/?id=391abc543c

From 391abc543cea118a9cd7d6310acadbfa352668ef Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Sat, 26 Jun 2021 16:05:15 +0900
Subject: Scan the coderange in the given encoding

---
 ext/-test-/string/enc_str_buf_cat.c       | 14 ++++++++++++++
 string.c                                  | 32 ++++++++++++++++++++++---------
 test/-ext-/string/test_enc_str_buf_cat.rb |  9 +++++++++
 3 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/ext/-test-/string/enc_str_buf_cat.c b/ext/-test-/string/enc_str_buf_cat.c
index 9ac4a29..4c1b262 100644
--- a/ext/-test-/string/enc_str_buf_cat.c
+++ b/ext/-test-/string/enc_str_buf_cat.c
@@ -7,8 +7,22 @@ enc_str_buf_cat(VALUE str, VALUE str2) https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/enc_str_buf_cat.c#L7
     return rb_enc_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2), rb_enc_get(str2));
 }
 
+static VALUE
+str_conv_enc_opts(VALUE str, VALUE from, VALUE to, VALUE ecflags, VALUE ecopts)
+{
+    rb_encoding *from_enc = NIL_P(from) ? NULL : rb_to_encoding(from);
+    rb_encoding *to_enc = NIL_P(to) ? NULL : rb_to_encoding(to);
+    int flags = NUM2INT(ecflags);
+    if (!NIL_P(ecopts)) {
+        Check_Type(ecopts, T_HASH);
+        OBJ_FREEZE(ecopts);
+    }
+    return rb_str_conv_enc_opts(str, from_enc, to_enc, flags, ecopts);
+}
+
 void
 Init_string_enc_str_buf_cat(VALUE klass)
 {
     rb_define_method(klass, "enc_str_buf_cat", enc_str_buf_cat, 1);
+    rb_define_method(klass, "str_conv_enc_opts", str_conv_enc_opts, 4);
 }
diff --git a/string.c b/string.c
index 0bb015f..c183f2b 100644
--- a/string.c
+++ b/string.c
@@ -697,6 +697,18 @@ rb_enc_cr_str_exact_copy(VALUE dest, VALUE src) https://github.com/ruby/ruby/blob/trunk/string.c#L697
     ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
 }
 
+static int
+enc_coderange_scan(VALUE str, rb_encoding *enc, int encidx)
+{
+    if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc) &&
+	rb_enc_mbminlen(enc = get_actual_encoding(encidx, str)) == 1) {
+	return ENC_CODERANGE_BROKEN;
+    }
+    else {
+	return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
+    }
+}
+
 int
 rb_enc_str_coderange(VALUE str)
 {
@@ -705,14 +717,7 @@ rb_enc_str_coderange(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L717
     if (cr == ENC_CODERANGE_UNKNOWN) {
 	int encidx = ENCODING_GET(str);
 	rb_encoding *enc = rb_enc_from_index(encidx);
-	if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc) &&
-            rb_enc_mbminlen(enc = get_actual_encoding(encidx, str)) == 1) {
-	    cr = ENC_CODERANGE_BROKEN;
-	}
-	else {
-	    cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str),
-                                enc);
-	}
+	cr = enc_coderange_scan(str, enc, encidx);
         ENC_CODERANGE_SET(str, cr);
     }
     return cr;
@@ -954,6 +959,15 @@ static VALUE str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long https://github.com/ruby/ruby/blob/trunk/string.c#L959
 				   rb_encoding *from, rb_encoding *to,
 				   int ecflags, VALUE ecopts);
 
+static inline bool
+is_enc_ascii_string(VALUE str, rb_encoding *enc)
+{
+    int encidx = rb_enc_to_index(enc);
+    if (rb_enc_get_index(str) == encidx)
+	return is_ascii_string(str);
+    return enc_coderange_scan(str, enc, encidx) == ENC_CODERANGE_7BIT;
+}
+
 VALUE
 rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
 {
@@ -964,7 +978,7 @@ rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, https://github.com/ruby/ruby/blob/trunk/string.c#L978
     if (!to) return str;
     if (!from) from = rb_enc_get(str);
     if (from == to) return str;
-    if ((rb_enc_asciicompat(to) && is_ascii_string(str)) ||
+    if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
 	to == rb_ascii8bit_encoding()) {
 	if (STR_ENC_GET(str) != to) {
 	    str = rb_str_dup(str);
diff --git a/test/-ext-/string/test_enc_str_buf_cat.rb b/test/-ext-/string/test_enc_str_buf_cat.rb
index 72f9039..b9a63ec 100644
--- a/test/-ext-/string/test_enc_str_buf_cat.rb
+++ b/test/-ext-/string/test_enc_str_buf_cat.rb
@@ -13,4 +13,13 @@ class Test_StringEncStrBufCat < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/-ext-/string/test_enc_str_buf_cat.rb#L13
     assert_equal(:unknown, Bug::String.new(cr_unknown_str).coderange, "an assertion for following tests")
     assert_equal(:valid, Bug::String.new(a8_str).enc_str_buf_cat(cr_unknown_str).coderange, Bug6509)
   end
+
+  def test_str_conv_enc
+    str = Bug::String.new("aaa".encode("US-ASCII"))
+    assert_same(str, str.str_conv_enc_opts("UTF-8", "US-ASCII", 0, nil))
+
+    str = Bug::String.new("aaa".encode("UTF-16LE").force_encoding("UTF-8"))
+    assert_predicate(str, :ascii_only?) # cache coderange
+    assert_equal("aaa", str.str_conv_enc_opts("UTF-16LE", "UTF-8", 0, nil))
+  end
 end
-- 
cgit v1.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]