[前][次][番号順一覧][スレッド一覧]

ruby-changes:50574

From: nobu <ko1@a...>
Date: Sun, 11 Mar 2018 09:05:20 +0900 (JST)
Subject: [ruby-changes:50574] nobu:r62718 (trunk): re.c: fixed escaped multibyte char

nobu	2018-03-11 09:05:12 +0900 (Sun, 11 Mar 2018)

  New Revision: 62718

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=62718

  Log:
    re.c: fixed escaped multibyte char
    
    * re.c (unescape_nonascii): an escaped multibyte character should be
      copied as-is, only checking that its encoding matches.
      https://twitter.com/sakuro/status/972014409986883584

  Modified files:
    trunk/re.c
    trunk/test/ruby/test_regexp.rb
Index: test/ruby/test_regexp.rb
===================================================================
--- test/ruby/test_regexp.rb	(revision 62717)
+++ test/ruby/test_regexp.rb	(revision 62718)
@@ -515,6 +515,8 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L515
     s = ".........."
     5.times { s.sub!(".", "") }
     assert_equal(".....", s)
+
+    assert_equal("\\\u{3042}", Regexp.new("\\\u{3042}").source)
   end
 
   def test_equal
Index: re.c
===================================================================
--- re.c	(revision 62717)
+++ re.c	(revision 62718)
@@ -2537,11 +2537,13 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2537
     while (p < end) {
         int chlen = rb_enc_precise_mbclen(p, end, enc);
         if (!MBCLEN_CHARFOUND_P(chlen)) {
+          invalid_multibyte:
             errcpy(err, "invalid multibyte character");
             return -1;
         }
         chlen = MBCLEN_CHARFOUND_LEN(chlen);
         if (1 < chlen || (*p & 0x80)) {
+          multibyte:
             rb_str_buf_cat(buf, p, chlen);
             p += chlen;
             if (*encp == 0)
@@ -2559,6 +2561,16 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2561
                 errcpy(err, "too short escape sequence");
                 return -1;
             }
+            chlen = rb_enc_precise_mbclen(p, end, enc);
+            if (!MBCLEN_CHARFOUND_P(chlen)) {
+                goto invalid_multibyte;
+            }
+            if ((chlen = MBCLEN_CHARFOUND_LEN(chlen)) > 1) {
+		/* include the previous backslash */
+                --p;
+                ++chlen;
+                goto multibyte;
+            }
             switch (c = *p++) {
               case '1': case '2': case '3':
               case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]