ruby-changes:50574
From: nobu <ko1@a...>
Date: Sun, 11 Mar 2018 09:05:20 +0900 (JST)
Subject: [ruby-changes:50574] nobu:r62718 (trunk): re.c: fixed escaped multibyte char
nobu	2018-03-11 09:05:12 +0900 (Sun, 11 Mar 2018)

  New Revision: 62718

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=62718

  Log:
    re.c: fixed escaped multibyte char

    * re.c (unescape_nonascii): escaped multibyte character should be
      copied as-is, just with checking if the encoding matches.
      https://twitter.com/sakuro/status/972014409986883584

  Modified files:
    trunk/re.c
    trunk/test/ruby/test_regexp.rb

Index: test/ruby/test_regexp.rb
===================================================================
--- test/ruby/test_regexp.rb	(revision 62717)
+++ test/ruby/test_regexp.rb	(revision 62718)
@@ -515,6 +515,8 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L515
     s = ".........."
     5.times { s.sub!(".", "") }
     assert_equal(".....", s)
+
+    assert_equal("\\\u{3042}", Regexp.new("\\\u{3042}").source)
   end
 
   def test_equal
Index: re.c
===================================================================
--- re.c	(revision 62717)
+++ re.c	(revision 62718)
@@ -2537,11 +2537,13 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2537
     while (p < end) {
         int chlen = rb_enc_precise_mbclen(p, end, enc);
         if (!MBCLEN_CHARFOUND_P(chlen)) {
+          invalid_multibyte:
             errcpy(err, "invalid multibyte character");
             return -1;
         }
         chlen = MBCLEN_CHARFOUND_LEN(chlen);
         if (1 < chlen || (*p & 0x80)) {
+          multibyte:
             rb_str_buf_cat(buf, p, chlen);
             p += chlen;
             if (*encp == 0)
@@ -2559,6 +2561,16 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2561
                 errcpy(err, "too short escape sequence");
                 return -1;
             }
+            chlen = rb_enc_precise_mbclen(p, end, enc);
+            if (!MBCLEN_CHARFOUND_P(chlen)) {
+                goto invalid_multibyte;
+            }
+            if ((chlen = MBCLEN_CHARFOUND_LEN(chlen)) > 1) {
+                /* include the previous backslash */
+                --p;
+                ++chlen;
+                goto multibyte;
+            }
             switch (c = *p++) {
               case '1': case '2': case '3':
               case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */

-- 
ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/