ruby-changes:41751
From: nagachika <ko1@a...>
Date: Mon, 15 Feb 2016 02:25:36 +0900 (JST)
Subject: [ruby-changes:41751] nagachika:r53825 (ruby_2_2): merge revision(s) 49096, 51353, 53168, 53169: [Backport #11834]
nagachika 2016-02-15 02:25:57 +0900 (Mon, 15 Feb 2016) New Revision: 53825 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=53825 Log: merge revision(s) 49096,51353,53168,53169: [Backport #11834] test_m17n.rb: split test_scrub * test/ruby/test_m17n.rb (TestM17N#test_scrub): split into some tests. * include/ruby/ruby.h: add raw FL macros, which assume always the argument object is not a special constant. * internal.h (STR_EMBED_P, STR_SHARED_P): valid only for T_STRING. * string.c: deal with taint flags directly across String instances. * transcode.c (rb_econv_substr_append, econv_primitive_convert): the result should be infected by the original string. * string.c (rb_str_scrub): the result should be infected by the original string. Modified directories: branches/ruby_2_2/ Modified files: branches/ruby_2_2/ChangeLog branches/ruby_2_2/include/ruby/ruby.h branches/ruby_2_2/internal.h branches/ruby_2_2/string.c branches/ruby_2_2/test/ruby/test_econv.rb branches/ruby_2_2/test/ruby/test_m17n.rb branches/ruby_2_2/transcode.c branches/ruby_2_2/version.h Index: ruby_2_2/ChangeLog =================================================================== --- ruby_2_2/ChangeLog (revision 53824) +++ ruby_2_2/ChangeLog (revision 53825) @@ -1,3 +1,22 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_2/ChangeLog#L1 +Mon Feb 15 02:05:13 2016 Nobuyoshi Nakada <nobu@r...> + + * string.c (rb_str_scrub): the result should be infected by the + original string. + +Mon Feb 15 02:05:13 2016 Nobuyoshi Nakada <nobu@r...> + + * transcode.c (rb_econv_substr_append, econv_primitive_convert): + the result should be infected by the original string. + +Mon Feb 15 02:05:13 2016 Nobuyoshi Nakada <nobu@r...> + + * include/ruby/ruby.h: add raw FL macros, which assume always the + argument object is not a special constant. + + * internal.h (STR_EMBED_P, STR_SHARED_P): valid only for T_STRING. + + * string.c: deal with taint flags directly across String instances. + Mon Feb 15 01:20:08 2016 Nobuyoshi Nakada <nobu@r...> * parse.y (regexp): set_yylval_num sets u1, should use nd_tag Index: ruby_2_2/include/ruby/ruby.h =================================================================== --- ruby_2_2/include/ruby/ruby.h (revision 53824) +++ ruby_2_2/include/ruby/ruby.h (revision 53825) @@ -1129,20 +1129,28 @@ struct RStruct { https://github.com/ruby/ruby/blob/trunk/ruby_2_2/include/ruby/ruby.h#L1129 #define FL_ABLE(x) (!SPECIAL_CONST_P(x) && BUILTIN_TYPE(x) != T_NODE) #define FL_TEST_RAW(x,f) (RBASIC(x)->flags&(f)) #define FL_TEST(x,f) (FL_ABLE(x)?FL_TEST_RAW((x),(f)):0) +#define FL_ANY_RAW(x,f) FL_TEST_RAW((x),(f)) #define FL_ANY(x,f) FL_TEST((x),(f)) +#define FL_ALL_RAW(x,f) (FL_TEST_RAW((x),(f)) == (f)) #define FL_ALL(x,f) (FL_TEST((x),(f)) == (f)) -#define FL_SET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags |= (f);} while (0) -#define FL_UNSET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags &= ~(f);} while (0) -#define FL_REVERSE(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags ^= (f);} while (0) +#define FL_SET_RAW(x,f) (RBASIC(x)->flags |= (f)) +#define FL_SET(x,f) (FL_ABLE(x) ? FL_SET_RAW(x, f) : 0) +#define FL_UNSET_RAW(x,f) (RBASIC(x)->flags &= ~(f)) +#define FL_UNSET(x,f) (FL_ABLE(x) ? FL_UNSET_RAW(x, f) : 0) +#define FL_REVERSE_RAW(x,f) (RBASIC(x)->flags ^= (f)) +#define FL_REVERSE(x,f) (FL_ABLE(x) ? FL_REVERSE_RAW(x, f) : 0) #define OBJ_TAINTABLE(x) (FL_ABLE(x) && BUILTIN_TYPE(x) != T_BIGNUM && BUILTIN_TYPE(x) != T_FLOAT) +#define OBJ_TAINTED_RAW(x) FL_TEST_RAW(x, FL_TAINT) #define OBJ_TAINTED(x) (!!FL_TEST((x), FL_TAINT)) -#define OBJ_TAINT(x) (OBJ_TAINTABLE(x) ? (RBASIC(x)->flags |= FL_TAINT) : 0) +#define OBJ_TAINT_RAW(x) FL_SET_RAW(x, FL_TAINT) +#define OBJ_TAINT(x) (OBJ_TAINTABLE(x) ? OBJ_TAINT_RAW(x) : 0) #define OBJ_UNTRUSTED(x) OBJ_TAINTED(x) #define OBJ_UNTRUST(x) OBJ_TAINT(x) +#define OBJ_INFECT_RAW(x,s) FL_SET_RAW(x, OBJ_TAINTED_RAW(s)) #define OBJ_INFECT(x,s) ( \ (OBJ_TAINTABLE(x) && FL_ABLE(s)) ? \ - RBASIC(x)->flags |= RBASIC(s)->flags & FL_TAINT : 0) + OBJ_INFECT_RAW(x, s) : 0) #define OBJ_FROZEN(x) (FL_ABLE(x) ? !!(RBASIC(x)->flags&FL_FREEZE) : 1) #define OBJ_FREEZE_RAW(x) (RBASIC(x)->flags |= FL_FREEZE) Index: ruby_2_2/transcode.c =================================================================== --- ruby_2_2/transcode.c (revision 53824) +++ ruby_2_2/transcode.c (revision 53825) @@ -1854,6 +1854,7 @@ rb_econv_substr_append(rb_econv_t *ec, V https://github.com/ruby/ruby/blob/trunk/ruby_2_2/transcode.c#L1854 src = rb_str_new_frozen(src); dst = rb_econv_append(ec, RSTRING_PTR(src) + off, len, dst, flags); RB_GC_GUARD(src); + OBJ_INFECT_RAW(dst, src); return dst; } @@ -3768,8 +3769,10 @@ econv_primitive_convert(int argc, VALUE https://github.com/ruby/ruby/blob/trunk/ruby_2_2/transcode.c#L3769 res = rb_econv_convert(ec, &ip, is, &op, os, flags); rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output)); - if (!NIL_P(input)) + if (!NIL_P(input)) { + OBJ_INFECT_RAW(output, input); rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input)); + } if (NIL_P(output_bytesize_v) && res == econv_destination_buffer_full) { if (LONG_MAX / 2 < output_bytesize) Index: ruby_2_2/version.h =================================================================== --- ruby_2_2/version.h (revision 53824) +++ ruby_2_2/version.h (revision 53825) @@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_2/version.h#L1 #define RUBY_VERSION "2.2.5" #define RUBY_RELEASE_DATE "2016-02-15" -#define RUBY_PATCHLEVEL 238 +#define RUBY_PATCHLEVEL 239 #define RUBY_RELEASE_YEAR 2016 #define RUBY_RELEASE_MONTH 2 Index: ruby_2_2/string.c =================================================================== --- ruby_2_2/string.c (revision 53824) +++ ruby_2_2/string.c (revision 53825) @@ -1130,7 +1130,7 @@ rb_obj_as_string(VALUE obj) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L1130 str = rb_funcall(obj, id_to_s, 0); if (!RB_TYPE_P(str, T_STRING)) return rb_any_to_s(obj); - if (OBJ_TAINTED(obj)) OBJ_TAINT(str); + OBJ_INFECT(str, obj); return str; } @@ -1476,8 +1476,7 @@ rb_str_plus(VALUE str1, VALUE str2) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L1476 memcpy(ptr3+len1, ptr2, len2); TERM_FILL(&ptr3[len1+len2], rb_enc_mbminlen(enc)); - if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2)) - OBJ_TAINT(str3); + FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2)); ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc), ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2))); RB_GC_GUARD(str1); @@ -4115,7 +4114,7 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4114 if (NIL_P(hash)) { StringValue(repl); } - if (OBJ_TAINTED(repl)) tainted = 1; + tainted = OBJ_TAINTED_RAW(repl); } pat = get_pat_quoted(argv[0], 1); @@ -4175,7 +4174,7 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4174 } rb_str_modify(str); rb_enc_associate(str, enc); - if (OBJ_TAINTED(repl)) tainted = 1; + tainted |= OBJ_TAINTED_RAW(repl); if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) { int cr2 = ENC_CODERANGE(repl); if (cr2 == ENC_CODERANGE_BROKEN || @@ -4199,7 +4198,7 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4198 STR_SET_LEN(str, len); TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str)); ENC_CODERANGE_SET(str, cr); - if (tainted) OBJ_TAINT(str); + FL_SET_RAW(str, tainted); return str; } @@ -4285,7 +4284,7 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4284 else { mode = MAP; } - if (OBJ_TAINTED(repl)) tainted = 1; + tainted = OBJ_TAINTED_RAW(repl); break; default: rb_check_arity(argc, 1, 2); @@ -4348,8 +4347,7 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4347 val = repl; } - - if (OBJ_TAINTED(val)) tainted = 1; + tainted |= OBJ_TAINTED_RAW(val); len = beg0 - offset; /* copy pre-match substr */ if (len) { @@ -4383,11 +4381,11 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4381 } else { RBASIC_SET_CLASS(dest, rb_obj_class(str)); - OBJ_INFECT(dest, str); + tainted |= OBJ_TAINTED_RAW(str); str = dest; } - if (tainted) OBJ_TAINT(str); + FL_SET_RAW(str, tainted); return str; } @@ -4616,7 +4614,7 @@ str_byte_substr(VALUE str, long beg, lon https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4614 } } - OBJ_INFECT(str2, str); + OBJ_INFECT_RAW(str2, str); return str2; } @@ -4741,7 +4739,7 @@ rb_str_reverse(VALUE str) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L4739 } } STR_SET_LEN(rev, RSTRING_LEN(str)); - OBJ_INFECT(rev, str); + OBJ_INFECT_RAW(rev, str); str_enc_copy(rev, str); ENC_CODERANGE_SET(rev, cr); @@ -5036,7 +5034,7 @@ rb_str_inspect(VALUE str) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L5034 if (p > prev) str_buf_cat(result, prev, p - prev); str_buf_cat2(result, "\""); - OBJ_INFECT(result, str); + OBJ_INFECT_RAW(result, str); return result; } @@ -5176,7 +5174,7 @@ rb_str_dump(VALUE str) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L5174 snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name); enc = rb_ascii8bit_encoding(); } - OBJ_INFECT(result, str); + OBJ_INFECT_RAW(result, str); /* result from dump is ASCII */ rb_enc_associate(result, enc); ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT); @@ -7687,8 +7685,7 @@ rb_str_crypt(VALUE str, VALUE salt) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L7685 rb_sys_fail("crypt"); } result = rb_str_new_cstr(res); - OBJ_INFECT(result, str); - OBJ_INFECT(result, salt); + FL_SET_RAW(result, OBJ_TAINTED_RAW(str) | OBJ_TAINTED_RAW(salt)); return result; } @@ -7861,8 +7858,8 @@ rb_str_justify(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L7858 } TERM_FILL(p, rb_enc_mbminlen(enc)); STR_SET_LEN(res, p-RSTRING_PTR(res)); - OBJ_INFECT(res, str); - if (!NIL_P(pad)) OBJ_INFECT(res, pad); + OBJ_INFECT_RAW(res, str); + if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad); rb_enc_associate(res, enc); if (argc == 2) cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad)); @@ -8120,7 +8117,7 @@ rb_str_b(VALUE str) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8117 { VALUE str2 = str_alloc(rb_cString); str_replace_shared_without_enc(str2, str); - OBJ_INFECT(str2, str); + OBJ_INFECT_RAW(str2, str); ENC_CODERANGE_CLEAR(str2); return str2; } @@ -8250,6 +8247,10 @@ rb_str_scrub(VALUE str, VALUE repl) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8247 int cr = ENC_CODERANGE(str); rb_encoding *enc; int encidx; + VALUE buf = Qnil; + const char *rep; + long replen; + int tainted = 0; if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) return Qnil; @@ -8257,6 +8258,7 @@ rb_str_scrub(VALUE str, VALUE repl) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8258 enc = STR_ENC_GET(str); if (!NIL_P(repl)) { repl = str_compat_and_valid(repl, enc); + tainted = OBJ_TAINTED_RAW(repl); } if (rb_enc_dummy_p(enc)) { @@ -8273,10 +8275,7 @@ rb_str_scrub(VALUE str, VALUE repl) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8275 const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; - const char *rep; - long replen; int rep7bit_p; - VALUE buf = Qnil; if (rb_block_given_p()) { rep = NULL; replen = 0; @@ -8342,6 +8341,7 @@ rb_str_scrub(VALUE str, VALUE repl) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8341 else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; @@ -8376,22 +8376,18 @@ rb_str_scrub(VALUE str, VALUE repl) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8376 else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; } } - ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr); - return buf; } else { /* ASCII incompatible */ const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; - VALUE buf = Qnil; - const char *rep; - long replen; long mbminlen = rb_enc_mbminlen(enc); if (!NIL_P(repl)) { rep = RSTRING_PTR(repl); @@ -8446,6 +8442,7 @@ rb_str_scrub(VALUE str, VALUE repl) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8442 else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } p += clen; @@ -8472,12 +8469,15 @@ rb_str_scrub(VALUE str, VALUE repl) https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L8469 else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); + tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } } - ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID); - return buf; + cr = ENC_CODERANGE_VALID; } + FL_SET_RAW(buf, tainted|OBJ_TAINTED_RAW(str)); + ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr); + return buf; } /* Index: ruby_2_2/test/ruby/test_econv.rb =================================================================== --- ruby_2_2/test/ruby/test_econv.rb (revision 53824) +++ ruby_2_2/test/ruby/test_econv.rb (revision 53825) @@ -683,6 +683,7 @@ class TestEncodingConverter < Test::Unit https://github.com/ruby/ruby/blob/trunk/ruby_2_2/test/ruby/test_econv.rb#L683 ec = Encoding::Converter.new("utf-8", "euc-jp") assert_raise(Encoding::InvalidByteSequenceError) { ec.convert("a\x80") } assert_raise(Encoding::UndefinedConversionError) { ec.convert("\ufffd") } + assert_predicate(ec.convert("abc".taint), :tainted?) ret = ec.primitive_convert(nil, "", nil, nil) assert_equal(:finished, ret) assert_raise(ArgumentError) { ec.convert("a") } Index: ruby_2_2/test/ruby/test_m17n.rb =================================================================== --- ruby_2_2/test/ruby/test_m17n.rb (revision 53824) +++ ruby_2_2/test/ruby/test_m17n.rb (revision 53825) @@ -1529,20 +1529,32 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/ruby_2_2/test/ruby/test_m17n.rb#L1529 assert_equal(a("\xE3\x81\x82"), s.b) assert_equal(Encoding::ASCII_8BIT, s.b.encoding) s.taint - assert_equal(true, s.b.tainted?) + assert_predicate(s.b, :tainted?) s = "abc".b - assert_equal(true, s.b.ascii_only?) + assert_predicate(s.b, :ascii_only?) end - def test_scrub + def test_scrub_valid_string + str = "foo" + assert_equal(str, str.scrub) + assert_not_same(str, str.scrub) + assert_predicate(str.dup.taint.scrub, :tainted?) str = "\u3042\u3044" + assert_equal(str, str.scrub) assert_not_same(str, str.scrub) + assert_predicate(str.dup.taint.scrub, :tainted?) str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding + assert_equal(str, str.scrub) assert_not_same(str, str.scrub) assert_nothing_raised(ArgumentError) {str.scrub(nil)} + assert_predicate(str.dup.taint.scrub, :tainted?) + end + def test_scrub_replace_default assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub) assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub) + assert_predicate(u("\x80\x80\x80").taint.scrub, :tainted?) + assert_predicate(u("\xF4\x80\x80A").taint.scrub, :tainted?) # examples in Unicode 6.1.0 D93b assert_equal("\x41\uFFFD\uFFFD\x41\uFFFD\x41", @@ -1553,14 +1565,28 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/ruby_2_2/test/ruby/test_m17n.rb#L1565 u("\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub) assert_equal("abcdefghijklmnopqrstuvwxyz\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064", u("abcdefghijklmnopqrstuvwxyz\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub) + end + def test_scrub_replace_argument + assert_equal("foo", u("foo").scrub("\u3013")) + assert_predicate(u("foo").taint.scrub("\u3013"), :tainted?) + assert_not_predicate(u("foo").scrub("\u3013".taint), :tainted?) + assert_equal("\u3042\u3044", u("\xE3\x81\x82\xE3\x81\x84").scrub("\u3013")) + assert_predicate(u("\xE3\x81\x82\xE3\x81\x84").taint.scrub("\u3013"), :tainted?) + assert_not_predicate(u("\xE3\x81\x82\xE3\x81\x84").scrub("\u3013".taint), :tainted?) assert_equal("\u3042\u3013", u("\xE3\x81\x82\xE3\x81").scrub("\u3013")) + assert_predicate(u("\xE3\x81\x82\xE3\x81").taint.scrub("\u3013"), :tainted?) + assert_predicate(u("\xE3\x81\x82\xE3\x81").scrub("\u3013".taint), :tainted?) assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub(e("\xA4\xA2")) } assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub(1) } assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub(u("\x81")) } assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub(e("\xA2\xAE"))) + end + def test_scrub_replace_block assert_equal("\u3042<e381>", u("\xE3\x81\x82\xE3\x81").scrub{|x|'<'+x.unpack('H*')[0]+'>'}) + assert_predicate(u("\xE3\x81\x82\xE3\x81").taint.scrub{|x|'<'+x.unpack('H*')[0]+'>'}, :tainted?) + assert_predicate(u("\xE3\x81\x82\xE3\x81").scrub{|x|('<'+x.unpack('H*')[0]+'>').taint}, :tainted?) assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub{e("\xA4\xA2")} } assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub{1} } assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub{u("\x81")} } @@ -1568,7 +1594,9 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/ruby_2_2/test/ruby/test_m17n.rb#L1594 assert_equal(u("\x81"), u("a\x81").scrub {|c| break c}) assert_raise(ArgumentError) {u("a\x81").scrub {|c| c}} + end + def test_scrub_widechar assert_equal("\uFFFD\u3042".encode("UTF-16BE"), "\xD8\x00\x30\x42".force_encoding(Encoding::UTF_16BE). scrub) Index: ruby_2_2/internal.h =================================================================== --- ruby_2_2/internal.h (revision 53824) +++ ruby_2_2/internal.h (revision 53825) @@ -993,8 +993,8 @@ VALUE rb_external_str_with_enc(VALUE str https://github.com/ruby/ruby/blob/trunk/ruby_2_2/internal.h#L993 #endif #define STR_NOEMBED FL_USER1 #define STR_SHARED FL_USER2 /* = ELTS_SHARED */ -#define STR_EMBED_P(str) (!FL_TEST((str), STR_NOEMBED)) -#define STR_SHARED_P(s) FL_ALL((s), STR_NOEMBED|ELTS_SHARED) +#define STR_EMBED_P(str) (!FL_TEST_RAW((str), STR_NOEMBED)) +#define STR_SHARED_P(s) FL_ALL_RAW((s), STR_NOEMBED|ELTS_SHARED) #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) #define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) size_t rb_str_memsize(VALUE); Property changes on: ruby_2_2 ___________________________________________________________________ Modified: svn:mergeinfo Merged /trunk:r49096,51353,53168-53169 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/