ruby-changes:28758
From: naruse <ko1@a...>
Date: Sat, 18 May 2013 20:17:08 +0900 (JST)
Subject: [ruby-changes:28758] naruse:r40810 (trunk): * string.c (str_scrub_bang): add String#scrub!. [Feature #8414]
naruse 2013-05-18 20:16:56 +0900 (Sat, 18 May 2013) New Revision: 40810 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=40810 Log: * string.c (str_scrub_bang): add String#scrub!. [Feature #8414] Modified files: trunk/ChangeLog trunk/NEWS trunk/string.c trunk/test/ruby/test_m17n.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 40809) +++ ChangeLog (revision 40810) @@ -1,3 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Sat May 18 20:15:28 2013 NARUSE, Yui <naruse@r...> + + * string.c (str_scrub_bang): add String#scrub!. [Feature #8414] + Sat May 18 16:59:52 2013 Tanaka Akira <akr@f...> * ext/socket/mkconstants.rb (INTEGER2NUM): Renamed from INTEGER2VALUE. Index: string.c =================================================================== --- string.c (revision 40809) +++ string.c (revision 40810) @@ -8014,6 +8014,28 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L8014 } } +/* + * call-seq: + * str.scrub! -> str + * str.scrub!(repl) -> str + * str.scrub!{|bytes|} -> str + * + * If the string is invalid byte sequence then replace invalid bytes with given replacement + * character, else returns self. + * If block is given, replace invalid bytes with returned value of the block. + * + * "abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD" + * "abc\u3042\x81".scrub!("*") #=> "abc\u3042*" + * "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>" + */ +VALUE +rb_str_scrub_bang(int argc, VALUE *argv, VALUE str) +{ + VALUE new = rb_str_scrub(argc, argv, str); + rb_str_replace(str, new); + return str; +} + /********************************************************************** * Document-class: Symbol * @@ -8500,6 +8522,7 @@ Init_String(void) https://github.com/ruby/ruby/blob/trunk/string.c#L8522 rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2); rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1); rb_define_method(rb_cString, "scrub", rb_str_scrub, -1); + rb_define_method(rb_cString, "scrub!", rb_str_scrub_bang, -1); rb_define_method(rb_cString, "to_i", rb_str_to_i, -1); rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); Index: NEWS =================================================================== --- NEWS (revision 40809) +++ NEWS (revision 40810) @@ -32,7 +32,7 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/NEWS#L32 * String * New methods: - * added String#scrub to verify and fix invalid byte sequence. + * String#scrub and String#scrub! verify and fix invalid byte sequence. * extended methods: * If invalid: :replace is specified for String#encode, replace invalid byte sequence even if the destination encoding equals to Index: test/ruby/test_m17n.rb =================================================================== --- test/ruby/test_m17n.rb (revision 40809) +++ test/ruby/test_m17n.rb (revision 40810) @@ -1491,6 +1491,11 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n.rb#L1491 end def test_scrub + str = "\u3042\u3044" + assert_not_same(str, str.scrub) + str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding + assert_not_same(str, str.scrub) + assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub) assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub) @@ -1529,4 +1534,16 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n.rb#L1534 "\xff".force_encoding(Encoding::UTF_32LE). scrub) end + + def test_scrub_bang + str = "\u3042\u3044" + assert_same(str, str.scrub!) + str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding + assert_same(str, str.scrub!) + + str = u("\x80\x80\x80") + str.scrub! + assert_same(str, str.scrub!) + assert_equal("\uFFFD\uFFFD\uFFFD", str) + end end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/