ruby-changes:28769
From: naruse <ko1@a...>
Date: Sun, 19 May 2013 04:00:24 +0900 (JST)
Subject: [ruby-changes:28769] naruse:r40821 (trunk): * string.c (str_scrub0): added for refactoring.
naruse 2013-05-19 04:00:11 +0900 (Sun, 19 May 2013)

  New Revision: 40821

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=40821

  Log:
    * string.c (str_scrub0): added for refactoring.

  Modified files:
    trunk/ChangeLog
    trunk/string.c

Index: ChangeLog
===================================================================
--- ChangeLog (revision 40820)
+++ ChangeLog (revision 40821)
@@ -1,3 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun May 19 03:59:29 2013 NARUSE, Yui <naruse@r...>
+
+        * string.c (str_scrub0): added for refactoring.
+
 Sun May 19 03:48:26 2013 NARUSE, Yui <naruse@r...>
 
         * lib/uri/common.rb (URI.decode_www_form): scrub string if decoded
Index: string.c
===================================================================
--- string.c (revision 40820)
+++ string.c (revision 40821)
@@ -7770,29 +7770,19 @@ str_compat_and_valid(VALUE str, rb_encod https://github.com/ruby/ruby/blob/trunk/string.c#L7770
     return str;
 }
 
-/*
- * call-seq:
- * str.scrub -> new_str
- * str.scrub(repl) -> new_str
- * str.scrub{|bytes|} -> new_str
- *
- * If the string is invalid byte sequence then replace invalid bytes with given replacement
- * character, else returns self.
- * If block is given, replace invalid bytes with returned value of the block.
- *
- * "abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
- * "abc\u3042\x81".scrub("*") #=> "abc\u3042*"
- * "abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
+/**
+ * @param repl the replacement character
+ * @return If given string is invalid, returns a new string. Otherwise, returns Qnil.
  */
-VALUE
-rb_str_scrub(int argc, VALUE *argv, VALUE str)
+static VALUE
+str_scrub0(int argc, VALUE *argv, VALUE str)
 {
     int cr = ENC_CODERANGE(str);
     rb_encoding *enc;
     VALUE repl;
 
     if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
-        return rb_str_dup(str);
+        return Qnil;
 
     enc = STR_ENC_GET(str);
     rb_scan_args(argc, argv, "01", &repl);
@@ -7801,7 +7791,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7791
     }
 
     if (rb_enc_dummy_p(enc)) {
-        return rb_str_dup(str);
+        return Qnil;
     }
 
 #define DEFAULT_REPLACE_CHAR(str) do { \
@@ -7816,7 +7806,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7806
         const char *rep;
         long replen;
         int rep7bit_p;
-        VALUE buf = rb_str_buf_new(RSTRING_LEN(str));
+        VALUE buf = Qnil;
         if (rb_block_given_p()) {
             rep = NULL;
             replen = 0;
@@ -7856,6 +7846,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7846
                  * p ~e: invalid bytes + unknown bytes
                  */
                 long clen = rb_enc_mbmaxlen(enc);
+                if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
                 if (p > p1) {
                     rb_str_buf_cat(buf, p1, p - p1);
                 }
@@ -7897,6 +7888,13 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7888
                 UNREACHABLE;
             }
         }
+        if (NIL_P(buf)) {
+            if (p == e) {
+                ENC_CODERANGE_SET(str, cr);
+                return Qnil;
+            }
+            buf = rb_str_buf_new(RSTRING_LEN(str));
+        }
         if (p1 < p) {
             rb_str_buf_cat(buf, p1, p - p1);
         }
@@ -7921,7 +7919,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7919
         const char *p = RSTRING_PTR(str);
         const char *e = RSTRING_END(str);
         const char *p1 = p;
-        VALUE buf = rb_str_buf_new(RSTRING_LEN(str));
+        VALUE buf = Qnil;
         const char *rep;
         long replen;
         long mbminlen = rb_enc_mbminlen(enc);
@@ -7966,6 +7964,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7964
             else if (MBCLEN_INVALID_P(ret)) {
                 const char *q = p;
                 long clen = rb_enc_mbmaxlen(enc);
+                if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
                 if (p > p1) rb_str_buf_cat(buf, p1, p - p1);
 
                 if (e - p < clen) clen = e - p;
@@ -7996,6 +7995,13 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7995
                 UNREACHABLE;
             }
         }
+        if (NIL_P(buf)) {
+            if (p == e) {
+                ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
+                return Qnil;
+            }
+            buf = rb_str_buf_new(RSTRING_LEN(str));
+        }
         if (p1 < p) {
             rb_str_buf_cat(buf, p1, p - p1);
         }
@@ -8016,6 +8022,27 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L8022
 
 /*
  * call-seq:
+ * str.scrub -> new_str
+ * str.scrub(repl) -> new_str
+ * str.scrub{|bytes|} -> new_str
+ *
+ * If the string is invalid byte sequence then replace invalid bytes with given replacement
+ * character, else returns self.
+ * If block is given, replace invalid bytes with returned value of the block.
+ *
+ * "abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
+ * "abc\u3042\x81".scrub("*") #=> "abc\u3042*"
+ * "abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
+ */
+VALUE
+rb_str_scrub(int argc, VALUE *argv, VALUE str)
+{
+    VALUE new = str_scrub0(argc, argv, str);
+    return NIL_P(new) ? rb_str_dup(str): new;
+}
+
+/*
+ * call-seq:
  * str.scrub! -> str
  * str.scrub!(repl) -> str
  * str.scrub!{|bytes|} -> str
@@ -8028,11 +8055,11 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L8055
  * "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
  * "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
  */
-VALUE
-rb_str_scrub_bang(int argc, VALUE *argv, VALUE str)
+static VALUE
+str_scrub_bang(int argc, VALUE *argv, VALUE str)
 {
-    VALUE new = rb_str_scrub(argc, argv, str);
-    rb_str_replace(str, new);
+    VALUE new = str_scrub0(argc, argv, str);
+    if (!NIL_P(new)) rb_str_replace(str, new);
     return str;
 }
 
@@ -8522,7 +8549,7 @@ Init_String(void) https://github.com/ruby/ruby/blob/trunk/string.c#L8549
     rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
     rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
     rb_define_method(rb_cString, "scrub", rb_str_scrub, -1);
-    rb_define_method(rb_cString, "scrub!", rb_str_scrub_bang, -1);
+    rb_define_method(rb_cString, "scrub!", str_scrub_bang, -1);
     rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
     rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/