[前][次][番号順一覧][スレッド一覧]

ruby-changes:28769

From: naruse <ko1@a...>
Date: Sun, 19 May 2013 04:00:24 +0900 (JST)
Subject: [ruby-changes:28769] naruse:r40821 (trunk): * string.c (str_scrub0): added for refactoring.

naruse	2013-05-19 04:00:11 +0900 (Sun, 19 May 2013)

  New Revision: 40821

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=40821

  Log:
    * string.c (str_scrub0): added for refactoring.

  Modified files:
    trunk/ChangeLog
    trunk/string.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 40820)
+++ ChangeLog	(revision 40821)
@@ -1,3 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun May 19 03:59:29 2013  NARUSE, Yui  <naruse@r...>
+
+	* string.c (str_scrub0): added for refactoring.
+
 Sun May 19 03:48:26 2013  NARUSE, Yui  <naruse@r...>
 
 	* lib/uri/common.rb (URI.decode_www_form): scrub string if decoded
Index: string.c
===================================================================
--- string.c	(revision 40820)
+++ string.c	(revision 40821)
@@ -7770,29 +7770,19 @@ str_compat_and_valid(VALUE str, rb_encod https://github.com/ruby/ruby/blob/trunk/string.c#L7770
     return str;
 }
 
-/*
- *  call-seq:
- *    str.scrub -> new_str
- *    str.scrub(repl) -> new_str
- *    str.scrub{|bytes|} -> new_str
- *
- *  If the string is invalid byte sequence then replace invalid bytes with given replacement
- *  character, else returns self.
- *  If block is given, replace invalid bytes with returned value of the block.
- *
- *     "abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
- *     "abc\u3042\x81".scrub("*") #=> "abc\u3042*"
- *     "abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
+/**
+ * @param repl the replacement character
+ * @return A new string if the given string is invalid; otherwise Qnil.
  */
-VALUE
-rb_str_scrub(int argc, VALUE *argv, VALUE str)
+static VALUE
+str_scrub0(int argc, VALUE *argv, VALUE str)
 {
     int cr = ENC_CODERANGE(str);
     rb_encoding *enc;
     VALUE repl;
 
     if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
-	return rb_str_dup(str);
+	return Qnil;
 
     enc = STR_ENC_GET(str);
     rb_scan_args(argc, argv, "01", &repl);
@@ -7801,7 +7791,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7791
     }
 
     if (rb_enc_dummy_p(enc)) {
-	return rb_str_dup(str);
+	return Qnil;
     }
 
 #define DEFAULT_REPLACE_CHAR(str) do { \
@@ -7816,7 +7806,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7806
 	const char *rep;
 	long replen;
 	int rep7bit_p;
-	VALUE buf = rb_str_buf_new(RSTRING_LEN(str));
+	VALUE buf = Qnil;
 	if (rb_block_given_p()) {
 	    rep = NULL;
 	    replen = 0;
@@ -7856,6 +7846,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7846
 		 * p ~e: invalid bytes + unknown bytes
 		 */
 		long clen = rb_enc_mbmaxlen(enc);
+		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
 		if (p > p1) {
 		    rb_str_buf_cat(buf, p1, p - p1);
 		}
@@ -7897,6 +7888,13 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7888
 		UNREACHABLE;
 	    }
 	}
+	if (NIL_P(buf)) {
+	    if (p == e) {
+		ENC_CODERANGE_SET(str, cr);
+		return Qnil;
+	    }
+	    buf = rb_str_buf_new(RSTRING_LEN(str));
+	}
 	if (p1 < p) {
 	    rb_str_buf_cat(buf, p1, p - p1);
 	}
@@ -7921,7 +7919,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7919
 	const char *p = RSTRING_PTR(str);
 	const char *e = RSTRING_END(str);
 	const char *p1 = p;
-	VALUE buf = rb_str_buf_new(RSTRING_LEN(str));
+	VALUE buf = Qnil;
 	const char *rep;
 	long replen;
 	long mbminlen = rb_enc_mbminlen(enc);
@@ -7966,6 +7964,7 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7964
 	    else if (MBCLEN_INVALID_P(ret)) {
 		const char *q = p;
 		long clen = rb_enc_mbmaxlen(enc);
+		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
 		if (p > p1) rb_str_buf_cat(buf, p1, p - p1);
 
 		if (e - p < clen) clen = e - p;
@@ -7996,6 +7995,13 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7995
 		UNREACHABLE;
 	    }
 	}
+	if (NIL_P(buf)) {
+	    if (p == e) {
+		ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
+		return Qnil;
+	    }
+	    buf = rb_str_buf_new(RSTRING_LEN(str));
+	}
 	if (p1 < p) {
 	    rb_str_buf_cat(buf, p1, p - p1);
 	}
@@ -8016,6 +8022,27 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L8022
 
 /*
  *  call-seq:
+ *    str.scrub -> new_str
+ *    str.scrub(repl) -> new_str
+ *    str.scrub{|bytes|} -> new_str
+ *
+ *  If the string contains invalid byte sequences, replaces the invalid bytes with the given
+ *  replacement character; otherwise returns a copy of the string.
+ *  If a block is given, replaces the invalid bytes with the value returned by the block.
+ *
+ *     "abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
+ *     "abc\u3042\x81".scrub("*") #=> "abc\u3042*"
+ *     "abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
+ */
+VALUE
+rb_str_scrub(int argc, VALUE *argv, VALUE str)
+{
+    VALUE new = str_scrub0(argc, argv, str);
+    return NIL_P(new) ? rb_str_dup(str): new;
+}
+
+/*
+ *  call-seq:
  *    str.scrub! -> str
  *    str.scrub!(repl) -> str
  *    str.scrub!{|bytes|} -> str
@@ -8028,11 +8055,11 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L8055
  *     "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
  *     "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
  */
-VALUE
-rb_str_scrub_bang(int argc, VALUE *argv, VALUE str)
+static VALUE
+str_scrub_bang(int argc, VALUE *argv, VALUE str)
 {
-    VALUE new = rb_str_scrub(argc, argv, str);
-    rb_str_replace(str, new);
+    VALUE new = str_scrub0(argc, argv, str);
+    if (!NIL_P(new)) rb_str_replace(str, new);
     return str;
 }
 
@@ -8522,7 +8549,7 @@ Init_String(void) https://github.com/ruby/ruby/blob/trunk/string.c#L8549
     rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
     rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
     rb_define_method(rb_cString, "scrub", rb_str_scrub, -1);
-    rb_define_method(rb_cString, "scrub!", rb_str_scrub_bang, -1);
+    rb_define_method(rb_cString, "scrub!", str_scrub_bang, -1);
 
     rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
     rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]