[前][次][番号順一覧][スレッド一覧]

ruby-changes:41811

From: nobu <ko1@a...>
Date: Sun, 21 Feb 2016 13:57:24 +0900 (JST)
Subject: [ruby-changes:41811] nobu:r53885 (trunk): cgi/escape: Optimize CGI.unescape

nobu	2016-02-21 13:57:57 +0900 (Sun, 21 Feb 2016)

  New Revision: 53885

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=53885

  Log:
    cgi/escape: Optimize CGI.unescape
    
    * cgi/escape/escape.c: Optimize CGI.unescape performance by C ext
      for ASCII-compatible encodings.  [Fix GH-1250]

  Modified files:
    trunk/ChangeLog
    trunk/ext/cgi/escape/escape.c
    trunk/lib/cgi/util.rb
Index: lib/cgi/util.rb
===================================================================
--- lib/cgi/util.rb	(revision 53884)
+++ lib/cgi/util.rb	(revision 53885)
@@ -16,6 +16,10 @@ module CGI::Util https://github.com/ruby/ruby/blob/trunk/lib/cgi/util.rb#L16
   #   string = CGI::unescape("%27Stop%21%27+said+Fred")
   #      # => "'Stop!' said Fred"
   def unescape(string,encoding=@@accept_charset)
+    _unescape(string,encoding) # private hook so the cgi/escape C extension can override the implementation
+  end
+
+  private def _unescape(string, encoding)
     str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m|
       [m.delete('%')].pack('H*')
     end.force_encoding(encoding)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 53884)
+++ ChangeLog	(revision 53885)
@@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun Feb 21 13:57:18 2016  Nobuyoshi Nakada  <nobu@r...>
+
+	* cgi/escape/escape.c: Optimize CGI.unescape performance by C ext
+	  for ASCII-compatible encodings.  [Fix GH-1250]
+
 Sun Feb 21 13:56:57 2016  Nobuyoshi Nakada  <nobu@r...>
 
 	* cgi/escape/escape.c: Optimize CGI.unescapeHTML performance by C
Index: ext/cgi/escape/escape.c
===================================================================
--- ext/cgi/escape/escape.c	(revision 53884)
+++ ext/cgi/escape/escape.c	(revision 53885)
@@ -3,8 +3,10 @@ https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L3
 
 RUBY_EXTERN unsigned long ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow);
 RUBY_EXTERN const char ruby_hexdigits[];
+RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
 #define lower_hexdigits (ruby_hexdigits+0)
 #define upper_hexdigits (ruby_hexdigits+16)
+#define char_to_number(c) ruby_digit36_to_number_table[(unsigned char)(c)]
 
 static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
 
@@ -245,6 +247,68 @@ optimized_escape(VALUE str) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L247
     }
 }
 
+/* Decode "%XX" escapes and '+' (as a space) in str into a new string, then tag
+ * it with encoding, keeping str's own encoding when the bytes are not clean. */
+static VALUE
+optimized_unescape(VALUE str, VALUE encoding)
+{
+    long i, len, beg = 0;
+    VALUE dest = 0;
+    const char *cstr;
+    int cr, origenc, encidx = rb_to_encoding_index(encoding);
+
+    len  = RSTRING_LEN(str);
+    cstr = RSTRING_PTR(str);
+
+    for (i = 0; i < len; ++i) {
+	char buf[1];
+	const char c = cstr[i];
+	int clen = 0;
+	if (c == '%') {
+	    /* truncated escape stays literal; keep scanning for a later '+' */
+	    if (i + 3 > len) continue;
+	    if (!ISXDIGIT(cstr[i+1])) continue;
+	    if (!ISXDIGIT(cstr[i+2])) continue;
+	    buf[0] = ((char_to_number(cstr[i+1]) << 4)
+		      | char_to_number(cstr[i+2]));
+	    clen = 2;
+	}
+	else if (c == '+') {
+	    buf[0] = ' ';
+	}
+	else {
+	    continue;
+	}
+	/* dest is allocated lazily, on the first byte that needs decoding */
+	if (!dest) dest = rb_str_buf_new(len);
+	/* flush the untouched run [beg, i), then step over the escape */
+	rb_str_cat(dest, cstr + beg, i - beg);
+	i += clen;
+	beg = i + 1;
+	rb_str_cat(dest, buf, 1);
+    }
+
+    if (dest) {
+	rb_str_cat(dest, cstr + beg, len - beg);
+	preserve_original_state(str, dest);
+	cr = ENC_CODERANGE_UNKNOWN;
+    }
+    else {
+	dest = rb_str_dup(str);
+	cr = ENC_CODERANGE(str);
+    }
+    origenc = rb_enc_get_index(str);
+    if (origenc != encidx) {
+	rb_enc_associate_index(dest, encidx); /* try the requested encoding */
+	if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) {
+	    rb_enc_associate_index(dest, origenc); /* not clean there: restore */
+	    if (cr != ENC_CODERANGE_UNKNOWN)
+		ENC_CODERANGE_SET(dest, cr);
+	}
+    }
+    return dest;
+}
+
 /*
  *  call-seq:
  *     CGI.escapeHTML(string) -> string
@@ -305,6 +369,20 @@ cgiesc_escape(VALUE self, VALUE str) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L369
     }
 }
 
+/* :nodoc: */
+static VALUE
+cgiesc_unescape(VALUE self, VALUE str, VALUE enc)
+{
+    VALUE args[2];
+
+    StringValue(str);
+    if (rb_enc_str_asciicompat_p(str)) return optimized_unescape(str, enc);
+    /* super is _unescape(string, encoding): both arguments are required */
+    args[0] = str;
+    args[1] = enc;
+    return rb_call_super(2, args);
+}
+
 void
 Init_escape(void)
 {
@@ -314,6 +392,7 @@ Init_escape(void) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L392
     rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
     rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
     rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
+    rb_define_private_method(rb_mEscape, "_unescape", cgiesc_unescape, 2);
     rb_prepend_module(rb_mUtil, rb_mEscape);
     rb_extend_object(rb_cCGI, rb_mEscape);
 }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]