ruby-changes:41811
From: nobu <ko1@a...>
Date: Sun, 21 Feb 2016 13:57:24 +0900 (JST)
Subject: [ruby-changes:41811] nobu:r53885 (trunk): cgi/escape: Optimize CGI.unescape
nobu 2016-02-21 13:57:57 +0900 (Sun, 21 Feb 2016)

  New Revision: 53885

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=53885

  Log:
    cgi/escape: Optimize CGI.unescape

    * cgi/escape/escape.c: Optimize CGI.unescape performance by C ext
      for ASCII-compatible encodings. [Fix GH-1250]

  Modified files:
    trunk/ChangeLog
    trunk/ext/cgi/escape/escape.c
    trunk/lib/cgi/util.rb
Index: lib/cgi/util.rb
===================================================================
--- lib/cgi/util.rb (revision 53884)
+++ lib/cgi/util.rb (revision 53885)
@@ -16,6 +16,10 @@ module CGI::Util https://github.com/ruby/ruby/blob/trunk/lib/cgi/util.rb#L16
   #   string = CGI::unescape("%27Stop%21%27+said+Fred")
   #      # => "'Stop!' said Fred"
   def unescape(string,encoding=@@accept_charset)
+    _unescape(string,encoding)
+  end
+
+  private def _unescape(string, encoding)
     str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m|
       [m.delete('%')].pack('H*')
     end.force_encoding(encoding)
Index: ChangeLog
===================================================================
--- ChangeLog (revision 53884)
+++ ChangeLog (revision 53885)
@@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun Feb 21 13:57:18 2016 Nobuyoshi Nakada <nobu@r...>
+
+        * cgi/escape/escape.c: Optimize CGI.unescape performance by C ext
+          for ASCII-compatible encodings. [Fix GH-1250]
+
 Sun Feb 21 13:56:57 2016 Nobuyoshi Nakada <nobu@r...>

         * cgi/escape/escape.c: Optimize CGI.unescapeHTML performance by C
Index: ext/cgi/escape/escape.c
===================================================================
--- ext/cgi/escape/escape.c (revision 53884)
+++ ext/cgi/escape/escape.c (revision 53885)
@@ -3,8 +3,10 @@ https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L3

 RUBY_EXTERN unsigned long ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow);
 RUBY_EXTERN const char ruby_hexdigits[];
+RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
 #define lower_hexdigits (ruby_hexdigits+0)
 #define upper_hexdigits (ruby_hexdigits+16)
+#define char_to_number(c) ruby_digit36_to_number_table[(unsigned char)(c)]

 static VALUE rb_cCGI, rb_mUtil, rb_mEscape;

@@ -245,6 +247,68 @@ optimized_escape(VALUE str) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L247
     }
 }

+static VALUE
+optimized_unescape(VALUE str, VALUE encoding)
+{
+    long i, len, beg = 0;
+    VALUE dest = 0;
+    const char *cstr;
+    int cr, origenc, encidx = rb_to_encoding_index(encoding);
+
+    len = RSTRING_LEN(str);
+    cstr = RSTRING_PTR(str);
+
+    for (i = 0; i < len; ++i) {
+        char buf[1];
+        const char c = cstr[i];
+        int clen = 0;
+        if (c == '%') {
+            if (i + 3 > len) break;
+            if (!ISXDIGIT(cstr[i+1])) continue;
+            if (!ISXDIGIT(cstr[i+2])) continue;
+            buf[0] = ((char_to_number(cstr[i+1]) << 4)
+                      | char_to_number(cstr[i+2]));
+            clen = 2;
+        }
+        else if (c == '+') {
+            buf[0] = ' ';
+        }
+        else {
+            continue;
+        }
+
+        if (!dest) {
+            dest = rb_str_buf_new(len);
+        }
+
+        rb_str_cat(dest, cstr + beg, i - beg);
+        i += clen;
+        beg = i + 1;
+
+        rb_str_cat(dest, buf, 1);
+    }
+
+    if (dest) {
+        rb_str_cat(dest, cstr + beg, len - beg);
+        preserve_original_state(str, dest);
+        cr = ENC_CODERANGE_UNKNOWN;
+    }
+    else {
+        dest = rb_str_dup(str);
+        cr = ENC_CODERANGE(str);
+    }
+    origenc = rb_enc_get_index(str);
+    if (origenc != encidx) {
+        rb_enc_associate_index(dest, encidx);
+        if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) {
+            rb_enc_associate_index(dest, origenc);
+            if (cr != ENC_CODERANGE_UNKNOWN)
+                ENC_CODERANGE_SET(dest, cr);
+        }
+    }
+    return dest;
+}
+
 /*
  *  call-seq:
  *     CGI.escapeHTML(string) -> string
@@ -305,6 +369,20 @@ cgiesc_escape(VALUE self, VALUE str) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L369
     }
 }

+/* :nodoc: */
+static VALUE
+cgiesc_unescape(VALUE self, VALUE str, VALUE enc)
+{
+    StringValue(str);
+
+    if (rb_enc_str_asciicompat_p(str)) {
+        return optimized_unescape(str, enc);
+    }
+    else {
+        return rb_call_super(1, &str);
+    }
+}
+
 void
 Init_escape(void)
 {
@@ -314,6 +392,7 @@ Init_escape(void) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L392
     rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
     rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
     rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
+    rb_define_private_method(rb_mEscape, "_unescape", cgiesc_unescape, 2);
     rb_prepend_module(rb_mUtil, rb_mEscape);
     rb_extend_object(rb_cCGI, rb_mEscape);
 }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/