ruby-changes:56009
From: Takashi <ko1@a...>
Date: Wed, 5 Jun 2019 11:02:06 +0900 (JST)
Subject: [ruby-changes:56009] Takashi Kokubun: 71b14affc6 (trunk): Revert "Optimize CGI.escapeHTML by reducing buffer extension"
https://git.ruby-lang.org/ruby.git/commit/?id=71b14affc6 From 71b14affc6b699f38aabe73125380cab57799e34 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun <takashikkbn@g...> Date: Wed, 5 Jun 2019 11:00:54 +0900 Subject: Revert "Optimize CGI.escapeHTML by reducing buffer extension" This reverts commit 8d81e59aa7a62652caf85f9c8db371703668c149. `ALLOCA_N` does not check stack overflow unlike ALLOCV. I'll fix it and re-commit it again. diff --git a/benchmark/cgi_escape_html.yml b/benchmark/cgi_escape_html.yml deleted file mode 100644 index af6abd0..0000000 --- a/benchmark/cgi_escape_html.yml +++ /dev/null @@ -1,40 +0,0 @@ https://github.com/ruby/ruby/blob/trunk/#L0 -prelude: require 'cgi/escape' -benchmark: - - name: escape_html_blank - prelude: str = "" - script: CGI.escapeHTML(str) - loop_count: 20000000 - - name: escape_html_short_none - prelude: str = "abcde" - script: CGI.escapeHTML(str) - loop_count: 20000000 - - name: escape_html_short_one - prelude: str = "abcd<" - script: CGI.escapeHTML(str) - loop_count: 20000000 - - name: escape_html_short_all - prelude: str = "'&\"<>" - script: CGI.escapeHTML(str) - loop_count: 5000000 - - name: escape_html_long_none - prelude: str = "abcde" * 300 - script: CGI.escapeHTML(str) - loop_count: 1000000 - - name: escape_html_long_all - prelude: str = "'&\"<>" * 10 - script: CGI.escapeHTML(str) - loop_count: 1000000 - - name: escape_html_real - prelude: | # http://example.com/ - str = <<~HTML - <body> - <div> - <h1>Example Domain</h1> - <p>This domain is established to be used for illustrative examples in documents. 
You may use this - domain in examples without prior coordination or asking for permission.</p> - <p><a href="http://www.iana.org/domains/example">More information...</a></p> - </div> - </body> - HTML - script: CGI.escapeHTML(str) - loop_count: 1000000 diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index 9b64c35..ced1b18 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -11,20 +11,27 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[]; https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L11 static VALUE rb_cCGI, rb_mUtil, rb_mEscape; static ID id_accept_charset; -#define HTML_ESCAPE_MAX_LEN 6 - -static const struct { - uint8_t len; - char str[HTML_ESCAPE_MAX_LEN+1]; -} html_escape_table[UCHAR_MAX+1] = { -#define HTML_ESCAPE(c, str) [c] = {rb_strlen_lit(str), str} - HTML_ESCAPE('\'', "&#39;"), - HTML_ESCAPE('&', "&amp;"), - HTML_ESCAPE('"', "&quot;"), - HTML_ESCAPE('<', "&lt;"), - HTML_ESCAPE('>', "&gt;"), -#undef HTML_ESCAPE -}; +static void +html_escaped_cat(VALUE str, char c) +{ + switch (c) { + case '\'': + rb_str_cat_cstr(str, "&#39;"); + break; + case '&': + rb_str_cat_cstr(str, "&amp;"); + break; + case '"': + rb_str_cat_cstr(str, "&quot;"); + break; + case '<': + rb_str_cat_cstr(str, "&lt;"); + break; + case '>': + rb_str_cat_cstr(str, "&gt;"); + break; + } +} static inline void preserve_original_state(VALUE orig, VALUE dest) @@ -37,27 +44,36 @@ preserve_original_state(VALUE orig, VALUE dest) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L44 static VALUE optimized_escape_html(VALUE str) { - const char *cstr = RSTRING_PTR(str); - const char *end = cstr + RSTRING_LEN(str); - char *buf = ALLOCA_N(char, RSTRING_LEN(str) * HTML_ESCAPE_MAX_LEN); - - char *dest = buf; - while (cstr < end) { - const unsigned char c = *cstr++; - uint8_t len = html_escape_table[c].len; - if (len) { - memcpy(dest, html_escape_table[c].str, len); - dest += len; - } - else { - *dest++ = c; - } + long i, len, beg = 0; + VALUE dest = 0; + const char 
*cstr; + + len = RSTRING_LEN(str); + cstr = RSTRING_PTR(str); + + for (i = 0; i < len; i++) { + switch (cstr[i]) { + case '\'': + case '&': + case '"': + case '<': + case '>': + if (!dest) { + dest = rb_str_buf_new(len); + } + + rb_str_cat(dest, cstr + beg, i - beg); + beg = i + 1; + + html_escaped_cat(dest, cstr[i]); + break; + } } - if (RSTRING_LEN(str) < (dest - buf)) { - VALUE escaped = rb_str_new(buf, dest - buf); - preserve_original_state(str, escaped); - return escaped; + if (dest) { + rb_str_cat(dest, cstr + beg, len - beg); + preserve_original_state(str, dest); + return dest; } else { return rb_str_dup(str); -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/