[前][次][番号順一覧][スレッド一覧]

ruby-changes:56009

From: Takashi <ko1@a...>
Date: Wed, 5 Jun 2019 11:02:06 +0900 (JST)
Subject: [ruby-changes:56009] Takashi Kokubun: 71b14affc6 (trunk): Revert "Optimize CGI.escapeHTML by reducing buffer extension"

https://git.ruby-lang.org/ruby.git/commit/?id=71b14affc6

From 71b14affc6b699f38aabe73125380cab57799e34 Mon Sep 17 00:00:00 2001
From: Takashi Kokubun <takashikkbn@g...>
Date: Wed, 5 Jun 2019 11:00:54 +0900
Subject: Revert "Optimize CGI.escapeHTML by reducing buffer extension"

This reverts commit 8d81e59aa7a62652caf85f9c8db371703668c149.

Unlike `ALLOCV`, `ALLOCA_N` does not check for stack overflow. I'll fix
that and re-commit the change.

diff --git a/benchmark/cgi_escape_html.yml b/benchmark/cgi_escape_html.yml
deleted file mode 100644
index af6abd0..0000000
--- a/benchmark/cgi_escape_html.yml
+++ /dev/null
@@ -1,40 +0,0 @@ https://github.com/ruby/ruby/blob/trunk/#L0
-prelude: require 'cgi/escape'
-benchmark:
-  - name: escape_html_blank
-    prelude: str = ""
-    script: CGI.escapeHTML(str)
-    loop_count: 20000000
-  - name: escape_html_short_none
-    prelude: str = "abcde"
-    script: CGI.escapeHTML(str)
-    loop_count: 20000000
-  - name: escape_html_short_one
-    prelude: str = "abcd<"
-    script: CGI.escapeHTML(str)
-    loop_count: 20000000
-  - name: escape_html_short_all
-    prelude: str = "'&\"<>"
-    script: CGI.escapeHTML(str)
-    loop_count: 5000000
-  - name: escape_html_long_none
-    prelude: str = "abcde" * 300
-    script: CGI.escapeHTML(str)
-    loop_count: 1000000
-  - name: escape_html_long_all
-    prelude: str = "'&\"<>" * 10
-    script: CGI.escapeHTML(str)
-    loop_count: 1000000
-  - name: escape_html_real
-    prelude: | # http://example.com/
-      str = <<~HTML
-        <body>
-        <div>
-            <h1>Example Domain</h1>
-            <p>This domain is established to be used for illustrative examples in documents. You may use this
-            domain in examples without prior coordination or asking for permission.</p>
-            <p><a href="http://www.iana.org/domains/example">More information...</a></p>
-        </div>
-        </body>
-      HTML
-    script: CGI.escapeHTML(str)
-    loop_count: 1000000
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
index 9b64c35..ced1b18 100644
--- a/ext/cgi/escape/escape.c
+++ b/ext/cgi/escape/escape.c
@@ -11,20 +11,27 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[]; https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L11
 static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
 static ID id_accept_charset;
 
-#define HTML_ESCAPE_MAX_LEN 6
-
-static const struct {
-    uint8_t len;
-    char str[HTML_ESCAPE_MAX_LEN+1];
-} html_escape_table[UCHAR_MAX+1] = {
-#define HTML_ESCAPE(c, str) [c] = {rb_strlen_lit(str), str}
-    HTML_ESCAPE('\'', "&#39;"),
-    HTML_ESCAPE('&', "&amp;"),
-    HTML_ESCAPE('"', "&quot;"),
-    HTML_ESCAPE('<', "&lt;"),
-    HTML_ESCAPE('>', "&gt;"),
-#undef HTML_ESCAPE
-};
+static void
+html_escaped_cat(VALUE str, char c)
+{
+    switch (c) {
+      case '\'':
+	rb_str_cat_cstr(str, "&#39;");
+	break;
+      case '&':
+	rb_str_cat_cstr(str, "&amp;");
+	break;
+      case '"':
+	rb_str_cat_cstr(str, "&quot;");
+	break;
+      case '<':
+	rb_str_cat_cstr(str, "&lt;");
+	break;
+      case '>':
+	rb_str_cat_cstr(str, "&gt;");
+	break;
+    }
+}
 
 static inline void
 preserve_original_state(VALUE orig, VALUE dest)
@@ -37,27 +44,36 @@ preserve_original_state(VALUE orig, VALUE dest) https://github.com/ruby/ruby/blob/trunk/ext/cgi/escape/escape.c#L44
 static VALUE
 optimized_escape_html(VALUE str)
 {
-    const char *cstr = RSTRING_PTR(str);
-    const char *end = cstr + RSTRING_LEN(str);
-    char *buf = ALLOCA_N(char, RSTRING_LEN(str) * HTML_ESCAPE_MAX_LEN);
-
-    char *dest = buf;
-    while (cstr < end) {
-        const unsigned char c = *cstr++;
-        uint8_t len = html_escape_table[c].len;
-        if (len) {
-            memcpy(dest, html_escape_table[c].str, len);
-            dest += len;
-        }
-        else {
-            *dest++ = c;
-        }
+    long i, len, beg = 0;
+    VALUE dest = 0;
+    const char *cstr;
+
+    len  = RSTRING_LEN(str);
+    cstr = RSTRING_PTR(str);
+
+    for (i = 0; i < len; i++) {
+	switch (cstr[i]) {
+	  case '\'':
+	  case '&':
+	  case '"':
+	  case '<':
+	  case '>':
+	    if (!dest) {
+		dest = rb_str_buf_new(len);
+	    }
+
+	    rb_str_cat(dest, cstr + beg, i - beg);
+	    beg = i + 1;
+
+	    html_escaped_cat(dest, cstr[i]);
+	    break;
+	}
     }
 
-    if (RSTRING_LEN(str) < (dest - buf)) {
-        VALUE escaped = rb_str_new(buf, dest - buf);
-        preserve_original_state(str, escaped);
-        return escaped;
+    if (dest) {
+	rb_str_cat(dest, cstr + beg, len - beg);
+	preserve_original_state(str, dest);
+	return dest;
     }
     else {
 	return rb_str_dup(str);
-- 
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]