[前][次][番号順一覧][スレッド一覧]

ruby-changes:66236

From: Jean <ko1@a...>
Date: Mon, 17 May 2021 20:16:07 +0900 (JST)
Subject: [ruby-changes:66236] 2de594ca98 (master): [flori/json] Deduplicate strings inside json_string_unescape

https://git.ruby-lang.org/ruby.git/commit/?id=2de594ca98

From 2de594ca98d95e62f7fcf000f21e174ac3f6fcaf Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@g...>
Date: Wed, 18 Nov 2020 11:59:27 +0100
Subject: [flori/json] Deduplicate strings inside json_string_unescape

[ci 2]

https://github.com/flori/json/commit/1982070cb8
---
 ext/json/parser/extconf.rb |  1 +
 ext/json/parser/parser.c   | 84 +++++++++++++++++++++++++---------------------
 ext/json/parser/parser.h   |  2 +-
 ext/json/parser/parser.rl  | 70 ++++++++++++++++++++------------------
 4 files changed, 85 insertions(+), 72 deletions(-)

diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb
index f832b56..feb586e 100644
--- a/ext/json/parser/extconf.rb
+++ b/ext/json/parser/extconf.rb
@@ -2,6 +2,7 @@ https://github.com/ruby/ruby/blob/trunk/ext/json/parser/extconf.rb#L2
 require 'mkmf'
 
 have_func("rb_enc_raise", "ruby.h")
+have_func("rb_enc_interned_str", "ruby.h")
 
 # checking if String#-@ (str_uminus) dedupes... '
 begin
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 241ec0d..a15d20a 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -2354,7 +2354,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2354
 }
 
 static const size_t MAX_STACK_BUFFER_SIZE = 128;
-static VALUE json_string_unescape(char *string, char *stringEnd)
+static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
 {
 	VALUE result = Qnil;
 	size_t bufferSize = stringEnd - string;
@@ -2363,10 +2363,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2363
 	char buf[4];
 
 	if (bufferSize > MAX_STACK_BUFFER_SIZE) {
-		buffer = xmalloc(bufferSize);
-		bufferStart = buffer;
+		bufferStart = buffer = ALLOC_N(char, bufferSize);
 	} else {
-		bufferStart = buffer = alloca(bufferSize);
+		bufferStart = buffer = ALLOCA_N(char, bufferSize);
 	}
 
 	while (pe < stringEnd) {
@@ -2453,15 +2452,42 @@ static VALUE json_string_unescape(char *string, char *stringEnd) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2452
 		buffer += pe - p;
 	}
 
-	#ifdef HAVE_RUBY_ENCODING_H
-	result = rb_utf8_str_new(bufferStart, buffer - bufferStart);
-	#else
-	result = rb_str_new(bufferStart, buffer - bufferStart);
-	#endif
+	# ifdef HAVE_RB_ENC_INTERNED_STR
+	if (intern) {
+		result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
+	} else {
+		result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
+	}
+	if (bufferSize > MAX_STACK_BUFFER_SIZE) {
+		free(bufferStart);
+	}
+	# else
+	result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
 
 	if (bufferSize > MAX_STACK_BUFFER_SIZE) {
 		free(bufferStart);
 	}
+
+	if (intern) {
+		# if STR_UMINUS_DEDUPE_FROZEN
+		// Starting from MRI 2.8 it is preferable to freeze the string
+		// before deduplication so that it can be interned directly
+		// otherwise it would be duplicated first which is wasteful.
+		result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
+		# elif STR_UMINUS_DEDUPE
+		// MRI 2.5 and older do not deduplicate strings that are already
+		// frozen.
+		result = rb_funcall(result, i_uminus, 0);
+		# else
+		result = rb_str_freeze(result);
+		# endif
+	}
+	# endif
+
+	if (symbolize) {
+		result = rb_str_intern(result);
+	}
+
 	return result;
 }
 
@@ -2490,7 +2516,7 @@ static const char _JSON_string_nfa_pop_trans[] = { https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2516
 };
 
 
-#line 586 "parser.rl"
+#line 612 "parser.rl"
 
 
 static int
@@ -2515,7 +2541,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2541
 		cs = (int)JSON_string_start;
 	}
 
-	#line 606 "parser.rl"
+	#line 632 "parser.rl"
 
 	json->memo = p;
 
@@ -2576,9 +2602,9 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2602
 		}
 		ctr2:
 		{
-			#line 573 "parser.rl"
+			#line 599 "parser.rl"
 
-			*result = json_string_unescape(json->memo + 1, p);
+			*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
 			if (NIL_P(*result)) {
 				{p = p - 1; }
 				{p+= 1; cs = 8; goto _out;}
@@ -2588,7 +2614,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2614
 			}
 		}
 		{
-			#line 583 "parser.rl"
+			#line 609 "parser.rl"
 			{p = p - 1; } {p+= 1; cs = 8; goto _out;} }
 
 		goto st8;
@@ -2703,7 +2729,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2729
 		_out: {}
 	}
 
-	#line 608 "parser.rl"
+	#line 634 "parser.rl"
 
 
 	if (json->create_additions && RTEST(match_string = json->match_string)) {
@@ -2717,26 +2743,6 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2743
 		}
 	}
 
-	if (json->symbolize_names && json->parsing_name) {
-		*result = rb_str_intern(*result);
-	} else if (RB_TYPE_P(*result, T_STRING)) {
-		# if STR_UMINUS_DEDUPE_FROZEN
-		if (json->freeze) {
-			// Starting from MRI 2.8 it is preferable to freeze the string
-			// before deduplication so that it can be interned directly
-			// otherwise it would be duplicated first which is wasteful.
-			*result = rb_funcall(rb_str_freeze(*result), i_uminus, 0);
-		}
-		# elif STR_UMINUS_DEDUPE
-		if (json->freeze) {
-			// MRI 2.5 and older do not deduplicate strings that are already
-			// frozen.
-			*result = rb_funcall(*result, i_uminus, 0);
-		}
-		# else
-		rb_str_resize(*result, RSTRING_LEN(*result));
-		# endif
-	}
 	if (cs >= JSON_string_first_final) {
 		return p + 1;
 	} else {
@@ -2936,7 +2942,7 @@ static const char _JSON_nfa_pop_trans[] = { https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2942
 };
 
 
-#line 829 "parser.rl"
+#line 835 "parser.rl"
 
 
 /*
@@ -2957,7 +2963,7 @@ static VALUE cParser_parse(VALUE self) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L2963
 		cs = (int)JSON_start;
 	}
 
-	#line 845 "parser.rl"
+	#line 851 "parser.rl"
 
 	p = json->source;
 	pe = p + json->len;
@@ -3050,7 +3056,7 @@ static VALUE cParser_parse(VALUE self) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L3056
 		goto _out;
 		ctr2:
 		{
-			#line 821 "parser.rl"
+			#line 827 "parser.rl"
 
 			char *np = JSON_parse_value(json, p, pe, &result, 0);
 			if (np == NULL) { {p = p - 1; } {p+= 1; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -3204,7 +3210,7 @@ static VALUE cParser_parse(VALUE self) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.c#L3210
 		_out: {}
 	}
 
-	#line 848 "parser.rl"
+	#line 854 "parser.rl"
 
 
 	if (cs >= JSON_first_final && p == pe) {
diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h
index d2b074d..0992bbf 100644
--- a/ext/json/parser/parser.h
+++ b/ext/json/parser/parser.h
@@ -63,7 +63,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.h#L63
 static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
 static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
 static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static VALUE json_string_unescape(char *string, char *stringEnd);
+static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize);
 static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
 static VALUE convert_encoding(VALUE source);
 static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self);
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
index 230d725..f7be1a5 100644
--- a/ext/json/parser/parser.rl
+++ b/ext/json/parser/parser.rl
@@ -453,7 +453,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.rl#L453
 }
 
 static const size_t MAX_STACK_BUFFER_SIZE = 128;
-static VALUE json_string_unescape(char *string, char *stringEnd)
+static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
 {
     VALUE result = Qnil;
     size_t bufferSize = stringEnd - string;
@@ -462,10 +462,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.rl#L462
     char buf[4];
 
     if (bufferSize > MAX_STACK_BUFFER_SIZE) {
-      buffer = xmalloc(bufferSize);
-      bufferStart = buffer;
+      bufferStart = buffer = ALLOC_N(char, bufferSize);
     } else {
-      bufferStart = buffer = alloca(bufferSize);
+      bufferStart = buffer = ALLOCA_N(char, bufferSize);
     }
 
     while (pe < stringEnd) {
@@ -552,15 +551,42 @@ static VALUE json_string_unescape(char *string, char *stringEnd) https://github.com/ruby/ruby/blob/trunk/ext/json/parser/parser.rl#L551
       buffer += pe - p;
     }
 
-    #ifdef HAVE_RUBY_ENCODING_H
-      result = rb_utf8_str_new(bufferStart, buffer - bufferStart);
-    #else
-      result = rb_str_new(bufferStart, buffer - bufferStart);
-    #endif
+# ifdef HAVE_RB_ENC_INTERNED_STR
+      if (intern) {
+        result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
+      } else {
+        result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
+      }
+      if (bufferSize > MAX_STACK_BUFFER_SIZE) {
+        free(bufferStart);
+      }
+# else
+      result = rb_ut (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]