[前][次][番号順一覧][スレッド一覧]

ruby-changes:47239

From: normal <ko1@a...>
Date: Tue, 18 Jul 2017 11:30:04 +0900 (JST)
Subject: [ruby-changes:47239] normal:r59354 (trunk): hash: keep fstrings of tainted strings for string keys

normal	2017-07-18 11:29:59 +0900 (Tue, 18 Jul 2017)

  New Revision: 59354

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59354

  Log:
    hash: keep fstrings of tainted strings for string keys
    
    The same hash keys may be loaded from tainted data sources
    frequently (e.g. parsing headers from socket or loading
    YAML data from a file).  If a non-tainted fstring already
    exists (because the application expects the hash key),
    cache and deduplicate the tainted version in the new
    tainted_frozen_strings table.
    
    For non-embedded strings, this also allows sharing with the
    underlying malloc-ed data.
    
    * vm_core.h (rb_vm_struct): add tainted_frozen_strings
    * vm.c (ruby_vm_destruct): free tainted_frozen_strings
      (Init_vm_objects): initialize tainted_frozen_strings
      (rb_vm_tfstring_table): accessor for tainted_frozen_strings
    * internal.h: declare rb_fstring_existing, rb_vm_tfstring_table
    * hash.c (fstring_existing_str): remove (moved to string.c)
      (hash_aset_str): use rb_fstring_existing
    * string.c (rb_fstring_existing): new, based on fstring_existing_str
      (tainted_fstr_update): new
      (rb_fstring_existing0): new, based on fstring_existing_str
      (rb_tainted_fstring_existing): new, special case for tainted strings
      (rb_str_free): delete from tainted_frozen_strings table
    * test/ruby/test_optimization.rb (test_hash_reuse_fstring): new test
      [ruby-core:82012] [Bug #13737]

  Modified files:
    trunk/hash.c
    trunk/internal.h
    trunk/string.c
    trunk/test/ruby/test_optimization.rb
    trunk/vm.c
    trunk/vm_core.h
Index: hash.c
===================================================================
--- hash.c	(revision 59353)
+++ hash.c	(revision 59354)
@@ -18,7 +18,6 @@ https://github.com/ruby/ruby/blob/trunk/hash.c#L18
 #include "probes.h"
 #include "id.h"
 #include "symbol.h"
-#include "gc.h"
 
 #ifdef __APPLE__
 # ifdef HAVE_CRT_EXTERNS_H
@@ -1516,33 +1515,13 @@ hash_aset(st_data_t *key, st_data_t *val https://github.com/ruby/ruby/blob/trunk/hash.c#L1515
     return ST_CONTINUE;
 }
 
-static VALUE
-fstring_existing_str(VALUE str)
-{
-    st_data_t fstr;
-    st_table *tbl = rb_vm_fstring_table();
-
-    if (st_lookup(tbl, str, &fstr)) {
-	if (rb_objspace_garbage_object_p(fstr)) {
-	    return rb_fstring(str);
-	}
-	else {
-	    return (VALUE)fstr;
-	}
-    }
-    else {
-	return Qnil;
-    }
-}
-
 static int
 hash_aset_str(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing)
 {
     if (!existing && !RB_OBJ_FROZEN(*key)) {
 	VALUE k;
 
-	if (!RB_OBJ_TAINTED(*key) &&
-	    (k = fstring_existing_str(*key)) != Qnil) {
+	if ((k = rb_fstring_existing(*key)) != Qnil) {
 	    *key = k;
 	}
 	else {
Index: internal.h
===================================================================
--- internal.h	(revision 59353)
+++ internal.h	(revision 59354)
@@ -1574,6 +1574,7 @@ VALUE rb_strftime(const char *format, si https://github.com/ruby/ruby/blob/trunk/internal.h#L1574
 /* string.c */
 VALUE rb_fstring(VALUE);
 VALUE rb_fstring_new(const char *ptr, long len);
+VALUE rb_fstring_existing(VALUE);
 #define rb_fstring_lit(str) rb_fstring_new((str), rb_strlen_lit(str))
 #define rb_fstring_literal(str) rb_fstring_lit(str)
 VALUE rb_fstring_cstr(const char *str);
@@ -1726,7 +1727,7 @@ void rb_vm_check_redefinition_by_prepend https://github.com/ruby/ruby/blob/trunk/internal.h#L1727
 VALUE rb_yield_refine_block(VALUE refinement, VALUE refinements);
 VALUE ruby_vm_special_exception_copy(VALUE);
 PUREFUNC(st_table *rb_vm_fstring_table(void));
-
+PUREFUNC(st_table *rb_vm_tfstring_table(void));
 
 /* vm_dump.c */
 void rb_print_backtrace(void);
Index: vm.c
===================================================================
--- vm.c	(revision 59353)
+++ vm.c	(revision 59354)
@@ -2206,6 +2206,10 @@ ruby_vm_destruct(rb_vm_t *vm) https://github.com/ruby/ruby/blob/trunk/vm.c#L2206
 	    st_free_table(vm->frozen_strings);
 	    vm->frozen_strings = 0;
 	}
+	if (vm->tainted_frozen_strings) {
+	    st_free_table(vm->tainted_frozen_strings);
+	    vm->tainted_frozen_strings = 0;
+	}
 	rb_vm_gvl_destroy(vm);
 	if (objspace) {
 	    rb_objspace_free(objspace);
@@ -3142,6 +3146,8 @@ Init_vm_objects(void) https://github.com/ruby/ruby/blob/trunk/vm.c#L3146
     vm->mark_object_ary = rb_ary_tmp_new(128);
     vm->loading_table = st_init_strtable();
     vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 1000);
+    vm->tainted_frozen_strings =
+		st_init_table_with_size(&rb_fstring_hash_type, 1000);
 }
 
 /* top self */
@@ -3203,6 +3209,12 @@ rb_vm_fstring_table(void) https://github.com/ruby/ruby/blob/trunk/vm.c#L3209
     return GET_VM()->frozen_strings;
 }
 
+st_table *
+rb_vm_tfstring_table(void)
+{
+    return GET_VM()->tainted_frozen_strings;
+}
+
 #if VM_COLLECT_USAGE_DETAILS
 
 #define HASH_ASET(h, k, v) rb_hash_aset((h), (st_data_t)(k), (st_data_t)(v))
Index: vm_core.h
===================================================================
--- vm_core.h	(revision 59353)
+++ vm_core.h	(revision 59354)
@@ -574,6 +574,7 @@ typedef struct rb_vm_struct { https://github.com/ruby/ruby/blob/trunk/vm_core.h#L574
 
     VALUE *defined_strings;
     st_table *frozen_strings;
+    st_table *tainted_frozen_strings;
 
     /* params */
     struct { /* size in byte */
Index: test/ruby/test_optimization.rb
===================================================================
--- test/ruby/test_optimization.rb	(revision 59353)
+++ test/ruby/test_optimization.rb	(revision 59354)
@@ -181,6 +181,37 @@ class TestRubyOptimization < Test::Unit: https://github.com/ruby/ruby/blob/trunk/test/ruby/test_optimization.rb#L181
     assert_redefine_method('Hash', 'empty?', 'assert_nil({}.empty?); assert_nil({1=>1}.empty?)')
   end
 
+  def test_hash_reuse_fstring
+    embed = -'h e l l o'
+    shared = -'this string is too long to be embedded and should be shared'
+    assert_not_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE],
+                     ObjectSpace.memsize_of(shared),
+                     'update this test if string embedding changed'
+
+    [ embed, shared ].each do |exp|
+      key = exp.split(' ').join(' ')
+      h = {}
+      h[key] = true
+      assert_not_same key, h.keys[0]
+      assert_predicate h.keys[0], :frozen?
+      assert_same exp, h.keys[0]
+
+      h1 = {}
+      h2 = {}
+      key.taint
+      h1[key] = true
+      h2[key] = true
+      k1 = h1.keys[0]
+      k2 = h2.keys[0]
+      assert_same k1, k2
+      assert_predicate k1, :tainted?
+
+      assert_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE],
+                   ObjectSpace.memsize_of(k1),
+                   'tainted string should share with untainted fstring'
+    end
+  end
+
   def test_hash_aref_with
     h = { "foo" => 1 }
     assert_equal 1, h["foo"]
Index: string.c
===================================================================
--- string.c	(revision 59353)
+++ string.c	(revision 59354)
@@ -349,6 +349,99 @@ register_fstring(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L349
     return ret;
 }
 
+static int
+tainted_fstr_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
+{
+    VALUE *fstr = (VALUE *)arg;
+    VALUE str = (VALUE)*key;
+
+    if (existing) {
+	/* because of lazy sweep, str may be unmarked already and swept
+	 * at next time */
+	if (rb_objspace_garbage_object_p(str)) {
+	    *fstr = Qundef;
+	    return ST_DELETE;
+	}
+
+	*fstr = str;
+	return ST_STOP;
+    }
+    else {
+	str = rb_str_resurrect(str);
+	RB_OBJ_TAINT_RAW(str);
+	RB_FL_SET_RAW(str, RSTRING_FSTR);
+	RB_OBJ_FREEZE_RAW(str);
+
+	*key = *val = *fstr = str;
+	return ST_CONTINUE;
+    }
+}
+
+static VALUE
+rb_fstring_existing0(VALUE str)
+{
+    st_table *frozen_strings = rb_vm_fstring_table();
+    st_data_t fstr;
+
+    if (st_lookup(frozen_strings, str, &fstr)) {
+	if (rb_objspace_garbage_object_p(fstr)) {
+	    return register_fstring(str);
+	}
+	else {
+	    return (VALUE)fstr;
+	}
+    }
+    return Qnil;
+}
+
+static VALUE
+rb_tainted_fstring_existing(VALUE str)
+{
+    VALUE ret;
+    st_data_t fstr;
+    st_table *tfstrings = rb_vm_tfstring_table();
+
+    if (st_lookup(tfstrings, str, &fstr)) {
+	ret = (VALUE)fstr;
+	if (!rb_objspace_garbage_object_p(ret)) {
+	    return ret;
+	}
+    }
+    ret = rb_fstring_existing0(str);
+    if (NIL_P(ret)) {
+	return Qnil;
+    }
+    if (!RB_FL_TEST_RAW(ret, RSTRING_FSTR)) {
+	return Qnil;
+    }
+    do {
+	fstr = (st_data_t)ret;
+	st_update(tfstrings, fstr, tainted_fstr_update, (st_data_t)&fstr);
+    } while ((VALUE)fstr == Qundef);
+
+    ret = (VALUE)fstr;
+    assert(OBJ_FROZEN_RAW(ret));
+    assert(!FL_TEST_RAW(ret, STR_FAKESTR));
+    assert(!FL_TEST_RAW(ret, FL_EXIVAR));
+    assert(FL_TEST_RAW(ret, RSTRING_FSTR));
+    assert(FL_TEST_RAW(ret, FL_TAINT));
+    assert(RBASIC_CLASS(ret) == rb_cString);
+
+    return ret;
+}
+
+VALUE
+rb_fstring_existing(VALUE str)
+{
+    if (FL_TEST_RAW(str, RSTRING_FSTR))
+	return str;
+
+    if (!RB_OBJ_TAINTED_RAW(str))
+	return rb_fstring_existing0(str);
+
+    return rb_tainted_fstring_existing(str);
+}
+
 static VALUE
 setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx)
 {
@@ -1311,6 +1404,7 @@ rb_str_free(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L1404
     if (FL_TEST(str, RSTRING_FSTR)) {
 	st_data_t fstr = (st_data_t)str;
 	st_delete(rb_vm_fstring_table(), &fstr, NULL);
+	st_delete(rb_vm_tfstring_table(), &fstr, NULL);
 	RB_DEBUG_COUNTER_INC(obj_str_fstr);
     }
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]