ruby-changes:47239
From: normal <ko1@a...>
Date: Tue, 18 Jul 2017 11:30:04 +0900 (JST)
Subject: [ruby-changes:47239] normal:r59354 (trunk): hash: keep fstrings of tainted strings for string keys
normal 2017-07-18 11:29:59 +0900 (Tue, 18 Jul 2017) New Revision: 59354 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59354 Log: hash: keep fstrings of tainted strings for string keys The same hash keys may be loaded from tainted data sources frequently (e.g. parsing headers from socket or loading YAML data from a file). If a non-tainted fstring already exists (because the application expects the hash key), cache and deduplicate the tainted version in the new tainted_frozen_strings table. For non-embedded strings, this also allows sharing with the underlying malloc-ed data. * vm_core.h (rb_vm_struct): add tainted_frozen_strings * vm.c (ruby_vm_destruct): free tainted_frozen_strings (Init_vm_objects): initialize tainted_frozen_strings (rb_vm_tfstring_table): accessor for tainted_frozen_strings * internal.h: declare rb_fstring_existing, rb_vm_tfstring_table * hash.c (fstring_existing_str): remove (moved to string.c) (hash_aset_str): use rb_fstring_existing * string.c (rb_fstring_existing): new, based on fstring_existing_str (tainted_fstr_update): new (rb_fstring_existing0): new, based on fstring_existing_str (rb_tainted_fstring_existing): new, special case for tainted strings (rb_str_free): delete from tainted_frozen_strings table * test/ruby/test_optimization.rb (test_hash_reuse_fstring): new test [ruby-core:82012] [Bug #13737] Modified files: trunk/hash.c trunk/internal.h trunk/string.c trunk/test/ruby/test_optimization.rb trunk/vm.c trunk/vm_core.h Index: hash.c =================================================================== --- hash.c (revision 59353) +++ hash.c (revision 59354) @@ -18,7 +18,6 @@ https://github.com/ruby/ruby/blob/trunk/hash.c#L18 #include "probes.h" #include "id.h" #include "symbol.h" -#include "gc.h" #ifdef __APPLE__ # ifdef HAVE_CRT_EXTERNS_H @@ -1516,33 +1515,13 @@ hash_aset(st_data_t *key, st_data_t *val https://github.com/ruby/ruby/blob/trunk/hash.c#L1515 return ST_CONTINUE; } -static VALUE -fstring_existing_str(VALUE str) -{ - st_data_t fstr; - st_table *tbl = rb_vm_fstring_table(); - - if (st_lookup(tbl, str, &fstr)) { - if (rb_objspace_garbage_object_p(fstr)) { - return rb_fstring(str); - } - else { - return (VALUE)fstr; - } - } - else { - return Qnil; - } -} - static int hash_aset_str(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing) { if (!existing && !RB_OBJ_FROZEN(*key)) { VALUE k; - if (!RB_OBJ_TAINTED(*key) && - (k = fstring_existing_str(*key)) != Qnil) { + if ((k = rb_fstring_existing(*key)) != Qnil) { *key = k; } else { Index: internal.h =================================================================== --- internal.h (revision 59353) +++ internal.h (revision 59354) @@ -1574,6 +1574,7 @@ VALUE rb_strftime(const char *format, si https://github.com/ruby/ruby/blob/trunk/internal.h#L1574 /* string.c */ VALUE rb_fstring(VALUE); VALUE rb_fstring_new(const char *ptr, long len); +VALUE rb_fstring_existing(VALUE); #define rb_fstring_lit(str) rb_fstring_new((str), rb_strlen_lit(str)) #define rb_fstring_literal(str) rb_fstring_lit(str) VALUE rb_fstring_cstr(const char *str); @@ -1726,7 +1727,7 @@ void rb_vm_check_redefinition_by_prepend https://github.com/ruby/ruby/blob/trunk/internal.h#L1727 VALUE rb_yield_refine_block(VALUE refinement, VALUE refinements); VALUE ruby_vm_special_exception_copy(VALUE); PUREFUNC(st_table *rb_vm_fstring_table(void)); - +PUREFUNC(st_table *rb_vm_tfstring_table(void)); /* vm_dump.c */ void rb_print_backtrace(void); Index: vm.c =================================================================== --- vm.c (revision 59353) +++ vm.c (revision 59354) @@ -2206,6 +2206,10 @@ ruby_vm_destruct(rb_vm_t *vm) https://github.com/ruby/ruby/blob/trunk/vm.c#L2206 st_free_table(vm->frozen_strings); vm->frozen_strings = 0; } + if (vm->tainted_frozen_strings) { + st_free_table(vm->tainted_frozen_strings); + vm->tainted_frozen_strings = 0; + } rb_vm_gvl_destroy(vm); if (objspace) { rb_objspace_free(objspace); @@ -3142,6 +3146,8 @@ Init_vm_objects(void) https://github.com/ruby/ruby/blob/trunk/vm.c#L3146 vm->mark_object_ary = rb_ary_tmp_new(128); vm->loading_table = st_init_strtable(); vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 1000); + vm->tainted_frozen_strings = + st_init_table_with_size(&rb_fstring_hash_type, 1000); } /* top self */ @@ -3203,6 +3209,12 @@ rb_vm_fstring_table(void) https://github.com/ruby/ruby/blob/trunk/vm.c#L3209 return GET_VM()->frozen_strings; } +st_table * +rb_vm_tfstring_table(void) +{ + return GET_VM()->tainted_frozen_strings; +} + #if VM_COLLECT_USAGE_DETAILS #define HASH_ASET(h, k, v) rb_hash_aset((h), (st_data_t)(k), (st_data_t)(v)) Index: vm_core.h =================================================================== --- vm_core.h (revision 59353) +++ vm_core.h (revision 59354) @@ -574,6 +574,7 @@ typedef struct rb_vm_struct { https://github.com/ruby/ruby/blob/trunk/vm_core.h#L574 VALUE *defined_strings; st_table *frozen_strings; + st_table *tainted_frozen_strings; /* params */ struct { /* size in byte */ Index: test/ruby/test_optimization.rb =================================================================== --- test/ruby/test_optimization.rb (revision 59353) +++ test/ruby/test_optimization.rb (revision 59354) @@ -181,6 +181,37 @@ class TestRubyOptimization < Test::Unit: https://github.com/ruby/ruby/blob/trunk/test/ruby/test_optimization.rb#L181 assert_redefine_method('Hash', 'empty?', 'assert_nil({}.empty?); assert_nil({1=>1}.empty?)') end + def test_hash_reuse_fstring + embed = -'h e l l o' + shared = -'this string is too long to be embedded and should be shared' + assert_not_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE], + ObjectSpace.memsize_of(shared), + 'update this test if string embedding changed' + + [ embed, shared ].each do |exp| + key = exp.split(' ').join(' ') + h = {} + h[key] = true + assert_not_same key, h.keys[0] + assert_predicate h.keys[0], :frozen? + assert_same exp, h.keys[0] + + h1 = {} + h2 = {} + key.taint + h1[key] = true + h2[key] = true + k1 = h1.keys[0] + k2 = h2.keys[0] + assert_same k1, k2 + assert_predicate k1, :tainted? + + assert_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE], + ObjectSpace.memsize_of(k1), + 'tainted string should share with untainted fstring' + end + end + def test_hash_aref_with h = { "foo" => 1 } assert_equal 1, h["foo"] Index: string.c =================================================================== --- string.c (revision 59353) +++ string.c (revision 59354) @@ -349,6 +349,99 @@ register_fstring(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L349 return ret; } +static int +tainted_fstr_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing) +{ + VALUE *fstr = (VALUE *)arg; + VALUE str = (VALUE)*key; + + if (existing) { + /* because of lazy sweep, str may be unmarked already and swept + * at next time */ + if (rb_objspace_garbage_object_p(str)) { + *fstr = Qundef; + return ST_DELETE; + } + + *fstr = str; + return ST_STOP; + } + else { + str = rb_str_resurrect(str); + RB_OBJ_TAINT_RAW(str); + RB_FL_SET_RAW(str, RSTRING_FSTR); + RB_OBJ_FREEZE_RAW(str); + + *key = *val = *fstr = str; + return ST_CONTINUE; + } +} + +static VALUE +rb_fstring_existing0(VALUE str) +{ + st_table *frozen_strings = rb_vm_fstring_table(); + st_data_t fstr; + + if (st_lookup(frozen_strings, str, &fstr)) { + if (rb_objspace_garbage_object_p(fstr)) { + return register_fstring(str); + } + else { + return (VALUE)fstr; + } + } + return Qnil; +} + +static VALUE +rb_tainted_fstring_existing(VALUE str) +{ + VALUE ret; + st_data_t fstr; + st_table *tfstrings = rb_vm_tfstring_table(); + + if (st_lookup(tfstrings, str, &fstr)) { + ret = (VALUE)fstr; + if (!rb_objspace_garbage_object_p(ret)) { + return ret; + } + } + ret = rb_fstring_existing0(str); + if (NIL_P(ret)) { + return Qnil; + } + if (!RB_FL_TEST_RAW(ret, RSTRING_FSTR)) { + return Qnil; + } + do { + fstr = (st_data_t)ret; + st_update(tfstrings, fstr, tainted_fstr_update, (st_data_t)&fstr); + } while ((VALUE)fstr == Qundef); + + ret = (VALUE)fstr; + assert(OBJ_FROZEN_RAW(ret)); + assert(!FL_TEST_RAW(ret, STR_FAKESTR)); + assert(!FL_TEST_RAW(ret, FL_EXIVAR)); + assert(FL_TEST_RAW(ret, RSTRING_FSTR)); + assert(FL_TEST_RAW(ret, FL_TAINT)); + assert(RBASIC_CLASS(ret) == rb_cString); + + return ret; +} + +VALUE +rb_fstring_existing(VALUE str) +{ + if (FL_TEST_RAW(str, RSTRING_FSTR)) + return str; + + if (!RB_OBJ_TAINTED_RAW(str)) + return rb_fstring_existing0(str); + + return rb_tainted_fstring_existing(str); +} + static VALUE setup_fake_str(struct RString *fake_str, const char *name, long len, int encidx) { @@ -1311,6 +1404,7 @@ rb_str_free(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L1404 if (FL_TEST(str, RSTRING_FSTR)) { st_data_t fstr = (st_data_t)str; st_delete(rb_vm_fstring_table(), &fstr, NULL); + st_delete(rb_vm_tfstring_table(), &fstr, NULL); RB_DEBUG_COUNTER_INC(obj_str_fstr); } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/