ruby-changes:52468
From: shyouhei <ko1@a...>
Date: Tue, 11 Sep 2018 18:49:02 +0900 (JST)
Subject: [ruby-changes:52468] shyouhei:r64677 (trunk): add new instruction attribute called leaf
shyouhei 2018-09-11 18:48:58 +0900 (Tue, 11 Sep 2018) New Revision: 64677 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=64677 Log: add new instruction attribute called leaf An instruction is leaf if it has no rb_funcall inside. In order to check this property, we introduce stack canary which is a random number collected at runtime. Stack top is always filled with this number and checked for stack smashing operations, when VM_CHECK_MODE. [GH-1947] Added files: trunk/tool/ruby_vm/views/_leaf_helpers.erb Modified files: trunk/inits.c trunk/insns.def trunk/internal.h trunk/random.c trunk/tool/ruby_vm/models/bare_instructions.rb trunk/tool/ruby_vm/views/_insn_entry.erb trunk/tool/ruby_vm/views/insns_info.inc.erb trunk/variable.c trunk/vm_insnhelper.c Index: random.c =================================================================== --- random.c (revision 64676) +++ random.c (revision 64677) @@ -573,7 +573,7 @@ fill_random_bytes_syscall(void *seed, si https://github.com/ruby/ruby/blob/trunk/random.c#L573 # define fill_random_bytes_syscall(seed, size, need_secure) -1 #endif -static int +int fill_random_bytes(void *seed, size_t size, int need_secure) { int ret = fill_random_bytes_syscall(seed, size, need_secure); Index: variable.c =================================================================== --- variable.c (revision 64676) +++ variable.c (revision 64677) @@ -867,6 +867,24 @@ rb_gvar_defined(struct rb_global_entry * https://github.com/ruby/ruby/blob/trunk/variable.c#L867 return Qtrue; } +rb_gvar_getter_t * +rb_gvar_getter_function_of(const struct rb_global_entry *entry) +{ + return entry->var->getter; +} + +rb_gvar_setter_t * +rb_gvar_setter_function_of(const struct rb_global_entry *entry) +{ + return entry->var->setter; +} + +bool +rb_gvar_is_traced(const struct rb_global_entry *entry) +{ + return !!entry->var->trace; +} + static enum rb_id_table_iterator_result gvar_i(ID key, VALUE val, void *a) { Index: inits.c =================================================================== --- inits.c (revision 64676) +++ inits.c (revision 64677) @@ -61,6 +61,7 @@ rb_call_inits(void) https://github.com/ruby/ruby/blob/trunk/inits.c#L61 CALL(Complex); CALL(version); CALL(vm_trace); + CALL(vm_stack_canary); CALL(ast); } #undef CALL Index: tool/ruby_vm/models/bare_instructions.rb =================================================================== --- tool/ruby_vm/models/bare_instructions.rb (revision 64676) +++ tool/ruby_vm/models/bare_instructions.rb (revision 64677) @@ -105,6 +105,10 @@ class RubyVM::BareInstructions https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/models/bare_instructions.rb#L105 /\b(false|0)\b/ !~ @attrs['handles_sp'].expr.expr end + def complicated_return_values? + @sig[:ret].any? {|i| i == '...' } + end + def inspect sprintf "#<%s %s@%s:%d>", self.class.name, @name, @loc[0], @loc[1] end @@ -130,6 +134,10 @@ class RubyVM::BareInstructions https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/models/bare_instructions.rb#L134 generate_attribute 'rb_num_t', 'width', width generate_attribute 'rb_snum_t', 'sp_inc', rets.size - pops.size generate_attribute 'bool', 'handles_sp', false + generate_attribute 'bool', 'leaf', opes.all? {|o| + # Insn with ISEQ should yield it; can never be a leaf. + o[:type] != 'ISEQ' + } end def typesplit a Index: tool/ruby_vm/views/_insn_entry.erb =================================================================== --- tool/ruby_vm/views/_insn_entry.erb (revision 64676) +++ tool/ruby_vm/views/_insn_entry.erb (revision 64677) @@ -13,6 +13,12 @@ INSN_ENTRY(<%= insn.name %>) https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_insn_entry.erb#L13 %# NAME_OF_CURRENT_INSN is used in vm_exec.h # define NAME_OF_CURRENT_INSN <%= insn.name %> # define INSN_ATTR(x) <%= insn.call_attribute(' ## x ## ') %> +% unless insn.complicated_return_values? +# if VM_CHECK_MODE > 0 + bool leaf; + VALUE *canary; +# endif +% end % unless insn.declarations.empty? <%= insn.declarations.join(";\n ") %>; @@ -33,12 +39,27 @@ INSN_ENTRY(<%= insn.name %>) https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_insn_entry.erb#L39 % if insn.handles_sp? POPN(INSN_ATTR(popn)); % end +% unless insn.complicated_return_values? +#if VM_CHECK_MODE > 0 + if ((leaf = INSN_ATTR(leaf))) { + canary = STACK_ADDR_FROM_TOP(-1); + *canary = vm_stack_canary; + } +#endif +% end COLLECT_USAGE_INSN(INSN_ATTR(bin)); % insn.opes.each_with_index do |ope, i| COLLECT_USAGE_OPERAND(INSN_ATTR(bin), <%= i %>, <%= ope[:name] %>); % end <%= render_c_expr insn.expr -%> CHECK_VM_STACK_OVERFLOW_FOR_INSN(VM_REG_CFP, INSN_ATTR(retn)); +% unless insn.complicated_return_values? +#if VM_CHECK_MODE > 0 + if (leaf && (*canary != vm_stack_canary)) { + vm_canary_is_found_dead(INSN_ATTR(bin), *canary); + } +#endif +% end % if insn.handles_sp? % insn.rets.reverse_each do |ret| PUSH(<%= insn.cast_to_VALUE ret %>); Index: tool/ruby_vm/views/_leaf_helpers.erb =================================================================== --- tool/ruby_vm/views/_leaf_helpers.erb (nonexistent) +++ tool/ruby_vm/views/_leaf_helpers.erb (revision 64677) @@ -0,0 +1,105 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_leaf_helpers.erb#L1 +%# -*- mode:c; style:ruby; coding: utf-8; indent-tabs-mode: nil -*- +%# Copyright (c) 2018 Urabe, Shyouhei. All rights reserved. +%# +%# This file is a part of the programming language Ruby. Permission is hereby +%# granted, to either redistribute and/or modify this file, provided that the +%# conditions mentioned in the file COPYING are met. Consult the file for +%# details. +%; + +static bool +leafness_of_getglobal(VALUE gentry) +{ + const struct rb_global_entry *e = (void *)gentry; + + if (UNLIKELY(rb_gvar_is_traced(e))) { + return false; + } + else { + /* We cannot write this function using a switch() because a + * case label cannot be a function pointer. */ + static rb_gvar_getter_t *const whitelist[] = { + rb_gvar_val_getter, + rb_gvar_var_getter, + rb_gvar_undef_getter, + }; + rb_gvar_getter_t *f = rb_gvar_getter_function_of(e); + int i; + + for (i = 0; i < numberof(whitelist); i++) { + if (f == whitelist[i]) { + return true; + } + } + return false; + } +} + +static bool +leafness_of_setglobal(VALUE gentry) +{ + const struct rb_global_entry *e = (void *)gentry; + + if (UNLIKELY(rb_gvar_is_traced(e))) { + return false; + } + else { + /* We cannot write this function using a switch() because a + * case label cannot be a function pointer. */ + static rb_gvar_setter_t *const whitelist[] = { + rb_gvar_val_setter, + rb_gvar_readonly_setter, + rb_gvar_var_setter, + rb_gvar_undef_setter, + }; + rb_gvar_setter_t *f = rb_gvar_setter_function_of(e); + int i; + + for (i = 0; i < numberof(whitelist); i++) { + if (f == whitelist[i]) { + return true; + } + } + return false; + } +} + +#include "iseq.h" + +static bool +leafness_of_defined(rb_num_t op_type) +{ + /* see also: vm_insnhelper.c:vm_defined() */ + switch (op_type) { + case DEFINED_IVAR: + case DEFINED_IVAR2: + case DEFINED_GVAR: + case DEFINED_CVAR: + case DEFINED_YIELD: + case DEFINED_REF: + case DEFINED_ZSUPER: + return false; + case DEFINED_CONST: + /* has rb_autoload_load(); */ + return false; + case DEFINED_FUNC: + case DEFINED_METHOD: + /* calls #respond_to_missing? */ + return false; + default: + rb_bug("unknown operand %ld: blame @shyouhei.", op_type); + } +} + +static bool +leafness_of_checkmatch(rb_num_t flag) +{ + /* see also: vm_insnhelper.c:check_match() */ + if (flag == VM_CHECKMATCH_TYPE_WHEN) { + return true; + } + else { + /* has rb_funcallv() */ + return false; + } +} Property changes on: tool/ruby_vm/views/_leaf_helpers.erb ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +LF \ No newline at end of property Index: tool/ruby_vm/views/insns_info.inc.erb =================================================================== --- tool/ruby_vm/views/insns_info.inc.erb (revision 64676) +++ tool/ruby_vm/views/insns_info.inc.erb (revision 64677) @@ -15,5 +15,6 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/insns_info.inc.erb#L15 <%= render 'insn_name_info' %> <%= render 'insn_len_info' %> <%= render 'insn_operand_info' %> +<%= render 'leaf_helpers' %> <%= render 'attributes' %> <%= render 'insn_stack_increase' %> Index: vm_insnhelper.c =================================================================== --- vm_insnhelper.c (revision 64676) +++ vm_insnhelper.c (revision 64677) @@ -3900,3 +3900,38 @@ vm_trace(rb_execution_context_t *ec, rb_ https://github.com/ruby/ruby/blob/trunk/vm_insnhelper.c#L3900 } } } + +#if VM_CHECK_MODE > 0 +static NORETURN( NOINLINE( +#if GCC_VERSION_SINCE(4, 3, 0) +__attribute__((__cold__)) +#endif +void vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c))); +static VALUE vm_stack_canary; + +void +Init_vm_stack_canary(void) +{ + /* This has to be called _after_ our PRNG is properly set up. */ + int n = fill_random_bytes(&vm_stack_canary, sizeof vm_stack_canary, false); + + VM_ASSERT(n == 0); +} + +static void +vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c) +{ + /* Because a method has already been called, why not call + * another one. */ + const char *insn = rb_insns_name(i); + VALUE inspection = rb_inspect(c); + const char *str = StringValueCStr(inspection); + VALUE message = rb_sprintf("dead canary found at %s: %s", insn, str); + const char *msg = StringValueCStr(message); + + rb_bug(msg); +} + +#elif !defined(MJIT_HEADER) +void Init_vm_stack_canary(void) { /* nothing to do */ } +#endif Index: internal.h =================================================================== --- internal.h (revision 64676) +++ internal.h (revision 64677) @@ -1901,6 +1901,9 @@ VALUE rb_ivar_lookup(VALUE obj, ID id, V https://github.com/ruby/ruby/blob/trunk/internal.h#L1901 void rb_autoload_str(VALUE mod, ID id, VALUE file); void rb_deprecate_constant(VALUE mod, const char *name); NORETURN(VALUE rb_mod_const_missing(VALUE,VALUE)); +rb_gvar_getter_t *rb_gvar_getter_function_of(const struct rb_global_entry *); +rb_gvar_setter_t *rb_gvar_setter_function_of(const struct rb_global_entry *); +bool rb_gvar_is_traced(const struct rb_global_entry *); /* vm_insnhelper.h */ rb_serial_t rb_next_class_serial(void); @@ -1946,6 +1949,7 @@ VALUE rb_lambda_call(VALUE obj, ID mid, https://github.com/ruby/ruby/blob/trunk/internal.h#L1949 /* vm_insnhelper.c */ VALUE rb_equal_opt(VALUE obj1, VALUE obj2); VALUE rb_eql_opt(VALUE obj1, VALUE obj2); +void Init_vm_stack_canary(void); /* vm_method.c */ void Init_eval_method(void); @@ -2099,6 +2103,9 @@ VALUE rb_imemo_new_debug(enum imemo_type https://github.com/ruby/ruby/blob/trunk/internal.h#L2103 VALUE rb_imemo_new(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VALUE v0); #endif +/* random.c */ +int fill_random_bytes(void *, size_t, int); + RUBY_SYMBOL_EXPORT_END #define RUBY_DTRACE_CREATE_HOOK(name, arg) \ Index: insns.def =================================================================== --- insns.def (revision 64676) +++ insns.def (revision 64677) @@ -45,6 +45,9 @@ https://github.com/ruby/ruby/blob/trunk/insns.def#L45 * handles_sp: If it is true, VM deals with sp in the insn. + * leaf: indicates that the instruction is "leaf" i.e. it does + not introduce new stack frame on top of it. Default true. + - Attributes can access operands, but not stack (push/pop) variables. - An instruction's body is a pure C block, copied verbatimly into @@ -203,6 +206,8 @@ getinstancevariable https://github.com/ruby/ruby/blob/trunk/insns.def#L206 (ID id, IC ic) () (VALUE val) +/* "instance variable not initialized" warning can be hooked. */ +// attr bool leaf = false; /* has rb_warning() */ { val = vm_getinstancevariable(GET_SELF(), id, ic); } @@ -223,6 +228,8 @@ getclassvariable https://github.com/ruby/ruby/blob/trunk/insns.def#L228 (ID id) () (VALUE val) +/* "class variable access from toplevel" warning can be hooked. */ +// attr bool leaf = false; /* has rb_warning() */ { val = rb_cvar_get(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id); } @@ -233,6 +240,8 @@ setclassvariable https://github.com/ruby/ruby/blob/trunk/insns.def#L240 (ID id) (VALUE val) () +/* "class variable access from toplevel" warning can be hooked. */ +// attr bool leaf = false; /* has rb_warning() */ { vm_ensure_not_refinement_module(GET_SELF()); rb_cvar_set(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id, val); @@ -247,6 +256,8 @@ getconstant https://github.com/ruby/ruby/blob/trunk/insns.def#L256 (ID id) (VALUE klass) (VALUE val) +/* getconstant can kick autoload */ +// attr bool leaf = false; /* has rb_autoload_load() */ { val = vm_get_ev_const(ec, klass, id, 0); } @@ -258,6 +269,11 @@ setconstant https://github.com/ruby/ruby/blob/trunk/insns.def#L269 (ID id) (VALUE val, VALUE cbase) () +/* Assigning an object to a constant is basically a leaf operation. + * The problem is, assigning a Module instance to a constant _names_ + * that module. Naming involves string manipulations, which are + * method calls. */ +// attr bool leaf = false; /* has StringValue() */ { vm_check_if_namespace(cbase); vm_ensure_not_refinement_module(GET_SELF()); @@ -270,6 +286,7 @@ getglobal https://github.com/ruby/ruby/blob/trunk/insns.def#L286 (GENTRY entry) () (VALUE val) +// attr bool leaf = leafness_of_getglobal(entry); { val = GET_GLOBAL((VALUE)entry); } @@ -280,6 +297,7 @@ setglobal https://github.com/ruby/ruby/blob/trunk/insns.def#L297 (GENTRY entry) (VALUE val) () +// attr bool leaf = leafness_of_setglobal(entry); { SET_GLOBAL((VALUE)entry, val); } @@ -339,6 +357,7 @@ putiseq https://github.com/ruby/ruby/blob/trunk/insns.def#L357 (ISEQ iseq) () (VALUE ret) +// attr bool leaf = true; /* yes it is */ { ret = (VALUE)iseq; } @@ -392,6 +411,9 @@ toregexp https://github.com/ruby/ruby/blob/trunk/insns.def#L411 (rb_num_t opt, rb_num_t cnt) (...) (VALUE val) +/* This instruction has StringValue(), which is a method call. But it + * seems that path is never covered. */ +// attr bool leaf = true; /* yes it is */ // attr rb_snum_t sp_inc = 1 - cnt; { const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt)); @@ -444,6 +466,7 @@ expandarray https://github.com/ruby/ruby/blob/trunk/insns.def#L466 (rb_num_t num, rb_num_t flag) (..., VALUE ary) (...) +// attr bool leaf = false; /* has rb_check_array_type() */ // attr rb_snum_t sp_inc = num - 1 + (flag & 1 ? 1 : 0); { vm_expandarray(GET_SP(), ary, num, (int)flag); @@ -455,6 +478,7 @@ concatarray https://github.com/ruby/ruby/blob/trunk/insns.def#L478 () (VALUE ary1, VALUE ary2) (VALUE ary) +// attr bool leaf = false; /* has rb_check_array_type() */ { ary = vm_concat_array(ary1, ary2); } @@ -465,6 +489,7 @@ splatarray https://github.com/ruby/ruby/blob/trunk/insns.def#L489 (VALUE flag) (VALUE ary) (VALUE obj) +// attr bool leaf = false; /* has rb_check_array_type() */ { obj = vm_splat_array(flag, ary); } @@ -475,6 +500,7 @@ newhash https://github.com/ruby/ruby/blob/trunk/insns.def#L500 (rb_num_t num) (...) (VALUE val) +// attr bool leaf = false; /* has rb_hash_key_str() */ // attr rb_snum_t sp_inc = 1 - num; { RUBY_DTRACE_CREATE_HOOK(HASH, num); @@ -492,6 +518,8 @@ newrange https://github.com/ruby/ruby/blob/trunk/insns.def#L518 (rb_num_t flag) (VALUE low, VALUE high) (VALUE val) +/* rb_range_new() exercises "bad value for range" check. */ +// attr bool leaf = false; /* see also: range.c:range_init() */ { val = rb_range_new(low, high, (int)flag); } @@ -618,6 +646,7 @@ defined https://github.com/ruby/ruby/blob/trunk/insns.def#L646 (rb_num_t op_type, VALUE obj, VALUE needstr) (VALUE v) (VALUE val) +// attr bool leaf = leafness_of_defined(op_type); { val = vm_defined(ec, GET_CFP(), op_type, obj, needstr, v); } @@ -634,6 +663,7 @@ checkmatch https://github.com/ruby/ruby/blob/trunk/insns.def#L663 (rb_num_t flag) (VALUE target, VALUE pattern) (VALUE result) +// attr bool leaf = leafness_of_checkmatch(flag); { result = vm_check_match(ec, target, pattern, flag); } @@ -726,6 +756,7 @@ opt_str_freeze https://github.com/ruby/ruby/blob/trunk/insns.def#L756 (VALUE str) () (VALUE val) +// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG); { val = vm_opt_str_freeze(str, BOP_FREEZE, idFreeze); } @@ -735,6 +766,7 @@ opt_str_uminus https://github.com/ruby/ruby/blob/trunk/insns.def#L766 (VALUE str) () (VALUE val) +// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_UMINUS, STRING_REDEFINED_OP_FLAG); { val = vm_opt_str_freeze(str, BOP_UMINUS, idUMinus); } @@ -744,6 +776,11 @@ opt_newarray_max https://github.com/ruby/ruby/blob/trunk/insns.def#L776 (rb_num_t num) (...) (VALUE val) +/* This instruction typically has no funcalls. But it compares array + * contents each other by nature. That part could call methods when + * necessary. No way to detect such method calls beforehand. We + * cannot but mark it being not leaf. */ +// attr bool leaf = false; /* has rb_funcall() */ // attr rb_snum_t sp_inc = 1 - num; { val = vm_opt_newarray_max(num, STACK_ADDR_FROM_TOP(num)); @@ -754,6 +791,8 @@ opt_newarray_min https://github.com/ruby/ruby/blob/trunk/insns.def#L791 (rb_num_t num) (...) (VALUE val) +/* Same discussion as opt_newarray_max. */ +// attr bool leaf = false; /* has rb_funcall() */ // attr rb_snum_t sp_inc = 1 - num; { val = vm_opt_newarray_min(num, STACK_ADDR_FROM_TOP(num)); @@ -765,6 +804,7 @@ opt_send_without_block https://github.com/ruby/ruby/blob/trunk/insns.def#L804 (CALL_INFO ci, CALL_CACHE cc) (...) (VALUE val) +// attr bool leaf = false; /* Of course it isn't. */ // attr bool handles_sp = true; // attr rb_snum_t sp_inc = -ci->orig_argc; { @@ -797,6 +837,7 @@ invokeblock https://github.com/ruby/ruby/blob/trunk/insns.def#L837 (CALL_INFO ci) (...) (VALUE val) +// attr bool leaf = false; /* Of course it isn't. */ // attr bool handles_sp = true; // attr rb_snum_t sp_inc = 1 - ci->orig_argc; { @@ -824,6 +865,10 @@ leave https://github.com/ruby/ruby/blob/trunk/insns.def#L865 () (VALUE val) (VALUE val) +/* This is super surprising but when leaving from a frame, we check + * for interrupts. If any, that should be executed on top of the + * current execution context. This is a method call. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ // attr bool handles_sp = true; { if (OPT_CHECKED_RUN) { @@ -858,6 +903,8 @@ throw https://github.com/ruby/ruby/blob/trunk/insns.def#L903 (rb_num_t throw_state) (VALUE throwobj) (VALUE val) +/* Same discussion as leave. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { RUBY_VM_CHECK_INTS(ec); val = vm_throw(ec, GET_CFP(), throw_state, throwobj); @@ -875,6 +922,8 @@ jump https://github.com/ruby/ruby/blob/trunk/insns.def#L922 (OFFSET dst) () () +/* Same discussion as leave. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { RUBY_VM_CHECK_INTS(ec); JUMP(dst); @@ -886,6 +935,8 @@ branchif https://github.com/ruby/ruby/blob/trunk/insns.def#L935 (OFFSET dst) (VALUE val) () +/* Same discussion as jump. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { if (RTEST(val)) { RUBY_VM_CHECK_INTS(ec); @@ -899,6 +950,8 @@ branchunless https://github.com/ruby/ruby/blob/trunk/insns.def#L950 (OFFSET dst) (VALUE val) () +/* Same discussion as jump. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { if (!RTEST(val)) { RUBY_VM_CHECK_INTS(ec); @@ -912,6 +965,8 @@ branchnil https://github.com/ruby/ruby/blob/trunk/insns.def#L965 (OFFSET dst) (VALUE val) () +/* Same discussion as jump. */ +// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */ { if (NIL_P(val)) { RUBY_VM_CHECK_INTS(ec); @@ -965,6 +1020,10 @@ opt_case_dispatch https://github.com/rub (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/