ruby-changes:23255

ko1	2012-04-12 10:33:34 +0900 (Thu, 12 Apr 2012)

  New Revision: 35306

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=35306

  Log:
    * compile.c (compile_array, compile_array_):
      Divide big array (or hash) literals into several blocks and
      concatenate them.  There was a problem that a big array (hash)
      literal caused a SystemStackError exception (stack overflow)
      because the VM pushed all contents of the literal onto the VM
      stack to make an array (or hash).  To solve this issue, we make
      several arrays (hashes) and concatenate them to make a big array
      (hash) object.
      ??
    * compile.c (iseq_compile_each, setup_args): use modified
      compile_array.
    * vm.c (m_core_hash_from_ary, m_core_hash_merge_ary,
      m_core_hash_merge_ptr): added for above change.
    * id.c (Init_id), parse.y: add core method ids.
    * bootstraptest/test_literal.rb: add simple tests.
    * bootstraptest/test_eval.rb: remove the rescue clause that caught
      the SystemStackError exception.
    * test/ruby/test_literal.rb: add tests to check no stack overflow.

  Modified files:
    trunk/ChangeLog
    trunk/bootstraptest/test_eval.rb
    trunk/bootstraptest/test_literal.rb
    trunk/compile.c
    trunk/id.c
    trunk/parse.y
    trunk/test/ruby/test_literal.rb
    trunk/vm.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 35305)
+++ ChangeLog	(revision 35306)
@@ -1,3 +1,30 @@
+Wed Apr 11 17:16:49 2012  Koichi Sasada  <ko1@a...>
+
+	* compile.c (compile_array, compile_array_):
+	  Divide big array (or hash) literals into several blocks and
+	  concatetene them.  There was a problem that a big array (hash)
+          literal causes SystemStackError exception (stack overflow)
+          because VM push all contents of the literal onto VM stack to
+          make an array (or hash).  To solve this issue, we make several
+          arrays (hashes) and concatenate them to make a big array (hash)
+          object.
+	  ??
+
+	* compile.c (iseq_compile_each, setup_args): use modified
+	  compile_array.
+
+	* vm.c (m_core_hash_from_ary, m_core_hash_merge_ary,
+	  m_core_hash_merge_ptr): added for above change.
+
+	* id.c (Init_id), parse.y: add core method ids.
+
+	* bootstraptest/test_literal.rb: add simple tests.
+
+	* bootstraptest/test_eval.rb: remove rescue clause to catch
+	  SystemStackError exception.
+
+	* test/ruby/test_literal.rb: add tests to check no stack overflow.
+
 Thu Apr 12 07:10:37 2012  Eric Hodel  <drbrain@s...>
 
 	* lib/uri/generic.rb (module URI):  URI now downcases the scheme to
Index: bootstraptest/test_eval.rb
===================================================================
--- bootstraptest/test_eval.rb	(revision 35305)
+++ bootstraptest/test_eval.rb	(revision 35306)
@@ -316,6 +316,5 @@
   end
   begin
     eval "class C; @@h = #{hash.inspect}; end"
-  rescue SystemStackError
   end
 }, '[ruby-core:25714]'
Index: bootstraptest/test_literal.rb
===================================================================
--- bootstraptest/test_literal.rb	(revision 35305)
+++ bootstraptest/test_literal.rb	(revision 35306)
@@ -200,3 +200,25 @@
 assert_equal 'ok', %q{
   "#{}o""#{}k""#{}"
 }, '[ruby-core:25284]'
+
+assert_equal 'ok', %q{ #  long array literal
+  x = nil
+  eval "a = [#{(1..10_000).map{'x'}.join(", ")}]"
+  :ok
+}
+
+assert_equal 'ok', %q{ #  long array literal (optimized)
+  eval "a = [#{(1..10_000).to_a.join(", ")}]"
+  :ok
+}
+
+assert_equal 'ok', %q{ #  long hash literal
+  x = nil
+  eval "a = {#{(1..10_000).map{|n| "#{n} => x"}.join(', ')}}"
+  :ok
+}
+
+assert_equal 'ok', %q{ #  long hash literal (optimized)
+  eval "a = {#{(1..10_000).map{|n| "#{n} => #{n}"}.join(', ')}}"
+  :ok
+}
Index: compile.c
===================================================================
--- compile.c	(revision 35305)
+++ compile.c	(revision 35306)
@@ -2274,65 +2274,139 @@
     return COMPILE_OK;
 }
 
+enum compile_array_type_t {
+    COMPILE_ARRAY_TYPE_ARRAY,
+    COMPILE_ARRAY_TYPE_HASH,
+    COMPILE_ARRAY_TYPE_ARGS,
+};
+
 static int
 compile_array_(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE* node_root,
-	       VALUE opt_p, int poped)
+	       enum compile_array_type_t type, int poped)
 {
     NODE *node = node_root;
-    int len = (int)node->nd_alen, line = (int)nd_line(node), i=0;
-    DECL_ANCHOR(anchor);
+    int line = (int)nd_line(node);
+    int len = 0;
 
-    INIT_ANCHOR(anchor);
-    if (nd_type(node) != NODE_ZARRAY) {
-	while (node) {
-	    if (nd_type(node) != NODE_ARRAY) {
-		rb_bug("compile_array: This node is not NODE_ARRAY, but %s",
-		       ruby_node_name(nd_type(node)));
+    if (nd_type(node) == NODE_ZARRAY) {
+	if (!poped) {
+	    switch (type) {
+	      case COMPILE_ARRAY_TYPE_ARRAY: ADD_INSN1(ret, line, newarray, INT2FIX(0)); break;
+	      case COMPILE_ARRAY_TYPE_HASH: ADD_INSN1(ret, line, newhash, INT2FIX(0)); break;
+	      case COMPILE_ARRAY_TYPE_ARGS: /* do nothing */ break;
 	    }
-
-	    i++;
-	    if (opt_p && nd_type(node->nd_head) != NODE_LIT) {
-		opt_p = Qfalse;
-	    }
-	    COMPILE_(anchor, "array element", node->nd_head, poped);
-	    node = node->nd_next;
 	}
     }
+    else {
+	int opt_p = 1;
+	int first = 1, i;
 
-    if (len != i) {
-	if (0) {
-	    rb_bug("node error: compile_array (%d: %d-%d)",
-		   (int)nd_line(node_root), len, i);
-	}
-	len = i;
-    }
+	while (node) {
+	    NODE *start_node = node, *end_node;
+	    const int max = 0x100;
+	    DECL_ANCHOR(anchor);
+	    INIT_ANCHOR(anchor);
 
-    if (opt_p == Qtrue) {
-	if (!poped) {
-	    VALUE ary = rb_ary_tmp_new(len);
-	    node = node_root;
-	    while (node) {
-		rb_ary_push(ary, node->nd_head->nd_lit);
+	    for (i=0; i<max && node; i++, len++) {
+		if (CPDEBUG > 0 && nd_type(node) != NODE_ARRAY) {
+		    rb_bug("compile_array: This node is not NODE_ARRAY, but %s", ruby_node_name(nd_type(node)));
+		}
+
+		if (opt_p && nd_type(node->nd_head) != NODE_LIT) {
+		    opt_p = 0;
+		}
+
+		COMPILE_(anchor, "array element", node->nd_head, poped);
 		node = node->nd_next;
 	    }
-	    OBJ_FREEZE(ary);
-	    iseq_add_mark_object_compile_time(iseq, ary);
-	    ADD_INSN1(ret, nd_line(node_root), duparray, ary);
+
+	    if (opt_p && type != COMPILE_ARRAY_TYPE_ARGS) {
+		if (!poped) {
+		    VALUE ary = rb_ary_tmp_new(i);
+
+		    end_node = node;
+		    node = start_node;
+
+		    while (node != end_node) {
+			rb_ary_push(ary, node->nd_head->nd_lit);
+			node = node->nd_next;
+		    }
+		    while (node && nd_type(node->nd_head) == NODE_LIT) {
+			rb_ary_push(ary, node->nd_head->nd_lit);
+			node = node->nd_next;
+			len++;
+		    }
+
+		    OBJ_FREEZE(ary);
+
+		    iseq_add_mark_object_compile_time(iseq, ary);
+
+		    if (first) {
+			first = 0;
+			if (type == COMPILE_ARRAY_TYPE_ARRAY) {
+			    ADD_INSN1(ret, line, duparray, ary);
+			}
+			else { /* COMPILE_ARRAY_TYPE_HASH */
+			    ADD_INSN1(ret, line, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE));
+			    ADD_INSN1(ret, line, putobject, ary);
+			    ADD_SEND(ret, line, ID2SYM(id_core_hash_from_ary), INT2FIX(1));
+			}
+		    }
+		    else {
+			if (type == COMPILE_ARRAY_TYPE_ARRAY) {
+			    ADD_INSN1(ret, line, putobject, ary);
+			    ADD_INSN(ret, line, concatarray);
+			}
+			else {
+			    ADD_INSN1(ret, line, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE));
+			    ADD_INSN1(ret, line, putobject, ary);
+			    ADD_SEND(ret, line, ID2SYM(id_core_hash_merge_ary), INT2FIX(1));
+			}
+		    }
+		}
+	    }
+	    else {
+		if (!poped) {
+		    switch (type) {
+		      case COMPILE_ARRAY_TYPE_ARRAY:
+			ADD_INSN1(anchor, line, newarray, INT2FIX(i));
+
+			if (first) {
+			    first = 0;
+			}
+			else {
+			    ADD_INSN(anchor, line, concatarray);
+			}
+			APPEND_LIST(ret, anchor);
+			break;
+		      case COMPILE_ARRAY_TYPE_HASH:
+			if (first) {
+			    first = 0;
+			    ADD_INSN1(anchor, line, newhash, INT2FIX(i));
+			    APPEND_LIST(ret, anchor);
+			}
+			else {
+			    ADD_INSN1(ret, line, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE));
+			    ADD_INSN(ret, line, swap);
+			    APPEND_LIST(ret, anchor);
+			    ADD_SEND(ret, line, ID2SYM(id_core_hash_merge_ptr), INT2FIX(i + 1));
+			}
+			break;
+		      case COMPILE_ARRAY_TYPE_ARGS:
+			APPEND_LIST(ret, anchor);
+			break;
+		    }
+		}
+	    }
 	}
     }
-    else {
-	if (!poped) {
-	    ADD_INSN1(anchor, line, newarray, INT2FIX(len));
-	}
-	APPEND_LIST(ret, anchor);
-    }
     return len;
 }
 
 static VALUE
-compile_array(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE* node_root, VALUE opt_p)
+compile_array(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE* node_root, enum compile_array_type_t type)
 {
-    return compile_array_(iseq, ret, node_root, opt_p, 0);
+    return compile_array_(iseq, ret, node_root, type, 0);
 }
 
 static VALUE
@@ -2963,8 +3037,7 @@
 	    *flag |= VM_CALL_ARGS_SPLAT_BIT;
 
 	    if (next_is_array) {
-		argc = INT2FIX(compile_array(iseq, args, argn->nd_head, Qfalse) + 1);
-		POP_ELEMENT(args);
+		argc = INT2FIX(compile_array(iseq, args, argn->nd_head, COMPILE_ARRAY_TYPE_ARGS) + 1);
 	    }
 	    else {
 		argn = argn->nd_head;
@@ -2973,8 +3046,7 @@
 	    break;
 	  }
 	  case NODE_ARRAY: {
-	    argc = INT2FIX(compile_array(iseq, args, argn, Qfalse));
-	    POP_ELEMENT(args);
+	    argc = INT2FIX(compile_array(iseq, args, argn, COMPILE_ARRAY_TYPE_ARGS));
 	    break;
 	  }
 	  default: {
@@ -4265,7 +4337,7 @@
 	break;
       }
       case NODE_ARRAY:{
-	compile_array_(iseq, ret, node, Qtrue, poped);
+	compile_array_(iseq, ret, node, COMPILE_ARRAY_TYPE_ARRAY, poped);
 	break;
       }
       case NODE_ZARRAY:{
@@ -4293,22 +4365,19 @@
 
 	INIT_ANCHOR(list);
 	switch (type) {
-	  case NODE_ARRAY:{
-	    compile_array(iseq, list, node->nd_head, Qfalse);
-	    size = OPERAND_AT(POP_ELEMENT(list), 0);
+	  case NODE_ARRAY:
+	    size = INT2FIX(compile_array(iseq, list, node->nd_head, COMPILE_ARRAY_TYPE_HASH));
 	    ADD_SEQ(ret, list);
 	    break;
-	  }
+
 	  case NODE_ZARRAY:
-	    size = INT2FIX(0);
+	    ADD_INSN1(ret, nd_line(node), newhash, INT2FIX(0));
 	    break;
 
 	  default:
 	    rb_bug("can't make hash with this node: %s", ruby_node_name(type));
 	}
 
-	ADD_INSN1(ret, nd_line(node), newhash, size);
-
 	if (poped) {
 	    ADD_INSN(ret, nd_line(node), pop);
 	}
Index: id.c
===================================================================
--- id.c	(revision 35305)
+++ id.c	(revision 35306)
@@ -31,6 +31,9 @@
     REGISTER_SYMID(id_core_define_method,      	    "core#define_method");
     REGISTER_SYMID(id_core_define_singleton_method, "core#define_singleton_method");
     REGISTER_SYMID(id_core_set_postexe,             "core#set_postexe");
+    REGISTER_SYMID(id_core_hash_from_ary,           "core#hash_from_ary");
+    REGISTER_SYMID(id_core_hash_merge_ary,          "core#hash_merge_ary");
+    REGISTER_SYMID(id_core_hash_merge_ptr,          "core#hash_merge_ptr");
 
     REGISTER_SYMID(idEach, "each");
     REGISTER_SYMID(idLength, "length");
Index: parse.y
===================================================================
--- parse.y	(revision 35305)
+++ parse.y	(revision 35306)
@@ -802,6 +802,9 @@
 %nonassoc id_core_define_method
 %nonassoc id_core_define_singleton_method
 %nonassoc id_core_set_postexe
+%nonassoc id_core_hash_from_ary
+%nonassoc id_core_hash_merge_ary
+%nonassoc id_core_hash_merge_ptr
 
 %token tLAST_TOKEN
 
Index: vm.c
===================================================================
--- vm.c	(revision 35305)
+++ vm.c	(revision 35306)
@@ -2030,6 +2030,44 @@
     return Qnil;
 }
 
+static VALUE
+m_core_hash_from_ary(VALUE self, VALUE ary)
+{
+    VALUE hash = rb_hash_new();
+    int i;
+
+    for (i=0; i<RARRAY_LEN(ary); i+=2) {
+	rb_hash_aset(hash, RARRAY_PTR(ary)[i], RARRAY_PTR(ary)[i+1]);
+    }
+
+    return hash;
+}
+
+static VALUE
+m_core_hash_merge_ary(VALUE self, VALUE hash, VALUE ary)
+{
+    int i;
+
+    for (i=0; i<RARRAY_LEN(ary); i+=2) {
+	rb_hash_aset(hash, RARRAY_PTR(ary)[i], RARRAY_PTR(ary)[i+1]);
+    }
+
+    return hash;
+}
+
+static VALUE
+m_core_hash_merge_ptr(int argc, VALUE *argv, VALUE recv)
+{
+    int i;
+    VALUE hash = argv[0];
+
+    for (i=1; i<argc; i+=2) {
+	rb_hash_aset(hash, argv[i], argv[i+1]);
+    }
+
+    return hash;
+}
+
 extern VALUE *rb_gc_stack_start;
 extern size_t rb_gc_stack_maxsize;
 #ifdef __ia64
@@ -2093,6 +2131,9 @@
     rb_define_method_id(klass, id_core_define_method, m_core_define_method, 3);
     rb_define_method_id(klass, id_core_define_singleton_method, m_core_define_singleton_method, 3);
     rb_define_method_id(klass, id_core_set_postexe, m_core_set_postexe, 1);
+    rb_define_method_id(klass, id_core_hash_from_ary, m_core_hash_from_ary, 1);
+    rb_define_method_id(klass, id_core_hash_merge_ary, m_core_hash_merge_ary, 2);
+    rb_define_method_id(klass, id_core_hash_merge_ptr, m_core_hash_merge_ptr, -1);
     rb_obj_freeze(fcore);
     rb_gc_register_mark_object(fcore);
     rb_mRubyVMFrozenCore = fcore;
Index: test/ruby/test_literal.rb
===================================================================
--- test/ruby/test_literal.rb	(revision 35305)
+++ test/ruby/test_literal.rb	(revision 35306)
@@ -1,4 +1,5 @@
 require 'test/unit'
+require_relative 'envutil'
 
 class TestRubyLiteral < Test::Unit::TestCase
 
@@ -186,6 +187,13 @@
     assert_equal "literal", h["string"]
   end
 
+  def test_big_array_and_hash_literal
+    assert_normal_exit %q{x = nil; raise if eval("[#{(1..1_000_000).map{'x'}.join(", ")}]").size != 1_000_000}, "", timeout: 300
+    assert_normal_exit %q{x = nil; raise if eval("[#{(1..1_000_000).to_a.join(", ")}]").size != 1_000_000}, "", timeout: 300
+    assert_normal_exit %q{x = nil; raise if eval("{#{(1..1_000_000).map{|n| "#{n} => x"}.join(', ')}}").size != 1_000_000}, "", timeout: 300
+    assert_normal_exit %q{x = nil; raise if eval("{#{(1..1_000_000).map{|n| "#{n} => #{n}"}.join(', ')}}").size != 1_000_000}, "", timeout: 300
+  end
+
   def test_range
     assert_instance_of Range, (1..2)
     assert_equal(1..2, 1..2)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/