ruby-changes:66504
From: Yusuke <ko1@a...>
Date: Fri, 18 Jun 2021 02:34:40 +0900 (JST)
Subject: [ruby-changes:66504] acae5f363d (master): ast.rb: RubyVM::AST.parse and .of accepts `save_script_lines: true`
https://git.ruby-lang.org/ruby.git/commit/?id=acae5f363d From acae5f363dfaedd9c2873cee68c9498da3c072f5 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh <mame@r...> Date: Thu, 17 Jun 2021 23:43:08 +0900 Subject: ast.rb: RubyVM::AST.parse and .of accepts `save_script_lines: true` This option makes the parser keep the original source as an array of the original code lines. This feature exploits the mechanism of `SCRIPT_LINES__` but records only the specified code that is passed to RubyVM::AST.of or .parse, instead of recording all parsed program texts. --- ast.c | 48 ++++++++++++++++++++++++++++++++---------------- ast.rb | 48 ++++++++++++++++++++++++++++++++++++++++++------ compile.c | 1 + internal/parse.h | 1 + node.c | 1 + node.h | 1 + parse.y | 17 +++++++++++++++++ test/ruby/test_ast.rb | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 145 insertions(+), 22 deletions(-) diff --git a/ast.c b/ast.c index b7416ad..3fec251 100644 --- a/ast.c +++ b/ast.c @@ -64,8 +64,8 @@ ast_new_internal(rb_ast_t *ast, const NODE *node) https://github.com/ruby/ruby/blob/trunk/ast.c#L64 return obj; } -static VALUE rb_ast_parse_str(VALUE str); -static VALUE rb_ast_parse_file(VALUE path); +static VALUE rb_ast_parse_str(VALUE str, VALUE save_script_lines); +static VALUE rb_ast_parse_file(VALUE path, VALUE save_script_lines); static VALUE ast_parse_new(void) @@ -85,29 +85,31 @@ ast_parse_done(rb_ast_t *ast) https://github.com/ruby/ruby/blob/trunk/ast.c#L85 } static VALUE -ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str) +ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE save_script_lines) { - return rb_ast_parse_str(str); + return rb_ast_parse_str(str, save_script_lines); } static VALUE -rb_ast_parse_str(VALUE str) +rb_ast_parse_str(VALUE str, VALUE save_script_lines) { rb_ast_t *ast = 0; StringValue(str); - ast = rb_parser_compile_string_path(ast_parse_new(), Qnil, str, 1); + VALUE vparser = ast_parse_new(); + if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser); + ast = rb_parser_compile_string_path(vparser, Qnil, str, 1); return ast_parse_done(ast); } static VALUE -ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path) +ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE save_script_lines) { - return rb_ast_parse_file(path); + return rb_ast_parse_file(path, save_script_lines); } static VALUE -rb_ast_parse_file(VALUE path) +rb_ast_parse_file(VALUE path, VALUE save_script_lines) { VALUE f; rb_ast_t *ast = 0; @@ -116,7 +118,9 @@ rb_ast_parse_file(VALUE path) https://github.com/ruby/ruby/blob/trunk/ast.c#L118 FilePathValue(path); f = rb_file_open_str(path, "r"); rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-")); - ast = rb_parser_compile_file_path(ast_parse_new(), Qnil, f, 1); + VALUE vparser = ast_parse_new(); + if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser); + ast = rb_parser_compile_file_path(vparser, Qnil, f, 1); rb_io_close(f); return ast_parse_done(ast); } @@ -135,12 +139,14 @@ lex_array(VALUE array, int index) https://github.com/ruby/ruby/blob/trunk/ast.c#L139 } static VALUE -rb_ast_parse_array(VALUE array) +rb_ast_parse_array(VALUE array, VALUE save_script_lines) { rb_ast_t *ast = 0; array = rb_check_array_type(array); - ast = rb_parser_compile_generic(ast_parse_new(), lex_array, Qnil, array, 1); + VALUE vparser = ast_parse_new(); + if (RTEST(save_script_lines)) rb_parser_save_script_lines(vparser); + ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1); return ast_parse_done(ast); } @@ -187,7 +193,7 @@ script_lines(VALUE path) https://github.com/ruby/ruby/blob/trunk/ast.c#L193 } static VALUE -ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body) +ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE save_script_lines) { VALUE path, node, lines; int node_id; @@ -209,13 +215,13 @@ ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body) https://github.com/ruby/ruby/blob/trunk/ast.c#L215 path = rb_iseq_path(iseq); node_id = iseq->body->location.node_id; if (!NIL_P(lines = script_lines(path))) { - node = rb_ast_parse_array(lines); + node = rb_ast_parse_array(lines, save_script_lines); } else if (RSTRING_LEN(path) == 2 && memcmp(RSTRING_PTR(path), "-e", 2) == 0) { - node = rb_ast_parse_str(rb_e_script); + node = rb_ast_parse_str(rb_e_script, save_script_lines); } else { - node = rb_ast_parse_file(path); + node = rb_ast_parse_file(path, save_script_lines); } return node_find(node, node_id); @@ -698,6 +704,16 @@ ast_node_inspect(rb_execution_context_t *ec, VALUE self) https://github.com/ruby/ruby/blob/trunk/ast.c#L704 return str; } +static VALUE +ast_node_script_lines(rb_execution_context_t *ec, VALUE self) +{ + struct ASTNodeData *data; + TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data); + VALUE ret = data->ast->body.script_lines; + if (!ret) ret = Qnil; + return ret; +} + #include "ast.rbinc" void diff --git a/ast.rb b/ast.rb index 9d4b05b..ce99f53 100644 --- a/ast.rb +++ b/ast.rb @@ -29,8 +29,8 @@ module RubyVM::AbstractSyntaxTree https://github.com/ruby/ruby/blob/trunk/ast.rb#L29 # # RubyVM::AbstractSyntaxTree.parse("x = 1 + 2") # # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-1:9> - def self.parse string - Primitive.ast_s_parse string + def self.parse string, save_script_lines: false + Primitive.ast_s_parse string, save_script_lines end # call-seq: @@ -44,8 +44,8 @@ module RubyVM::AbstractSyntaxTree https://github.com/ruby/ruby/blob/trunk/ast.rb#L44 # # RubyVM::AbstractSyntaxTree.parse_file("my-app/app.rb") # # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-31:3> - def self.parse_file pathname - Primitive.ast_s_parse_file pathname + def self.parse_file pathname, save_script_lines: false + Primitive.ast_s_parse_file pathname, save_script_lines end # call-seq: @@ -63,8 +63,8 @@ module RubyVM::AbstractSyntaxTree https://github.com/ruby/ruby/blob/trunk/ast.rb#L63 # # RubyVM::AbstractSyntaxTree.of(method(:hello)) # # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-3:3> - def self.of body - Primitive.ast_s_of body + def self.of body, save_script_lines: false + Primitive.ast_s_of body, save_script_lines end # RubyVM::AbstractSyntaxTree::Node instances are created by parse methods in @@ -139,5 +139,41 @@ module RubyVM::AbstractSyntaxTree https://github.com/ruby/ruby/blob/trunk/ast.rb#L139 def inspect Primitive.ast_node_inspect end + + # call-seq: + # node.script_lines -> array + # + # Returns the original source code as an array of lines. + # + # Note that this is an API for ruby internal use, debugging, + # and research. Do not use this for any other purpose. + # The compatibility is not guaranteed. + def script_lines + Primitive.ast_node_script_lines + end + + # call-seq: + # node.source -> string + # + # Returns the code fragment that corresponds to this AST. + # + # Note that this is an API for ruby internal use, debugging, + # and research. Do not use this for any other purpose. + # The compatibility is not guaranteed. + # + # Also note that this API may return an incomplete code fragment + # that does not parse; for example, a here document following + # an expression may be dropped. + def source + lines = script_lines + if lines + lines = lines[first_lineno - 1 .. last_lineno - 1] + lines[-1] = lines[-1][0...last_column] + lines[0] = lines[0][first_column..-1] + lines.join + else + nil + end + end end end diff --git a/compile.c b/compile.c index c58a6c2..30c7511 100644 --- a/compile.c +++ b/compile.c @@ -1329,6 +1329,7 @@ new_child_iseq(rb_iseq_t *iseq, const NODE *const node, https://github.com/ruby/ruby/blob/trunk/compile.c#L1329 ast.root = node; ast.compile_option = 0; ast.line_count = -1; + ast.script_lines = Qfalse; debugs("[new_child_iseq]> ---------------------------------------\n"); int isolated_depth = ISEQ_COMPILE_DATA(iseq)->isolated_depth; diff --git a/internal/parse.h b/internal/parse.h index a37a39f8..588b2b3 100644 --- a/internal/parse.h +++ b/internal/parse.h @@ -15,6 +15,7 @@ struct rb_iseq_struct; /* in vm_core.h */ https://github.com/ruby/ruby/blob/trunk/internal/parse.h#L15 /* parse.y */ VALUE rb_parser_set_yydebug(VALUE, VALUE); void *rb_parser_load_file(VALUE parser, VALUE name); +void rb_parser_save_script_lines(VALUE vparser); RUBY_SYMBOL_EXPORT_BEGIN VALUE rb_parser_set_context(VALUE, const struct rb_iseq_struct *, int); diff --git a/node.c b/node.c index bef9d7b..f3dbf6e 100644 --- a/node.c +++ b/node.c @@ -1407,6 +1407,7 @@ rb_ast_mark(rb_ast_t *ast) https://github.com/ruby/ruby/blob/trunk/node.c#L1407 iterate_node_values(&nb->markable, mark_ast_value, NULL); } + if (ast->body.script_lines) rb_gc_mark(ast->body.script_lines); } void diff --git a/node.h b/node.h index 192e121..592b285 100644 --- a/node.h +++ b/node.h @@ -399,6 +399,7 @@ typedef struct rb_ast_body_struct { https://github.com/ruby/ruby/blob/trunk/node.h#L399 const NODE *root; VALUE compile_option; int line_count; + VALUE script_lines; } rb_ast_body_t; typedef struct rb_ast_struct { VALUE flags; diff --git a/parse.y b/parse.y index 6b42b6b..47b63e8 100644 --- a/parse.y +++ b/parse.y @@ -337,6 +337,7 @@ struct parser_params { https://github.com/ruby/ruby/blob/trunk/parse.y#L337 unsigned int do_loop (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/