ruby-changes:73929
From: yui-knk <ko1@a...>
Date: Sat, 8 Oct 2022 17:59:34 +0900 (JST)
Subject: [ruby-changes:73929] 342d4c16d9 (master): Generates "end" tokens if parser hits end of input
https://git.ruby-lang.org/ruby.git/commit/?id=342d4c16d9 From 342d4c16d963408905fd08118d1908fe197f2364 Mon Sep 17 00:00:00 2001 From: yui-knk <spiketeika@g...> Date: Sun, 25 Sep 2022 18:09:34 +0900 Subject: Generates "end" tokens if parser hits end of input but "end" tokens are needed for correct language. [Feature #19013] --- parse.y | 111 ++++++++++++++- test/ruby/test_ast.rb | 378 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 483 insertions(+), 6 deletions(-) diff --git a/parse.y b/parse.y index c05ce48068..a1b1e2ee46 100644 --- a/parse.y +++ b/parse.y @@ -355,6 +355,8 @@ struct parser_params { https://github.com/ruby/ruby/blob/trunk/parse.y#L355 VALUE error_buffer; VALUE debug_lines; const struct rb_iseq_struct *parent_iseq; + /* store specific keyword localtions to generate dummy end token */ + VALUE end_expect_token_localtions; #else /* Ripper only */ @@ -408,6 +410,36 @@ pop_pktbl(struct parser_params *p, st_table *tbl) https://github.com/ruby/ruby/blob/trunk/parse.y#L410 p->pktbl = tbl; } +#ifndef RIPPER +static void flush_debug_buffer(struct parser_params *p, VALUE out, VALUE str); + +static void +debug_end_expect_token_localtions(struct parser_params *p, const char *name) +{ + if(p->debug) { + VALUE mesg = rb_sprintf("%s: ", name); + rb_str_catf(mesg, " %"PRIsVALUE"\n", p->end_expect_token_localtions); + flush_debug_buffer(p, p->debug_output, mesg); + } +} + +static void +push_end_expect_token_localtions(struct parser_params *p, const rb_code_position_t *pos) +{ + if(NIL_P(p->end_expect_token_localtions)) return; + rb_ary_push(p->end_expect_token_localtions, rb_ary_new_from_args(2, INT2NUM(pos->lineno), INT2NUM(pos->column))); + debug_end_expect_token_localtions(p, "push_end_expect_token_localtions"); +} + +static void +pop_end_expect_token_localtions(struct parser_params *p) +{ + if(NIL_P(p->end_expect_token_localtions)) return; + rb_ary_pop(p->end_expect_token_localtions); + debug_end_expect_token_localtions(p, "pop_end_expect_token_localtions"); +} +#endif + RBIMPL_ATTR_NONNULL((1, 2, 3)) static int parser_yyerror(struct parser_params*, const YYLTYPE *yylloc, const char*); RBIMPL_ATTR_NONNULL((1, 2)) @@ -1214,6 +1246,7 @@ static int looking_at_eol_p(struct parser_params *p); https://github.com/ruby/ruby/blob/trunk/parse.y#L1246 %token <node> tBACK_REF "back reference" %token <node> tSTRING_CONTENT "literal content" %token <num> tREGEXP_END +%token <num> tDUMNY_END "dummy end" %type <node> singleton strings string string1 xstring regexp %type <node> string_contents xstring_contents regexp_contents string_content @@ -3307,28 +3340,38 @@ primary : literal https://github.com/ruby/ruby/blob/trunk/parse.y#L3340 } | defn_head f_arglist + { + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } bodystmt k_end { restore_defun(p, $<node>1->nd_defn); /*%%%*/ - $$ = set_defun_body(p, $1, $2, $3, &@$); + $$ = set_defun_body(p, $1, $2, $4, &@$); /*% %*/ - /*% ripper: def!(get_value($1), $2, $3) %*/ + /*% ripper: def!(get_value($1), $2, $4) %*/ local_pop(p); } | defs_head f_arglist + { + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } bodystmt k_end { restore_defun(p, $<node>1->nd_defn); /*%%%*/ - $$ = set_defun_body(p, $1, $2, $3, &@$); + $$ = set_defun_body(p, $1, $2, $4, &@$); /*% $1 = get_value($1); %*/ - /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $3) %*/ + /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $4) %*/ local_pop(p); } | keyword_break @@ -3371,6 +3414,9 @@ primary_value : primary https://github.com/ruby/ruby/blob/trunk/parse.y#L3414 k_begin : keyword_begin { token_info_push(p, "begin", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3388,36 +3434,54 @@ k_if : keyword_if https://github.com/ruby/ruby/blob/trunk/parse.y#L3434 p->token_info->nonspc = 0; } } + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_unless : keyword_unless { token_info_push(p, "unless", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_while : keyword_while { token_info_push(p, "while", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_until : keyword_until { token_info_push(p, "until", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_case : keyword_case { token_info_push(p, "case", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; k_for : keyword_for { token_info_push(p, "for", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3425,6 +3489,9 @@ k_class : keyword_class https://github.com/ruby/ruby/blob/trunk/parse.y#L3489 { token_info_push(p, "class", &@$); $<ctxt>$ = p->ctxt; + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3432,6 +3499,9 @@ k_module : keyword_module https://github.com/ruby/ruby/blob/trunk/parse.y#L3499 { token_info_push(p, "module", &@$); $<ctxt>$ = p->ctxt; + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3445,12 +3515,19 @@ k_def : keyword_def https://github.com/ruby/ruby/blob/trunk/parse.y#L3515 k_do : keyword_do { token_info_push(p, "do", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } ; k_do_block : keyword_do_block { token_info_push(p, "do", &@$); + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ } ; @@ -3497,6 +3574,13 @@ k_elsif : keyword_elsif https://github.com/ruby/ruby/blob/trunk/parse.y#L3574 k_end : keyword_end { token_info_pop(p, "end", &@$); + /*%%%*/ + pop_end_expect_token_localtions(p); + /*% %*/ + } + | tDUMNY_END + { + compile_error(p, "syntax error, unexpected end-of-input"); } ; @@ -3862,9 +3946,15 @@ lambda_body : tLAMBEG compstmt '}' https://github.com/ruby/ruby/blob/trunk/parse.y#L3946 token_info_pop(p, "}", &@3); $$ = $2; } - | keyword_do_LAMBDA bodystmt k_end + | keyword_do_LAMBDA { - $$ = $2; + /*%%%*/ + push_end_expect_token_localtions(p, &@1.beg_pos); + /*% %*/ + } + bodystmt k_end + { + $$ = $3; } ; @@ -9319,6 +9409,12 @@ parser_yylex(struct parser_params *p) https://github.com/ruby/ruby/blob/trunk/parse.y#L9409 case '\032': /* ^Z */ case -1: /* end of script. */ p->eofp = 1; +#ifndef RIPPER + if (!NIL_P(p->end_expect_token_localtions) && RARRAY_LEN(p->end_expect_token_localtions) > 0) { + pop_end_expect_token_localtions(p); + return tDUMNY_END; + } +#endif return 0; /* white spaces */ @@ -13184,6 +13280,7 @@ parser_initialize(struct parser_params *p) https://github.com/ruby/ruby/blob/trunk/parse.y#L13280 p->parsing_thread = Qnil; #else p->error_buffer = Qfalse; + p->end_expect_token_localtions = Qnil; #endif p->debug_buffer = Qnil; p->debug_output = rb_ractor_stdout(); @@ -13212,6 +13309,7 @@ parser_mark(void *ptr) https://github.com/ruby/ruby/blob/trunk/parse.y#L13309 rb_gc_mark(p->debug_lines); rb_gc_mark(p->compile_option); rb_gc_mark(p->error_buffer); + rb_gc_mark(p->end_expect_token_localtions); #else rb_gc_mark(p->delayed.token); rb_gc_mark(p->value); @@ -13324,6 +13422,7 @@ rb_parser_error_tolerant(VALUE vparser) https://github.com/ruby/ruby/blob/trunk/parse.y#L13422 TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); p->error_tolerant = 1; + p->end_expect_token_localtions = rb_ary_new(); } #endif diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index c2c5356f83..2f05bf97a1 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -1,6 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/test/ruby/test_ast.rb#L1 # frozen_string_literal: false require 'test/unit' require 'tempfile' +require 'pp' class RubyVM module AbstractSyntaxTree @@ -578,4 +579,381 @@ dummy https://github.com/ruby/ruby/blob/trunk/test/ruby/test_ast.rb#L579 assert_equal(:SCOPE, node.type) end + + def test_error_tolerant_end_is_short_for_method_define + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + def m + m2 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:4 + tbl: [] + args: nil + body: + (DEFN@1:0-2:4 + mid: :m + body: + (SCOPE@1:0-2:4 + tbl: [] + args: + (ARGS@1:5-1:5 + pre_num: 0 + pre_init: nil + opt: nil + first_post: nil + post_num: 0 + post_init: nil + rest: nil + kw: nil + kwrest: nil + block: nil) + body: (VCALL@2:2-2:4 :m2)))) + EXP + end + + def test_error_tolerant_end_is_short_for_singleton_method_define + node = RubyVM::AbstractSyntaxTree.parse(<<~STR, error_tolerant: true) + def obj.m + m2 + STR + + str = "" + PP.pp(node, str) + assert_equal(<<~EXP, str) + (SCOPE@1:0-2:4 + tbl: [] + args: nil + body: + (DEFS@1:0-2:4 (VCALL@1:4-1:7 :obj) :m + (SCOPE@1:0-2:4 + tbl: [] + (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/