ruby-changes:40837
From: nobu <ko1@a...>
Date: Mon, 7 Dec 2015 23:40:09 +0900 (JST)
Subject: [ruby-changes:40837] nobu:r52916 (trunk): parse.y: indented heredoc
nobu 2015-12-07 23:39:52 +0900 (Mon, 07 Dec 2015) New Revision: 52916 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=52916 Log: parse.y: indented heredoc * parse.y: add heredoc <<~ syntax. [Feature #9098] Modified files: trunk/ChangeLog trunk/doc/syntax/literals.rdoc trunk/ext/ripper/lib/ripper/lexer.rb trunk/ext/ripper/lib/ripper/sexp.rb trunk/parse.y trunk/test/ripper/test_parser_events.rb trunk/test/ripper/test_sexp.rb trunk/test/ruby/test_syntax.rb Index: doc/syntax/literals.rdoc =================================================================== --- doc/syntax/literals.rdoc (revision 52915) +++ doc/syntax/literals.rdoc (revision 52916) @@ -196,6 +196,20 @@ Note that the while the closing identifi https://github.com/ruby/ruby/blob/trunk/doc/syntax/literals.rdoc#L196 always treated as if it is flush left. If you indent the content those spaces will appear in the output. +To have indented content as well as an indented closing identifier, you can use +a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>: + + expected_result = <<~SQUIGGLY_HEREDOC + This would contain specially formatted text. + + That might span many lines + SQUIGGLY_HEREDOC + +The indentation of the least-indented line will be removed from each line of +the content. Note that empty lines and lines consisting solely of literal tabs +and spaces will be ignored for the purposes of determining indentation, but +escaped tabs and spaces are considered non-indentation characters. + A heredoc allows interpolation and escaped characters. You may disable interpolation and escaping by surrounding the opening identifier with single quotes: Index: ChangeLog =================================================================== --- ChangeLog (revision 52915) +++ ChangeLog (revision 52916) @@ -1,3 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Mon Dec 7 23:39:49 2015 Ben Miller <bjmllr@g...> + + * parse.y: add heredoc <<~ syntax. 
[Feature #9098] + Mon Dec 7 23:06:16 2015 Kazuhiro NISHIYAMA <zn@m...> * prelude.rb (IO#read_nonblock): [DOC] add missing options to Index: parse.y =================================================================== --- parse.y (revision 52915) +++ parse.y (revision 52916) @@ -257,6 +257,8 @@ struct parser_params { https://github.com/ruby/ruby/blob/trunk/parse.y#L257 int toksiz; int tokline; int heredoc_end; + int heredoc_indent; + int heredoc_line_indent; char *tokenbuf; NODE *deferred_nodes; struct local_vars *lvtbl; @@ -347,6 +349,8 @@ static int parser_yyerror(struct parser_ https://github.com/ruby/ruby/blob/trunk/parse.y#L349 #define lex_p (parser->lex.pcur) #define lex_pend (parser->lex.pend) #define heredoc_end (parser->heredoc_end) +#define heredoc_indent (parser->heredoc_indent) +#define heredoc_line_indent (parser->heredoc_line_indent) #define command_start (parser->command_start) #define deferred_nodes (parser->deferred_nodes) #define lex_gets_ptr (parser->lex.gets_ptr) @@ -487,6 +491,9 @@ static int reg_fragment_check_gen(struct https://github.com/ruby/ruby/blob/trunk/parse.y#L491 static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match)) +static void parser_heredoc_dedent(struct parser_params*,NODE*); +# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str)) + #define get_id(id) (id) #define get_value(val) (val) #else @@ -670,6 +677,9 @@ new_args_tail_gen(struct parser_params * https://github.com/ruby/ruby/blob/trunk/parse.y#L677 #define new_defined(expr) dispatch1(defined, (expr)) +static void parser_heredoc_dedent(struct parser_params*,VALUE); +# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str)) + #define FIXME 0 #endif /* RIPPER */ @@ -3887,6 +3897,7 @@ strings : string https://github.com/ruby/ruby/blob/trunk/parse.y#L3897 else { node = evstr2dstr(node); } + heredoc_indent = 0; $$ = 
node; /*% $$ = $1; @@ -3908,6 +3919,7 @@ string : tCHAR https://github.com/ruby/ruby/blob/trunk/parse.y#L3919 string1 : tSTRING_BEG string_contents tSTRING_END { + heredoc_dedent($2); /*%%%*/ $$ = $2; /*% @@ -3920,6 +3932,10 @@ xstring : tXSTRING_BEG xstring_contents https://github.com/ruby/ruby/blob/trunk/parse.y#L3932 { /*%%%*/ NODE *node = $2; + /*% + %*/ + heredoc_dedent($2); + /*%%%*/ if (!node) { node = NEW_XSTR(STR_NEW0()); } @@ -4319,6 +4335,10 @@ string_content : tSTRING_CONTENT https://github.com/ruby/ruby/blob/trunk/parse.y#L4335 $<num>$ = brace_nest; brace_nest = 0; } + { + $<num>$ = heredoc_indent; + heredoc_indent = 0; + } compstmt tSTRING_DEND { cond_stack = $<val>1; @@ -4326,11 +4346,13 @@ string_content : tSTRING_CONTENT https://github.com/ruby/ruby/blob/trunk/parse.y#L4346 lex_strterm = $<node>3; lex_state = $<num>4; brace_nest = $<num>5; + heredoc_indent = $<num>6; + heredoc_line_indent = -1; /*%%%*/ - if ($6) $6->flags &= ~NODE_FL_NEWLINE; - $$ = new_evstr($6); + if ($7) $7->flags &= ~NODE_FL_NEWLINE; + $$ = new_evstr($7); /*% - $$ = dispatch1(string_embexpr, $6); + $$ = dispatch1(string_embexpr, $7); %*/ } ; @@ -6204,6 +6226,27 @@ parser_tokadd_string(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6226 } while (0) while ((c = nextc()) != -1) { + if (heredoc_indent > 0) { + if (heredoc_line_indent == -1) { + if (c == '\n') heredoc_line_indent = 0; + } + else { + if (c == ' ') { + heredoc_line_indent++; + } + else if (c == '\t') { + int w = (heredoc_line_indent / TAB_WIDTH) + 1; + heredoc_line_indent = w * TAB_WIDTH; + } + else if (c != '\n') { + if (heredoc_indent > heredoc_line_indent) { + heredoc_indent = heredoc_line_indent; + } + heredoc_line_indent = -1; + } + } + } + if (paren && c == paren) { ++*nest; } @@ -6465,6 +6508,12 @@ parser_heredoc_identifier(struct parser_ https://github.com/ruby/ruby/blob/trunk/parse.y#L6508 c = nextc(); func = STR_FUNC_INDENT; } + else if (c == '~') { + c = nextc(); + func = 
STR_FUNC_INDENT; + heredoc_indent = INT_MAX; + heredoc_line_indent = 0; + } switch (c) { case '\'': func |= str_squote; goto quoted; @@ -6489,7 +6538,7 @@ parser_heredoc_identifier(struct parser_ https://github.com/ruby/ruby/blob/trunk/parse.y#L6538 if (!parser_is_identchar()) { pushback(c); if (func & STR_FUNC_INDENT) { - pushback('-'); + pushback(heredoc_indent > 0 ? '~' : '-'); } return 0; } @@ -6535,6 +6584,114 @@ parser_heredoc_restore(struct parser_par https://github.com/ruby/ruby/blob/trunk/parse.y#L6584 } static int +dedent_pos(const char *str, long len, int width) +{ + int i, col = 0; + + for (i = 0; i < len && col < width; i++) { + if (str[i] == ' ') { + col++; + } + else if (str[i] == '\t') { + int n = TAB_WIDTH * (col / TAB_WIDTH + 1); + if (n > width) break; + col = n; + } + else { + break; + } + } + return i; +} + +#ifndef RIPPER +static VALUE +parser_heredoc_dedent_string(VALUE input, int width, int first) +{ + long len; + int col; + char *str, *p, *out_p, *end, *t; + + RSTRING_GETMEM(input, str, len); + end = &str[len]; + + p = str; + if (!first) { + p = memchr(p, '\n', end - p); + if (!p) return input; + p++; + } + out_p = p; + while (p < end) { + col = dedent_pos(p, end - p, width); + p += col; + if (!(t = memchr(p, '\n', end - p))) + t = end; + else + ++t; + if (p > out_p) memmove(out_p, p, t - p); + out_p += t - p; + p = t; + } + rb_str_set_len(input, out_p - str); + + return input; +} + +static void +parser_heredoc_dedent(struct parser_params *parser, NODE *root) +{ + NODE *node, *str_node; + int first = TRUE; + int indent = heredoc_indent; + + if (indent <= 0) return; + + node = str_node = root; + + while (str_node) { + VALUE lit = str_node->nd_lit; + if (NIL_P(parser_heredoc_dedent_string(lit, indent, first))) + compile_error(PARSER_ARG "dedent failure: %d: %"PRIsVALUE, indent, lit); + first = FALSE; + + str_node = 0; + while ((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) { + if ((str_node = node->nd_head) != 0) { + enum 
node_type type = nd_type(str_node); + if (type == NODE_STR || type == NODE_DSTR) break; + } + } + } +} +#else /* RIPPER */ +static void +parser_heredoc_dedent(struct parser_params *parser, VALUE array) +{ + if (heredoc_indent <= 0) return; + + dispatch2(heredoc_dedent, array, INT2NUM(heredoc_indent)); +} + +static VALUE +parser_dedent_string(VALUE self, VALUE input, VALUE width) +{ + char *str; + long len; + int wid, col; + + StringValue(input); + wid = NUM2UINT(width); + rb_str_modify(input); + RSTRING_GETMEM(input, str, len); + col = dedent_pos(str, len, wid); + MEMMOVE(str, str + col, char, len - col); + rb_str_set_len(input, len - col); + return INT2NUM(col); +} +#endif + +static int parser_whole_match_p(struct parser_params *parser, const char *eos, long len, int indent) { @@ -6685,7 +6842,15 @@ parser_here_document(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6842 } if (!(func & STR_FUNC_EXPAND)) { + int end = 0; do { +#ifdef RIPPER + if (end && heredoc_indent > 0) { + set_yylval_str(str); + flush_string_content(enc); + return tSTRING_CONTENT; + } +#endif p = RSTRING_PTR(lex_lastline); pend = lex_pend; if (pend > p) { @@ -6712,7 +6877,7 @@ parser_here_document(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6877 } goto error; } - } while (!whole_match_p(eos, len, indent)); + } while (!(end = whole_match_p(eos, len, indent))); } else { /* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/ @@ -6730,11 +6895,20 @@ parser_here_document(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6895 goto restore; } if (c != '\n') { +#ifdef RIPPER + flush: +#endif set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); flush_string_content(enc); return tSTRING_CONTENT; } tokadd(nextc()); +#ifdef RIPPER + if (c == '\n' && heredoc_indent > 0) { + lex_goto_eol(parser); + goto flush; + } +#endif /* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/ if ((c = nextc()) == -1) goto error; } while (!whole_match_p(eos, len, 
indent)); @@ -11294,6 +11468,9 @@ InitVM_ripper(void) https://github.com/ruby/ruby/blob/trunk/parse.y#L11468 rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1); #endif + rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2); + rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2); + ripper_init_eventids1_table(Ripper); ripper_init_eventids2_table(Ripper); Index: ext/ripper/lib/ripper/lexer.rb =================================================================== --- ext/ripper/lib/ripper/lexer.rb (revision 52915) +++ ext/ripper/lib/ripper/lexer.rb (revision 52916) @@ -44,28 +44,56 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L44 end class Lexer < ::Ripper #:nodoc: internal use only + Elem = Struct.new(:pos, :event, :tok) + def tokenize - lex().map {|pos, event, tok| tok } + parse().sort_by(&:pos).map(&:tok) end def lex - parse().sort_by {|pos, event, tok| pos } + parse().sort_by(&:pos).map(&:to_a) end def parse @buf = [] + @stack = [] super + @buf.flatten! 
@buf end private + def on_heredoc_dedent(v, w) + @buf.each do |e| + if e.event == :on_tstring_content + if (n = dedent_string(e.tok, w)) > 0 + e.pos[1] += n + end + end + end + v + end + + def on_heredoc_beg(tok) + @stack.push @buf + buf = [] + @buf << buf + @buf = buf + @buf.push Elem.new([lineno(), column()], __callee__, tok) + end + + def on_heredoc_end(tok) + @buf.push Elem.new([lineno(), column()], __callee__, tok) + @buf = @stack.pop + end + def _push_token(tok) - @buf.push [[lineno(), column()], __callee__, tok] + @buf.push Elem.new([lineno(), column()], __callee__, tok) end - SCANNER_EVENTS.each do |event| - alias_method "on_#{event}", :_push_token + (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event| + alias_method event, :_push_token end end Index: ext/ripper/lib/ripper/sexp.rb =================================================================== --- ext/ripper/lib/ripper/sexp.rb (revision 52915) +++ ext/ripper/lib/ripper/sexp.rb (revision 52916) @@ -62,7 +62,35 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/sexp.rb#L62 class SexpBuilder < ::Ripper #:nodoc: private - PARSER_EVENTS.each do |event| + def dedent_element(e, width) + if (n = dedent_string(e[1], width)) > 0 + e[2][1] += n + end + e + end + + def on_heredoc_dedent(val, width) + sub = proc do |cont| + cont.map! 
do |e| + if Array === e + case e[0] + when :@tstring_content + e = dedent_element(e, width) + when /_add\z/ + e[1] = sub[e[1]] + end + elsif String === e + dedent_string(e, width) + end + e + end + end + sub[val] + val + end + + events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym} + (PARSER_EVENTS - events).each do |event| module_eval(<<-End, __FILE__, __LINE__ + 1) def on_#{event}(*args) args.unshift :#{event} @@ -83,6 +111,19 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/sexp.rb#L111 class SexpBuilderPP < SexpBuilder #:nodoc: private + def on_heredoc_dedent(val, width) + val.map! do |e| + next e if Symbol === e and /_content\z/ =~ e + if Array === e and e[0] == :@tstring_content + e = dedent_element(e, width) + elsif String === e + dedent_string(e, width) + end + e + end + val + end + def _dispatch_event_new [] end Index: test/ruby/test_syntax.rb =================================================================== --- test/ruby/test_syntax.rb (revision 52915) +++ test/ruby/test_syntax.rb (revision 52916) @@ -475,6 +475,94 @@ e" https://github.com/ruby/ruby/blob/trunk/test/ruby/test_syntax.rb#L475 assert_equal(expected, actual, "#{Bug7559}: ") end + def test_dedented_heredoc_without_indentation + assert_equal(" y\nz\n", <<~eos) + y +z + eos + end + + def test_dedented_heredoc_with_indentation + assert_equal(" a\nb\n", <<~eos) + a + b + eos + end + + def test_dedented_heredoc_with_blank_less_indented_line + # the blank line has two leading spaces + result = eval("<<~eos\n" \ + " a\n" \ + " \n" \ + " b\n" \ + " eos\n") + assert_equal("a\n\nb\n", result) + end + + def test_dedented_heredoc_with_blank_less_indented_line_escaped + result = eval("<<~eos\n" \ + " a\n" \ + "\\ \\ \n" \ + " b\n" \ + " eos\n") + assert_equal(" a\n \n b\n", result) + end + + def test_dedented_heredoc_with_blank_more_indented_line + # the blank line has six leading spaces + result = eval("<<~eos\n" \ + " a\n" \ + " \n" \ + " b\n" \ + " eos\n") + 
assert_equal("a\n \nb\n", result) + end + + def test_dedented_heredoc_with_blank_more_indented_line_escaped + result = eval("<<~eos\n" \ + " a\n" \ + "\\ \\ \\ \\ \\ \\ \n" \ + " b\n" \ + " eos\n") + assert_equal(" a\n \n b\n", result) + end + + def test_dedented_heredoc_with_empty_line +result = eval("<<~eos\n" \ + " This would contain specially formatted text.\n" \ + "\n" \ + " That might span many lines\n" \ + " eos\n") + assert_equal(<<-eos, result) +This would contain specially formatted text. + +That might span many lines + eos + end + + def test_dedented_heredoc_with_interpolated_expression + result = eval(" <<~eos\n" \ + " #{1}a\n" \ + " zy\n" \ + " eos\n") + assert_equal(<<-eos, result) + #{1}a +zy + eos + end + + def test_dedented_heredoc_with_interpolated_string + w = "" + result = eval("<<~eos\n" \ + " \#{w} a\n" \ + " zy\n" \ + " eos\n") + assert_equal(<<-eos, result) +#{w} a + zy + eos + end + def test_lineno_after_heredoc bug7559 = '[ruby-dev:46737]' expected, _, actual = __LINE__, <<eom, __LINE__ Index: test/ripper/test_sexp.rb =================================================================== --- test/ripper/test_sexp.rb (revision 52915) +++ test/ripper/test_sexp.rb (revision 52916) @@ -38,6 +38,27 @@ class TestRipper::Sexp < Test::Unit::Tes https://github.com/ruby/ruby/blob/trunk/test/ripper/test_sexp.rb#L38 assert_equal "foo\n", search_sexp(:@tstring_content, sexp)[1] end + def test_squiggly_heredoc + sexp = Ripper.sexp("<<~eot\n asdf\neot") + assert_equal "asdf\n", search_sexp(:@tstring_content, sexp)[1] + end + + def test_squiggly_heredoc_with_interpolated_expression + sexp1 = Ripper.sexp(<<-eos) +<<-eot +a\#{1}z +eot + eos + + sexp2 = Ripper.sexp(<<-eos) +<<~eot + a\#{1}z +eot + eos + + assert_equal clear_pos(sexp1), clear_pos(sexp2) + end + def search_sexp(sym, sexp) return sexp if !sexp or sexp[0] == sym sexp.find do |e| @@ -46,4 +67,18 @@ class TestRipper::Sexp < Test::Unit::Tes 
https://github.com/ruby/ruby/blob/trunk/test/ripper/test_sexp.rb#L67 end end end + + def clear_pos(sexp) + return sexp if !sexp + sexp.each do |e| + if Array === e + if e.size == 3 and Array === (last = e.last) and + last.size == 2 and Integer === last[0] and Integer === last[1] + last.clear + else + clear_pos(e) + end + end + end + end end if ripper_test Index: test/ripper/test_parser_events.rb =================================================================== --- test/ripper/test_parser_events.rb (revision 52915) +++ test/ripper/test_parser_events.rb (revision 52916) @@ -431,6 +431,19 @@ class TestRipper::ParserEvents < Test::U https://github.com/ruby/ruby/blob/trunk/test/ripper/test_parser_events.rb#L431 assert_equal("heredoc1\nheredoc2\n", heredoc, bug1921) end + def test_heredoc_dedent + thru_heredoc_dedent = false + str = width = nil + tree = parse("<""<~EOS\n heredoc\nEOS\n", :on_heredoc_dedent) {|e, s, w| + thru_heredoc_dedent = true + str = s + width = w + } + assert_equal true, thru_heredoc_dedent + assert_match(/string_content\(\), heredoc\n/, tree) + assert_equal(1, width) + end + def test_massign thru_massign = false parse("a, b = 1, 2", :on_massign) {thru_massign = true} -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/