[前][次][番号順一覧][スレッド一覧]

ruby-changes:40837

From: nobu <ko1@a...>
Date: Mon, 7 Dec 2015 23:40:09 +0900 (JST)
Subject: [ruby-changes:40837] nobu:r52916 (trunk): parse.y: indented heredoc

nobu	2015-12-07 23:39:52 +0900 (Mon, 07 Dec 2015)

  New Revision: 52916

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=52916

  Log:
    parse.y: indented heredoc
    
    * parse.y: add heredoc <<~ syntax.  [Feature #9098]

  Modified files:
    trunk/ChangeLog
    trunk/doc/syntax/literals.rdoc
    trunk/ext/ripper/lib/ripper/lexer.rb
    trunk/ext/ripper/lib/ripper/sexp.rb
    trunk/parse.y
    trunk/test/ripper/test_parser_events.rb
    trunk/test/ripper/test_sexp.rb
    trunk/test/ruby/test_syntax.rb
Index: doc/syntax/literals.rdoc
===================================================================
--- doc/syntax/literals.rdoc	(revision 52915)
+++ doc/syntax/literals.rdoc	(revision 52916)
@@ -196,6 +196,20 @@ Note that the while the closing identifi https://github.com/ruby/ruby/blob/trunk/doc/syntax/literals.rdoc#L196
 always treated as if it is flush left.  If you indent the content those spaces
 will appear in the output.
 
+To have indented content as well as an indented closing identifier, you can use
+a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:
+
+    expected_result = <<~SQUIGGLY_HEREDOC
+      This would contain specially formatted text.
+
+      That might span many lines
+    SQUIGGLY_HEREDOC
+
+The indentation of the least-indented line will be removed from each line of
+the content.  Note that empty lines and lines consisting solely of literal tabs
+and spaces will be ignored for the purposes of determining indentation, but
+escaped tabs and spaces are considered non-indentation characters.
+
 A heredoc allows interpolation and escaped characters.  You may disable
 interpolation and escaping by surrounding the opening identifier with single
 quotes:
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 52915)
+++ ChangeLog	(revision 52916)
@@ -1,3 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Mon Dec  7 23:39:49 2015  Ben Miller  <bjmllr@g...>
+
+	* parse.y: add heredoc <<~ syntax.  [Feature #9098]
+
 Mon Dec  7 23:06:16 2015  Kazuhiro NISHIYAMA  <zn@m...>
 
 	* prelude.rb (IO#read_nonblock): [DOC] add missing options to
Index: parse.y
===================================================================
--- parse.y	(revision 52915)
+++ parse.y	(revision 52916)
@@ -257,6 +257,8 @@ struct parser_params { https://github.com/ruby/ruby/blob/trunk/parse.y#L257
     int toksiz;
     int tokline;
     int heredoc_end;
+    int heredoc_indent;
+    int heredoc_line_indent;
     char *tokenbuf;
     NODE *deferred_nodes;
     struct local_vars *lvtbl;
@@ -347,6 +349,8 @@ static int parser_yyerror(struct parser_ https://github.com/ruby/ruby/blob/trunk/parse.y#L349
 #define lex_p			(parser->lex.pcur)
 #define lex_pend		(parser->lex.pend)
 #define heredoc_end		(parser->heredoc_end)
+#define heredoc_indent		(parser->heredoc_indent)
+#define heredoc_line_indent	(parser->heredoc_line_indent)
 #define command_start		(parser->command_start)
 #define deferred_nodes		(parser->deferred_nodes)
 #define lex_gets_ptr		(parser->lex.gets_ptr)
@@ -487,6 +491,9 @@ static int reg_fragment_check_gen(struct https://github.com/ruby/ruby/blob/trunk/parse.y#L491
 static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
 #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))
 
+static void parser_heredoc_dedent(struct parser_params*,NODE*);
+# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
+
 #define get_id(id) (id)
 #define get_value(val) (val)
 #else
@@ -670,6 +677,9 @@ new_args_tail_gen(struct parser_params * https://github.com/ruby/ruby/blob/trunk/parse.y#L677
 
 #define new_defined(expr) dispatch1(defined, (expr))
 
+static void parser_heredoc_dedent(struct parser_params*,VALUE);
+# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
+
 #define FIXME 0
 
 #endif /* RIPPER */
@@ -3887,6 +3897,7 @@ strings		: string https://github.com/ruby/ruby/blob/trunk/parse.y#L3897
 			else {
 			    node = evstr2dstr(node);
 			}
+			heredoc_indent = 0;
 			$$ = node;
 		    /*%
 			$$ = $1;
@@ -3908,6 +3919,7 @@ string		: tCHAR https://github.com/ruby/ruby/blob/trunk/parse.y#L3919
 
 string1		: tSTRING_BEG string_contents tSTRING_END
 		    {
+			heredoc_dedent($2);
 		    /*%%%*/
 			$$ = $2;
 		    /*%
@@ -3920,6 +3932,10 @@ xstring		: tXSTRING_BEG xstring_contents https://github.com/ruby/ruby/blob/trunk/parse.y#L3932
 		    {
 		    /*%%%*/
 			NODE *node = $2;
+		    /*%
+		    %*/
+			heredoc_dedent($2);
+		    /*%%%*/
 			if (!node) {
 			    node = NEW_XSTR(STR_NEW0());
 			}
@@ -4319,6 +4335,10 @@ string_content	: tSTRING_CONTENT https://github.com/ruby/ruby/blob/trunk/parse.y#L4335
 			$<num>$ = brace_nest;
 			brace_nest = 0;
 		    }
+		    {
+			$<num>$ = heredoc_indent;
+			heredoc_indent = 0;
+		    }
 		  compstmt tSTRING_DEND
 		    {
 			cond_stack = $<val>1;
@@ -4326,11 +4346,13 @@ string_content	: tSTRING_CONTENT https://github.com/ruby/ruby/blob/trunk/parse.y#L4346
 			lex_strterm = $<node>3;
 			lex_state = $<num>4;
 			brace_nest = $<num>5;
+			heredoc_indent = $<num>6;
+			heredoc_line_indent = -1;
 		    /*%%%*/
-			if ($6) $6->flags &= ~NODE_FL_NEWLINE;
-			$$ = new_evstr($6);
+			if ($7) $7->flags &= ~NODE_FL_NEWLINE;
+			$$ = new_evstr($7);
 		    /*%
-			$$ = dispatch1(string_embexpr, $6);
+			$$ = dispatch1(string_embexpr, $7);
 		    %*/
 		    }
 		;
@@ -6204,6 +6226,27 @@ parser_tokadd_string(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6226
     } while (0)
 
     while ((c = nextc()) != -1) {
+	if (heredoc_indent > 0) {
+	    if (heredoc_line_indent == -1) {
+		if (c == '\n') heredoc_line_indent = 0;
+	    }
+	    else {
+		if (c == ' ') {
+		    heredoc_line_indent++;
+		}
+		else if (c == '\t') {
+		    int w = (heredoc_line_indent / TAB_WIDTH) + 1;
+		    heredoc_line_indent = w * TAB_WIDTH;
+		}
+		else if (c != '\n') {
+		    if (heredoc_indent > heredoc_line_indent) {
+			heredoc_indent = heredoc_line_indent;
+		    }
+		    heredoc_line_indent = -1;
+		}
+	    }
+	}
+
 	if (paren && c == paren) {
 	    ++*nest;
 	}
@@ -6465,6 +6508,12 @@ parser_heredoc_identifier(struct parser_ https://github.com/ruby/ruby/blob/trunk/parse.y#L6508
 	c = nextc();
 	func = STR_FUNC_INDENT;
     }
+    else if (c == '~') {
+	c = nextc();
+	func = STR_FUNC_INDENT;
+	heredoc_indent = INT_MAX;
+	heredoc_line_indent = 0;
+    }
     switch (c) {
       case '\'':
 	func |= str_squote; goto quoted;
@@ -6489,7 +6538,7 @@ parser_heredoc_identifier(struct parser_ https://github.com/ruby/ruby/blob/trunk/parse.y#L6538
 	if (!parser_is_identchar()) {
 	    pushback(c);
 	    if (func & STR_FUNC_INDENT) {
-		pushback('-');
+		pushback(heredoc_indent > 0 ? '~' : '-');
 	    }
 	    return 0;
 	}
@@ -6535,6 +6584,114 @@ parser_heredoc_restore(struct parser_par https://github.com/ruby/ruby/blob/trunk/parse.y#L6584
 }
 
 static int
+dedent_pos(const char *str, long len, int width)
+{
+    int i, col = 0;
+
+    for (i = 0; i < len && col < width; i++) {
+	if (str[i] == ' ') {
+	    col++;
+	}
+	else if (str[i] == '\t') {
+	    int n = TAB_WIDTH * (col / TAB_WIDTH + 1);
+	    if (n > width) break;
+	    col = n;
+	}
+	else {
+	    break;
+	}
+    }
+    return i;
+}
+
+#ifndef RIPPER
+static VALUE
+parser_heredoc_dedent_string(VALUE input, int width, int first)
+{
+    long len;
+    int col;
+    char *str, *p, *out_p, *end, *t;
+
+    RSTRING_GETMEM(input, str, len);
+    end = &str[len];
+
+    p = str;
+    if (!first) {
+	p = memchr(p, '\n', end - p);
+	if (!p) return input;
+	p++;
+    }
+    out_p = p;
+    while (p < end) {
+	col = dedent_pos(p, end - p, width);
+	p += col;
+	if (!(t = memchr(p, '\n', end - p)))
+	    t = end;
+	else
+	    ++t;
+	if (p > out_p) memmove(out_p, p, t - p);
+	out_p += t - p;
+	p = t;
+    }
+    rb_str_set_len(input, out_p - str);
+
+    return input;
+}
+
+static void
+parser_heredoc_dedent(struct parser_params *parser, NODE *root)
+{
+    NODE *node, *str_node;
+    int first = TRUE;
+    int indent = heredoc_indent;
+
+    if (indent <= 0) return;
+
+    node = str_node = root;
+
+    while (str_node) {
+	VALUE lit = str_node->nd_lit;
+	if (NIL_P(parser_heredoc_dedent_string(lit, indent, first)))
+	    compile_error(PARSER_ARG "dedent failure: %d: %"PRIsVALUE, indent, lit);
+	first = FALSE;
+
+	str_node = 0;
+	while ((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) {
+	    if ((str_node = node->nd_head) != 0) {
+		enum node_type type = nd_type(str_node);
+		if (type == NODE_STR || type == NODE_DSTR) break;
+	    }
+	}
+    }
+}
+#else /* RIPPER */
+static void
+parser_heredoc_dedent(struct parser_params *parser, VALUE array)
+{
+    if (heredoc_indent <= 0) return;
+
+    dispatch2(heredoc_dedent, array, INT2NUM(heredoc_indent));
+}
+
+static VALUE
+parser_dedent_string(VALUE self, VALUE input, VALUE width)
+{
+    char *str;
+    long len;
+    int wid, col;
+
+    StringValue(input);
+    wid = NUM2UINT(width);
+    rb_str_modify(input);
+    RSTRING_GETMEM(input, str, len);
+    col = dedent_pos(str, len, wid);
+    MEMMOVE(str, str + col, char, len - col);
+    rb_str_set_len(input, len - col);
+    return INT2NUM(col);
+}
+#endif
+
+static int
 parser_whole_match_p(struct parser_params *parser,
     const char *eos, long len, int indent)
 {
@@ -6685,7 +6842,15 @@ parser_here_document(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6842
     }
 
     if (!(func & STR_FUNC_EXPAND)) {
+	int end = 0;
 	do {
+#ifdef RIPPER
+	    if (end && heredoc_indent > 0) {
+		set_yylval_str(str);
+		flush_string_content(enc);
+		return tSTRING_CONTENT;
+	    }
+#endif
 	    p = RSTRING_PTR(lex_lastline);
 	    pend = lex_pend;
 	    if (pend > p) {
@@ -6712,7 +6877,7 @@ parser_here_document(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6877
 		}
 		goto error;
 	    }
-	} while (!whole_match_p(eos, len, indent));
+	} while (!(end = whole_match_p(eos, len, indent)));
     }
     else {
 	/*	int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/
@@ -6730,11 +6895,20 @@ parser_here_document(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6895
 		goto restore;
 	    }
 	    if (c != '\n') {
+#ifdef RIPPER
+	      flush:
+#endif
 		set_yylval_str(STR_NEW3(tok(), toklen(), enc, func));
 		flush_string_content(enc);
 		return tSTRING_CONTENT;
 	    }
 	    tokadd(nextc());
+#ifdef RIPPER
+	    if (c == '\n' && heredoc_indent > 0) {
+		lex_goto_eol(parser);
+		goto flush;
+	    }
+#endif
 	    /*	    if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/
 	    if ((c = nextc()) == -1) goto error;
 	} while (!whole_match_p(eos, len, indent));
@@ -11294,6 +11468,9 @@ InitVM_ripper(void) https://github.com/ruby/ruby/blob/trunk/parse.y#L11468
     rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1);
 #endif
 
+    rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
+    rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
+
     ripper_init_eventids1_table(Ripper);
     ripper_init_eventids2_table(Ripper);
 
Index: ext/ripper/lib/ripper/lexer.rb
===================================================================
--- ext/ripper/lib/ripper/lexer.rb	(revision 52915)
+++ ext/ripper/lib/ripper/lexer.rb	(revision 52916)
@@ -44,28 +44,56 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L44
   end
 
   class Lexer < ::Ripper   #:nodoc: internal use only
+    Elem = Struct.new(:pos, :event, :tok)
+
     def tokenize
-      lex().map {|pos, event, tok| tok }
+      parse().sort_by(&:pos).map(&:tok)
     end
 
     def lex
-      parse().sort_by {|pos, event, tok| pos }
+      parse().sort_by(&:pos).map(&:to_a)
     end
 
     def parse
       @buf = []
+      @stack = []
       super
+      @buf.flatten!
       @buf
     end
 
     private
 
+    def on_heredoc_dedent(v, w)
+      @buf.each do |e|
+        if e.event == :on_tstring_content
+          if (n = dedent_string(e.tok, w)) > 0
+            e.pos[1] += n
+          end
+        end
+      end
+      v
+    end
+
+    def on_heredoc_beg(tok)
+      @stack.push @buf
+      buf = []
+      @buf << buf
+      @buf = buf
+      @buf.push Elem.new([lineno(), column()], __callee__, tok)
+    end
+
+    def on_heredoc_end(tok)
+      @buf.push Elem.new([lineno(), column()], __callee__, tok)
+      @buf = @stack.pop
+    end
+
     def _push_token(tok)
-      @buf.push [[lineno(), column()], __callee__, tok]
+      @buf.push Elem.new([lineno(), column()], __callee__, tok)
     end
 
-    SCANNER_EVENTS.each do |event|
-      alias_method "on_#{event}", :_push_token
+    (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
+      alias_method event, :_push_token
     end
   end
 
Index: ext/ripper/lib/ripper/sexp.rb
===================================================================
--- ext/ripper/lib/ripper/sexp.rb	(revision 52915)
+++ ext/ripper/lib/ripper/sexp.rb	(revision 52916)
@@ -62,7 +62,35 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/sexp.rb#L62
   class SexpBuilder < ::Ripper   #:nodoc:
     private
 
-    PARSER_EVENTS.each do |event|
+    def dedent_element(e, width)
+      if (n = dedent_string(e[1], width)) > 0
+        e[2][1] += n
+      end
+      e
+    end
+
+    def on_heredoc_dedent(val, width)
+      sub = proc do |cont|
+        cont.map! do |e|
+          if Array === e
+            case e[0]
+            when :@tstring_content
+              e = dedent_element(e, width)
+            when /_add\z/
+              e[1] = sub[e[1]]
+            end
+          elsif String === e
+            dedent_string(e, width)
+          end
+          e
+        end
+      end
+      sub[val]
+      val
+    end
+
+    events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym}
+    (PARSER_EVENTS - events).each do |event|
       module_eval(<<-End, __FILE__, __LINE__ + 1)
         def on_#{event}(*args)
           args.unshift :#{event}
@@ -83,6 +111,19 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/sexp.rb#L111
   class SexpBuilderPP < SexpBuilder #:nodoc:
     private
 
+    def on_heredoc_dedent(val, width)
+      val.map! do |e|
+        next e if Symbol === e and /_content\z/ =~ e
+        if Array === e and e[0] == :@tstring_content
+          e = dedent_element(e, width)
+        elsif String === e
+          dedent_string(e, width)
+        end
+        e
+      end
+      val
+    end
+
     def _dispatch_event_new
       []
     end
Index: test/ruby/test_syntax.rb
===================================================================
--- test/ruby/test_syntax.rb	(revision 52915)
+++ test/ruby/test_syntax.rb	(revision 52916)
@@ -475,6 +475,94 @@ e" https://github.com/ruby/ruby/blob/trunk/test/ruby/test_syntax.rb#L475
     assert_equal(expected, actual, "#{Bug7559}: ")
   end
 
+  def test_dedented_heredoc_without_indentation
+    assert_equal(" y\nz\n", <<~eos)
+ y
+z
+    eos
+  end
+
+  def test_dedented_heredoc_with_indentation
+    assert_equal(" a\nb\n", <<~eos)
+     a
+    b
+    eos
+  end
+
+  def test_dedented_heredoc_with_blank_less_indented_line
+    # the blank line has two leading spaces
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "  \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("a\n\nb\n", result)
+  end
+
+  def test_dedented_heredoc_with_blank_less_indented_line_escaped
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "\\ \\ \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("    a\n  \n    b\n", result)
+  end
+
+  def test_dedented_heredoc_with_blank_more_indented_line
+    # the blank line has six leading spaces
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "      \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("a\n  \nb\n", result)
+  end
+
+  def test_dedented_heredoc_with_blank_more_indented_line_escaped
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "\\ \\ \\ \\ \\ \\ \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("    a\n      \n    b\n", result)
+  end
+
+  def test_dedented_heredoc_with_empty_line
+result = eval("<<~eos\n" \
+              "      This would contain specially formatted text.\n" \
+              "\n" \
+              "      That might span many lines\n" \
+              "    eos\n")
+    assert_equal(<<-eos, result)
+This would contain specially formatted text.
+
+That might span many lines
+    eos
+  end
+
+  def test_dedented_heredoc_with_interpolated_expression
+    result = eval(" <<~eos\n" \
+                  "  #{1}a\n" \
+                  " zy\n" \
+                  "      eos\n")
+      assert_equal(<<-eos, result)
+ #{1}a
+zy
+      eos
+  end
+
+  def test_dedented_heredoc_with_interpolated_string
+    w = ""
+    result = eval("<<~eos\n" \
+                  " \#{w} a\n" \
+                  "  zy\n" \
+                  "    eos\n")
+    assert_equal(<<-eos, result)
+#{w} a
+ zy
+    eos
+  end
+
   def test_lineno_after_heredoc
     bug7559 = '[ruby-dev:46737]'
     expected, _, actual = __LINE__, <<eom, __LINE__
Index: test/ripper/test_sexp.rb
===================================================================
--- test/ripper/test_sexp.rb	(revision 52915)
+++ test/ripper/test_sexp.rb	(revision 52916)
@@ -38,6 +38,27 @@ class TestRipper::Sexp < Test::Unit::Tes https://github.com/ruby/ruby/blob/trunk/test/ripper/test_sexp.rb#L38
     assert_equal "foo\n", search_sexp(:@tstring_content, sexp)[1]
   end
 
+  def test_squiggly_heredoc
+    sexp = Ripper.sexp("<<~eot\n      asdf\neot")
+    assert_equal "asdf\n", search_sexp(:@tstring_content, sexp)[1]
+  end
+
+  def test_squiggly_heredoc_with_interpolated_expression
+    sexp1 = Ripper.sexp(<<-eos)
+<<-eot
+a\#{1}z
+eot
+    eos
+
+    sexp2 = Ripper.sexp(<<-eos)
+<<~eot
+  a\#{1}z
+eot
+    eos
+
+    assert_equal clear_pos(sexp1), clear_pos(sexp2)
+  end
+
   def search_sexp(sym, sexp)
     return sexp if !sexp or sexp[0] == sym
     sexp.find do |e|
@@ -46,4 +67,18 @@ class TestRipper::Sexp < Test::Unit::Tes https://github.com/ruby/ruby/blob/trunk/test/ripper/test_sexp.rb#L67
       end
     end
   end
+
+  def clear_pos(sexp)
+    return sexp if !sexp
+    sexp.each do |e|
+      if Array === e
+        if e.size == 3 and Array === (last = e.last) and
+          last.size == 2 and Integer === last[0] and Integer === last[1]
+          last.clear
+        else
+          clear_pos(e)
+        end
+      end
+    end
+  end
 end if ripper_test
Index: test/ripper/test_parser_events.rb
===================================================================
--- test/ripper/test_parser_events.rb	(revision 52915)
+++ test/ripper/test_parser_events.rb	(revision 52916)
@@ -431,6 +431,19 @@ class TestRipper::ParserEvents < Test::U https://github.com/ruby/ruby/blob/trunk/test/ripper/test_parser_events.rb#L431
     assert_equal("heredoc1\nheredoc2\n", heredoc, bug1921)
   end
 
+  def test_heredoc_dedent
+    thru_heredoc_dedent = false
+    str = width = nil
+    tree = parse("<""<~EOS\n heredoc\nEOS\n", :on_heredoc_dedent) {|e, s, w|
+      thru_heredoc_dedent = true
+      str = s
+      width = w
+    }
+    assert_equal true, thru_heredoc_dedent
+    assert_match(/string_content\(\), heredoc\n/, tree)
+    assert_equal(1, width)
+  end
+
   def test_massign
     thru_massign = false
     parse("a, b = 1, 2", :on_massign) {thru_massign = true}

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]