[前][次][番号順一覧][スレッド一覧]

ruby-changes:47778

From: nobu <ko1@a...>
Date: Thu, 14 Sep 2017 19:53:53 +0900 (JST)
Subject: [ruby-changes:47778] nobu:r59896 (trunk): ripper: add states of scanner

nobu	2017-09-14 19:53:47 +0900 (Thu, 14 Sep 2017)

  New Revision: 59896

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59896

  Log:
    ripper: add states of scanner
    
    * parse.y (ripper_state): add the scanner state to tokens from
      Ripper.lex and Ripper::Filter#on_*.  Based on the patch by
      aycabta (Code Ahss) at [ruby-core:81789].  [Feature #13686]
    
    * ext/ripper/tools/preproc.rb (prelude, usercode): generate EXPR_*
      constants from enums.

  Modified files:
    trunk/NEWS
    trunk/ext/ripper/lib/ripper/filter.rb
    trunk/ext/ripper/lib/ripper/lexer.rb
    trunk/ext/ripper/tools/preproc.rb
    trunk/parse.y
    trunk/test/ripper/test_filter.rb
    trunk/test/ripper/test_ripper.rb
    trunk/test/ripper/test_scanner_events.rb
Index: test/ripper/test_ripper.rb
===================================================================
--- test/ripper/test_ripper.rb	(revision 59895)
+++ test/ripper/test_ripper.rb	(revision 59896)
@@ -17,6 +17,10 @@ class TestRipper::Ripper < Test::Unit::T https://github.com/ruby/ruby/blob/trunk/test/ripper/test_ripper.rb#L17
     assert_nil @ripper.column
   end
 
+  def test_state
+    assert_nil @ripper.state
+  end
+
   def test_encoding
     assert_equal Encoding::UTF_8, @ripper.encoding
     ripper = Ripper.new('# coding: iso-8859-15')
Index: test/ripper/test_filter.rb
===================================================================
--- test/ripper/test_filter.rb	(revision 59895)
+++ test/ripper/test_filter.rb	(revision 59896)
@@ -15,6 +15,7 @@ class TestRipper::Filter < Test::Unit::T https://github.com/ruby/ruby/blob/trunk/test/ripper/test_filter.rb#L15
         data[:filename] = filename rescue nil
         data[:lineno] = lineno
         data[:column] = column
+        data[:state] = state
         data[:token] = token
       end
       data
@@ -75,6 +76,16 @@ class TestRipper::Filter < Test::Unit::T https://github.com/ruby/ruby/blob/trunk/test/ripper/test_filter.rb#L76
     assert_equal(last_columns, filter.column)
   end
 
+  def test_filter_state
+    data = {}
+    src = File.read(filename)
+    filter = Filter.new(src)
+    assert_equal(nil, filter.state)
+    filter.parse(data)
+    assert_not_nil(data[:state])
+    assert_not_nil(filter.state)
+  end
+
   def test_filter_token
     data = {}
     filter = Filter.new("begin; puts 1; end")
Index: test/ripper/test_scanner_events.rb
===================================================================
--- test/ripper/test_scanner_events.rb	(revision 59895)
+++ test/ripper/test_scanner_events.rb	(revision 59896)
@@ -48,70 +48,70 @@ class TestRipper::ScannerEvents < Test:: https://github.com/ruby/ruby/blob/trunk/test/ripper/test_scanner_events.rb#L48
   def test_lex
     assert_equal [],
                  Ripper.lex('')
-    assert_equal [[[1,0], :on_ident, "a"]],
+    assert_equal [[[1,0], :on_ident, "a", Ripper::EXPR_CMDARG]],
                  Ripper.lex('a')
-    assert_equal [[[1, 0], :on_kw, "nil"]],
+    assert_equal [[[1, 0], :on_kw, "nil", Ripper::EXPR_END]],
                  Ripper.lex("nil")
-    assert_equal [[[1, 0], :on_kw, "def"],
-                  [[1, 3], :on_sp, " "],
-                  [[1, 4], :on_ident, "m"],
-                  [[1, 5], :on_lparen, "("],
-                  [[1, 6], :on_ident, "a"],
-                  [[1, 7], :on_rparen, ")"],
-                  [[1, 8], :on_kw, "end"]],
+    assert_equal [[[1, 0], :on_kw, "def", Ripper::EXPR_FNAME],
+                  [[1, 3], :on_sp, " ", Ripper::EXPR_FNAME],
+                  [[1, 4], :on_ident, "m", Ripper::EXPR_ENDFN],
+                  [[1, 5], :on_lparen, "(", Ripper::EXPR_BEG | Ripper::EXPR_LABEL],
+                  [[1, 6], :on_ident, "a", Ripper::EXPR_ARG],
+                  [[1, 7], :on_rparen, ")", Ripper::EXPR_ENDFN],
+                  [[1, 8], :on_kw, "end", Ripper::EXPR_END]],
                  Ripper.lex("def m(a)end")
-    assert_equal [[[1, 0], :on_int, "1"],
-                  [[1, 1], :on_nl, "\n"],
-                  [[2, 0], :on_int, "2"],
-                  [[2, 1], :on_nl, "\n"],
-                  [[3, 0], :on_int, "3"]],
+    assert_equal [[[1, 0], :on_int, "1", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+                  [[1, 1], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[2, 0], :on_int, "2", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+                  [[2, 1], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[3, 0], :on_int, "3", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
                  Ripper.lex("1\n2\n3")
-    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"],
-                  [[1, 5], :on_nl, "\n"],
-                  [[2, 0], :on_tstring_content, "heredoc\n"],
-                  [[3, 0], :on_heredoc_end, "EOS"]],
+    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG],
+                  [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[2, 0], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG],
+                  [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]],
                  Ripper.lex("<<""EOS\nheredoc\nEOS")
-    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"],
-                  [[1, 5], :on_nl, "\n"],
-                  [[2, 0], :on_heredoc_end, "EOS"]],
+    assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG],
+                  [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[2, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]],
                  Ripper.lex("<<""EOS\nEOS"),
                  "bug#4543"
-    assert_equal [[[1, 0], :on_regexp_beg, "/"],
-                  [[1, 1], :on_tstring_content, "foo\nbar"],
-                  [[2, 3], :on_regexp_end, "/"]],
+    assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG],
+                  [[1, 1], :on_tstring_content, "foo\nbar", Ripper::EXPR_BEG],
+                  [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]],
                  Ripper.lex("/foo\nbar/")
-    assert_equal [[[1, 0], :on_regexp_beg, "/"],
-                  [[1, 1], :on_tstring_content, "foo\n\u3020"],
-                  [[2, 3], :on_regexp_end, "/"]],
+    assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG],
+                  [[1, 1], :on_tstring_content, "foo\n\u3020", Ripper::EXPR_BEG],
+                  [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]],
                  Ripper.lex("/foo\n\u3020/")
-    assert_equal [[[1, 0], :on_tstring_beg, "'"],
-                  [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0"],
-                  [[2, 3], :on_tstring_end, "'"]],
+    assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
+                  [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0", Ripper::EXPR_BEG],
+                  [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
                  Ripper.lex("'foo\n\xe3\x80\xa0'")
-    assert_equal [[[1, 0], :on_tstring_beg, "'"],
-                  [[1, 1], :on_tstring_content, "\u3042\n\u3044"],
-                  [[2, 3], :on_tstring_end, "'"]],
+    assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
+                  [[1, 1], :on_tstring_content, "\u3042\n\u3044", Ripper::EXPR_BEG],
+                  [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
                  Ripper.lex("'\u3042\n\u3044'")
-    assert_equal [[[1, 0], :on_rational, "1r"],
-                  [[1, 2], :on_nl, "\n"],
-                  [[2, 0], :on_imaginary, "2i"],
-                  [[2, 2], :on_nl, "\n"],
-                  [[3, 0], :on_imaginary, "3ri"],
-                  [[3, 3], :on_nl, "\n"],
-                  [[4, 0], :on_rational, "4.2r"],
-                  [[4, 4], :on_nl, "\n"],
-                  [[5, 0], :on_imaginary, "5.6ri"],
+    assert_equal [[[1, 0], :on_rational, "1r", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+                  [[1, 2], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[2, 0], :on_imaginary, "2i", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+                  [[2, 2], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[3, 0], :on_imaginary, "3ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+                  [[3, 3], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[4, 0], :on_rational, "4.2r", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+                  [[4, 4], :on_nl, "\n", Ripper::EXPR_BEG],
+                  [[5, 0], :on_imaginary, "5.6ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
                  ],
                  Ripper.lex("1r\n2i\n3ri\n4.2r\n5.6ri")
-     assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS"],
-                   [[1, 6], :on_nl, "\n"],
-                   [[2, 0], :on_ignored_sp, "  "],
-                   [[2, 2], :on_tstring_content, "heredoc\n"],
-                   [[3, 0], :on_heredoc_end, "EOS"]
+     assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS", Ripper::EXPR_BEG],
+                   [[1, 6], :on_nl, "\n", Ripper::EXPR_BEG],
+                   [[2, 0], :on_ignored_sp, "  ", Ripper::EXPR_BEG],
+                   [[2, 2], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG],
+                   [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]
                  ],
                  Ripper.lex("<<~EOS\n  heredoc\nEOS")
-    assert_equal [[[1, 0], :on_tstring_beg, "'"],
-                  [[1, 1], :on_tstring_content, "foo"]],
+    assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
+                  [[1, 1], :on_tstring_content, "foo", Ripper::EXPR_BEG]],
                  Ripper.lex("'foo")
   end
 
Index: ext/ripper/lib/ripper/lexer.rb
===================================================================
--- ext/ripper/lib/ripper/lexer.rb	(revision 59895)
+++ ext/ripper/lib/ripper/lexer.rb	(revision 59896)
@@ -23,29 +23,30 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L23
   end
 
   # Tokenizes the Ruby program and returns an array of an array,
-  # which is formatted like <code>[[lineno, column], type, token]</code>.
+  # which is formatted like
+  # <code>[[lineno, column], type, token, state]</code>.
   #
   #   require 'ripper'
   #   require 'pp'
   #
   #   pp Ripper.lex("def m(a) nil end")
-  #     #=> [[[1,  0], :on_kw,     "def"],
-  #          [[1,  3], :on_sp,     " "  ],
-  #          [[1,  4], :on_ident,  "m"  ],
-  #          [[1,  5], :on_lparen, "("  ],
-  #          [[1,  6], :on_ident,  "a"  ],
-  #          [[1,  7], :on_rparen, ")"  ],
-  #          [[1,  8], :on_sp,     " "  ],
-  #          [[1,  9], :on_kw,     "nil"],
-  #          [[1, 12], :on_sp,     " "  ],
-  #          [[1, 13], :on_kw,     "end"]]
+  #   #=> [[[1,  0], :on_kw,     "def", Ripper::EXPR_FNAME                   ],
+  #        [[1,  3], :on_sp,     " ",   Ripper::EXPR_FNAME                   ],
+  #        [[1,  4], :on_ident,  "m",   Ripper::EXPR_ENDFN                   ],
+  #        [[1,  5], :on_lparen, "(",   Ripper::EXPR_LABEL | Ripper::EXPR_BEG],
+  #        [[1,  6], :on_ident,  "a",   Ripper::EXPR_ARG                     ],
+  #        [[1,  7], :on_rparen, ")",   Ripper::EXPR_ENDFN                   ],
+  #        [[1,  8], :on_sp,     " ",   Ripper::EXPR_BEG                     ],
+  #        [[1,  9], :on_kw,     "nil", Ripper::EXPR_END                     ],
+  #        [[1, 12], :on_sp,     " ",   Ripper::EXPR_END                     ],
+  #        [[1, 13], :on_kw,     "end", Ripper::EXPR_END                     ]]
   #
   def Ripper.lex(src, filename = '-', lineno = 1)
     Lexer.new(src, filename, lineno).lex
   end
 
   class Lexer < ::Ripper   #:nodoc: internal use only
-    Elem = Struct.new(:pos, :event, :tok)
+    Elem = Struct.new(:pos, :event, :tok, :state)
 
     def tokenize
       parse().sort_by(&:pos).map(&:tok)
@@ -77,7 +78,7 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L78
               e.event = :on_ignored_sp
               next
             end
-            ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n])]
+            ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n], e.state)]
             e.pos[1] += n
           end
         end
@@ -93,16 +94,16 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L94
       buf = []
       @buf << buf
       @buf = buf
-      @buf.push Elem.new([lineno(), column()], __callee__, tok)
+      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
     end
 
     def on_heredoc_end(tok)
-      @buf.push Elem.new([lineno(), column()], __callee__, tok)
+      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
       @buf = @stack.pop
     end
 
     def _push_token(tok)
-      @buf.push Elem.new([lineno(), column()], __callee__, tok)
+      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
     end
 
     (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
Index: ext/ripper/lib/ripper/filter.rb
===================================================================
--- ext/ripper/lib/ripper/filter.rb	(revision 59895)
+++ ext/ripper/lib/ripper/filter.rb	(revision 59896)
@@ -25,6 +25,7 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/filter.rb#L25
       @__lexer = Lexer.new(src, filename, lineno)
       @__line = nil
       @__col = nil
+      @__state = nil
     end
 
     # The file name of the input.
@@ -46,13 +47,20 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/filter.rb#L47
       @__col
     end
 
+    # The scanner's state of the current token.
+    # This value is the bitwise OR of zero or more of the +Ripper::EXPR_*+ constants.
+    def state
+      @__state
+    end
+
     # Starts the parser.
     # +init+ is a data accumulator and is passed to the next event handler (as
     # of Enumerable#inject).
     def parse(init = nil)
       data = init
-      @__lexer.lex.each do |pos, event, tok|
+      @__lexer.lex.each do |pos, event, tok, state|
         @__line, @__col = *pos
+        @__state = state
         data = if respond_to?(event, true)
                then __send__(event, tok, data)
                else on_default(event, tok, data)
Index: ext/ripper/tools/preproc.rb
===================================================================
--- ext/ripper/tools/preproc.rb	(revision 59895)
+++ ext/ripper/tools/preproc.rb	(revision 59896)
@@ -40,6 +40,7 @@ def main https://github.com/ruby/ruby/blob/trunk/ext/ripper/tools/preproc.rb#L40
 end
 
 def prelude(f, out)
+  @exprs = {}
   while line = f.gets
     case line
     when %r</\*%%%\*/>
@@ -56,6 +57,16 @@ def prelude(f, out) https://github.com/ruby/ruby/blob/trunk/ext/ripper/tools/preproc.rb#L57
     when /\A%type/
       out << line.sub(/<\w+>/, '<val>')
     else
+      if (/^enum lex_state_(?:bits|e) \{/ =~ line)..(/^\}/ =~ line)
+        case line
+        when /^\s*(EXPR_\w+),\s+\/\*(.+)\*\//
+          @exprs[$1.chomp("_bit")] = $2.strip
+        when /^\s*(EXPR_\w+)\s+=\s+(.+)$/
+          name = $1
+          val = $2.chomp(",")
+          @exprs[name] = "equals to " + (val.start_with?("(") ? "<tt>#{val}</tt>" : "+#{val}+")
+        end
+      end
       out << line
     end
   end
@@ -84,9 +95,12 @@ def grammar(f, out) https://github.com/ruby/ruby/blob/trunk/ext/ripper/tools/preproc.rb#L95
 end
 
 def usercode(f, out)
-  while line = f.gets
-    out << line
-  end
+  require 'erb'
+  compiler = ERB::Compiler.new('%-')
+  compiler.put_cmd = compiler.insert_cmd = "out.<<"
+  lineno = f.lineno
+  src, = compiler.compile(f.read)
+  eval(src, binding, f.path, lineno)
 end
 
 main
Index: parse.y
===================================================================
--- parse.y	(revision 59895)
+++ parse.y	(revision 59896)
@@ -11446,6 +11446,25 @@ ripper_lineno(VALUE self) https://github.com/ruby/ruby/blob/trunk/parse.y#L11446
     return INT2NUM(ruby_sourceline);
 }
 
+/*
+ *  call-seq:
+ *    ripper.state   -> Integer
+ *
+ *  Return scanner state of current token.
+ */
+static VALUE
+ripper_state(VALUE self)
+{
+    struct parser_params *parser;
+
+    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
+    if (!ripper_initialized_p(parser)) {
+	rb_raise(rb_eArgError, "method called for uninitialized object");
+    }
+    if (NIL_P(parser->parsing_thread)) return Qnil;
+    return INT2NUM(lex_state);
+}
+
 #ifdef RIPPER_DEBUG
 /* :nodoc: */
 static VALUE
@@ -11493,6 +11512,7 @@ InitVM_ripper(void) https://github.com/ruby/ruby/blob/trunk/parse.y#L11512
     rb_define_method(Ripper, "column", ripper_column, 0);
     rb_define_method(Ripper, "filename", ripper_filename, 0);
     rb_define_method(Ripper, "lineno", ripper_lineno, 0);
+    rb_define_method(Ripper, "state", ripper_state, 0);
     rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0);
     rb_define_method(Ripper, "encoding", rb_parser_encoding, 0);
     rb_define_method(Ripper, "yydebug", rb_parser_get_yydebug, 0);
@@ -11507,6 +11527,10 @@ InitVM_ripper(void) https://github.com/ruby/ruby/blob/trunk/parse.y#L11527
     rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
     rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
 
+<% @exprs.each do |expr, desc| -%>
+    /* <%=desc%> */
+    rb_define_const(Ripper, "<%=expr%>", INT2NUM(<%=expr%>));
+<% end %>
     ripper_init_eventids1_table(Ripper);
     ripper_init_eventids2_table(Ripper);
 
Index: NEWS
===================================================================
--- NEWS	(revision 59895)
+++ NEWS	(revision 59896)
@@ -133,6 +133,13 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/NEWS#L133
   * New constants:
     * RbConfig::LIMITS is added to provide the limits of C types.
 
+* Ripper
+  * New method:
+    * Ripper#state is added to tell the state of scanner. [Feature #13686]
+
+  * New constants:
+    * Ripper::EXPR_BEG and so on for Ripper#state.
+
 * Set
   * Add Set#to_s as alias to #inspect [Feature #13676]
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]