ruby-changes:47778
From: nobu <ko1@a...>
Date: Thu, 14 Sep 2017 19:53:53 +0900 (JST)
Subject: [ruby-changes:47778] nobu:r59896 (trunk): ripper: add states of scanner
nobu 2017-09-14 19:53:47 +0900 (Thu, 14 Sep 2017) New Revision: 59896 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59896 Log: ripper: add states of scanner * parse.y (ripper_state): add states of scanner to tokens from Ripper.lex and Ripper::Filter#on_*. based on the patch by aycabta (Code Ahss) at [ruby-core:81789]. [Feature #13686] * ext/ripper/tools/preproc.rb (prelude, usercode): generate EXPR_* constants from enums. Modified files: trunk/NEWS trunk/ext/ripper/lib/ripper/filter.rb trunk/ext/ripper/lib/ripper/lexer.rb trunk/ext/ripper/tools/preproc.rb trunk/parse.y trunk/test/ripper/test_filter.rb trunk/test/ripper/test_ripper.rb trunk/test/ripper/test_scanner_events.rb Index: test/ripper/test_ripper.rb =================================================================== --- test/ripper/test_ripper.rb (revision 59895) +++ test/ripper/test_ripper.rb (revision 59896) @@ -17,6 +17,10 @@ class TestRipper::Ripper < Test::Unit::T https://github.com/ruby/ruby/blob/trunk/test/ripper/test_ripper.rb#L17 assert_nil @ripper.column end + def test_state + assert_nil @ripper.state + end + def test_encoding assert_equal Encoding::UTF_8, @ripper.encoding ripper = Ripper.new('# coding: iso-8859-15') Index: test/ripper/test_filter.rb =================================================================== --- test/ripper/test_filter.rb (revision 59895) +++ test/ripper/test_filter.rb (revision 59896) @@ -15,6 +15,7 @@ class TestRipper::Filter < Test::Unit::T https://github.com/ruby/ruby/blob/trunk/test/ripper/test_filter.rb#L15 data[:filename] = filename rescue nil data[:lineno] = lineno data[:column] = column + data[:state] = state data[:token] = token end data @@ -75,6 +76,16 @@ class TestRipper::Filter < Test::Unit::T https://github.com/ruby/ruby/blob/trunk/test/ripper/test_filter.rb#L76 assert_equal(last_columns, filter.column) end + def test_filter_state + data = {} + src = File.read(filename) + filter = Filter.new(src) + assert_equal(nil, filter.state) + filter.parse(data) + assert_not_nil(data[:state]) + assert_not_nil(filter.state) + end + def test_filter_token data = {} filter = Filter.new("begin; puts 1; end") Index: test/ripper/test_scanner_events.rb =================================================================== --- test/ripper/test_scanner_events.rb (revision 59895) +++ test/ripper/test_scanner_events.rb (revision 59896) @@ -48,70 +48,70 @@ class TestRipper::ScannerEvents < Test:: https://github.com/ruby/ruby/blob/trunk/test/ripper/test_scanner_events.rb#L48 def test_lex assert_equal [], Ripper.lex('') - assert_equal [[[1,0], :on_ident, "a"]], + assert_equal [[[1,0], :on_ident, "a", Ripper::EXPR_CMDARG]], Ripper.lex('a') - assert_equal [[[1, 0], :on_kw, "nil"]], + assert_equal [[[1, 0], :on_kw, "nil", Ripper::EXPR_END]], Ripper.lex("nil") - assert_equal [[[1, 0], :on_kw, "def"], - [[1, 3], :on_sp, " "], - [[1, 4], :on_ident, "m"], - [[1, 5], :on_lparen, "("], - [[1, 6], :on_ident, "a"], - [[1, 7], :on_rparen, ")"], - [[1, 8], :on_kw, "end"]], + assert_equal [[[1, 0], :on_kw, "def", Ripper::EXPR_FNAME], + [[1, 3], :on_sp, " ", Ripper::EXPR_FNAME], + [[1, 4], :on_ident, "m", Ripper::EXPR_ENDFN], + [[1, 5], :on_lparen, "(", Ripper::EXPR_BEG | Ripper::EXPR_LABEL], + [[1, 6], :on_ident, "a", Ripper::EXPR_ARG], + [[1, 7], :on_rparen, ")", Ripper::EXPR_ENDFN], + [[1, 8], :on_kw, "end", Ripper::EXPR_END]], Ripper.lex("def m(a)end") - assert_equal [[[1, 0], :on_int, "1"], - [[1, 1], :on_nl, "\n"], - [[2, 0], :on_int, "2"], - [[2, 1], :on_nl, "\n"], - [[3, 0], :on_int, "3"]], + assert_equal [[[1, 0], :on_int, "1", Ripper::EXPR_END | Ripper::EXPR_ENDARG], + [[1, 1], :on_nl, "\n", Ripper::EXPR_BEG], + [[2, 0], :on_int, "2", Ripper::EXPR_END | Ripper::EXPR_ENDARG], + [[2, 1], :on_nl, "\n", Ripper::EXPR_BEG], + [[3, 0], :on_int, "3", Ripper::EXPR_END | Ripper::EXPR_ENDARG]], Ripper.lex("1\n2\n3") - assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"], - [[1, 5], :on_nl, "\n"], - [[2, 0], :on_tstring_content, "heredoc\n"], - [[3, 0], :on_heredoc_end, "EOS"]], + assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG], + [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG], + [[2, 0], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG], + [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]], Ripper.lex("<<""EOS\nheredoc\nEOS") - assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"], - [[1, 5], :on_nl, "\n"], - [[2, 0], :on_heredoc_end, "EOS"]], + assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG], + [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG], + [[2, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]], Ripper.lex("<<""EOS\nEOS"), "bug#4543" - assert_equal [[[1, 0], :on_regexp_beg, "/"], - [[1, 1], :on_tstring_content, "foo\nbar"], - [[2, 3], :on_regexp_end, "/"]], + assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG], + [[1, 1], :on_tstring_content, "foo\nbar", Ripper::EXPR_BEG], + [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]], Ripper.lex("/foo\nbar/") - assert_equal [[[1, 0], :on_regexp_beg, "/"], - [[1, 1], :on_tstring_content, "foo\n\u3020"], - [[2, 3], :on_regexp_end, "/"]], + assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG], + [[1, 1], :on_tstring_content, "foo\n\u3020", Ripper::EXPR_BEG], + [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]], Ripper.lex("/foo\n\u3020/") - assert_equal [[[1, 0], :on_tstring_beg, "'"], - [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0"], - [[2, 3], :on_tstring_end, "'"]], + assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG], + [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0", Ripper::EXPR_BEG], + [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]], Ripper.lex("'foo\n\xe3\x80\xa0'") - assert_equal [[[1, 0], :on_tstring_beg, "'"], - [[1, 1], :on_tstring_content, "\u3042\n\u3044"], - [[2, 3], :on_tstring_end, "'"]], + assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG], + [[1, 1], :on_tstring_content, "\u3042\n\u3044", Ripper::EXPR_BEG], + [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]], Ripper.lex("'\u3042\n\u3044'") - assert_equal [[[1, 0], :on_rational, "1r"], - [[1, 2], :on_nl, "\n"], - [[2, 0], :on_imaginary, "2i"], - [[2, 2], :on_nl, "\n"], - [[3, 0], :on_imaginary, "3ri"], - [[3, 3], :on_nl, "\n"], - [[4, 0], :on_rational, "4.2r"], - [[4, 4], :on_nl, "\n"], - [[5, 0], :on_imaginary, "5.6ri"], + assert_equal [[[1, 0], :on_rational, "1r", Ripper::EXPR_END | Ripper::EXPR_ENDARG], + [[1, 2], :on_nl, "\n", Ripper::EXPR_BEG], + [[2, 0], :on_imaginary, "2i", Ripper::EXPR_END | Ripper::EXPR_ENDARG], + [[2, 2], :on_nl, "\n", Ripper::EXPR_BEG], + [[3, 0], :on_imaginary, "3ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG], + [[3, 3], :on_nl, "\n", Ripper::EXPR_BEG], + [[4, 0], :on_rational, "4.2r", Ripper::EXPR_END | Ripper::EXPR_ENDARG], + [[4, 4], :on_nl, "\n", Ripper::EXPR_BEG], + [[5, 0], :on_imaginary, "5.6ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG], ], Ripper.lex("1r\n2i\n3ri\n4.2r\n5.6ri") - assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS"], - [[1, 6], :on_nl, "\n"], - [[2, 0], :on_ignored_sp, " "], - [[2, 2], :on_tstring_content, "heredoc\n"], - [[3, 0], :on_heredoc_end, "EOS"] + assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS", Ripper::EXPR_BEG], + [[1, 6], :on_nl, "\n", Ripper::EXPR_BEG], + [[2, 0], :on_ignored_sp, " ", Ripper::EXPR_BEG], + [[2, 2], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG], + [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG] ], Ripper.lex("<<~EOS\n heredoc\nEOS") - assert_equal [[[1, 0], :on_tstring_beg, "'"], - [[1, 1], :on_tstring_content, "foo"]], + assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG], + [[1, 1], :on_tstring_content, "foo", Ripper::EXPR_BEG]], Ripper.lex("'foo") end Index: ext/ripper/lib/ripper/lexer.rb =================================================================== --- ext/ripper/lib/ripper/lexer.rb (revision 59895) +++ ext/ripper/lib/ripper/lexer.rb (revision 59896) @@ -23,29 +23,30 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L23 end # Tokenizes the Ruby program and returns an array of an array, - # which is formatted like <code>[[lineno, column], type, token]</code>. + # which is formatted like + # <code>[[lineno, column], type, token, state]</code>. # # require 'ripper' # require 'pp' # # pp Ripper.lex("def m(a) nil end") - # #=> [[[1, 0], :on_kw, "def"], - # [[1, 3], :on_sp, " " ], - # [[1, 4], :on_ident, "m" ], - # [[1, 5], :on_lparen, "(" ], - # [[1, 6], :on_ident, "a" ], - # [[1, 7], :on_rparen, ")" ], - # [[1, 8], :on_sp, " " ], - # [[1, 9], :on_kw, "nil"], - # [[1, 12], :on_sp, " " ], - # [[1, 13], :on_kw, "end"]] + # #=> [[[1, 0], :on_kw, "def", Ripper::EXPR_FNAME ], + # [[1, 3], :on_sp, " ", Ripper::EXPR_FNAME ], + # [[1, 4], :on_ident, "m", Ripper::EXPR_ENDFN ], + # [[1, 5], :on_lparen, "(", Ripper::EXPR_LABEL | Ripper::EXPR_BEG], + # [[1, 6], :on_ident, "a", Ripper::EXPR_ARG ], + # [[1, 7], :on_rparen, ")", Ripper::EXPR_ENDFN ], + # [[1, 8], :on_sp, " ", Ripper::EXPR_BEG ], + # [[1, 9], :on_kw, "nil", Ripper::EXPR_END ], + # [[1, 12], :on_sp, " ", Ripper::EXPR_END ], + # [[1, 13], :on_kw, "end", Ripper::EXPR_END ]] # def Ripper.lex(src, filename = '-', lineno = 1) Lexer.new(src, filename, lineno).lex end class Lexer < ::Ripper #:nodoc: internal use only - Elem = Struct.new(:pos, :event, :tok) + Elem = Struct.new(:pos, :event, :tok, :state) def tokenize parse().sort_by(&:pos).map(&:tok) @@ -77,7 +78,7 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L78 e.event = :on_ignored_sp next end - ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n])] + ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n], e.state)] e.pos[1] += n end end @@ -93,16 +94,16 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/lexer.rb#L94 buf = [] @buf << buf @buf = buf - @buf.push Elem.new([lineno(), column()], __callee__, tok) + @buf.push Elem.new([lineno(), column()], __callee__, tok, state()) end def on_heredoc_end(tok) - @buf.push Elem.new([lineno(), column()], __callee__, tok) + @buf.push Elem.new([lineno(), column()], __callee__, tok, state()) @buf = @stack.pop end def _push_token(tok) - @buf.push Elem.new([lineno(), column()], __callee__, tok) + @buf.push Elem.new([lineno(), column()], __callee__, tok, state()) end (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event| Index: ext/ripper/lib/ripper/filter.rb =================================================================== --- ext/ripper/lib/ripper/filter.rb (revision 59895) +++ ext/ripper/lib/ripper/filter.rb (revision 59896) @@ -25,6 +25,7 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/filter.rb#L25 @__lexer = Lexer.new(src, filename, lineno) @__line = nil @__col = nil + @__state = nil end # The file name of the input. @@ -46,13 +47,20 @@ class Ripper https://github.com/ruby/ruby/blob/trunk/ext/ripper/lib/ripper/filter.rb#L47 @__col end + # The scanner's state of the current token. + # This value is the bitwise OR of zero or more of the +Ripper::EXPR_*+ constants. + def state + @__state + end + # Starts the parser. # +init+ is a data accumulator and is passed to the next event handler (as # of Enumerable#inject). def parse(init = nil) data = init - @__lexer.lex.each do |pos, event, tok| + @__lexer.lex.each do |pos, event, tok, state| @__line, @__col = *pos + @__state = state data = if respond_to?(event, true) then __send__(event, tok, data) else on_default(event, tok, data) Index: ext/ripper/tools/preproc.rb =================================================================== --- ext/ripper/tools/preproc.rb (revision 59895) +++ ext/ripper/tools/preproc.rb (revision 59896) @@ -40,6 +40,7 @@ def main https://github.com/ruby/ruby/blob/trunk/ext/ripper/tools/preproc.rb#L40 end def prelude(f, out) + @exprs = {} while line = f.gets case line when %r</\*%%%\*/> @@ -56,6 +57,16 @@ def prelude(f, out) https://github.com/ruby/ruby/blob/trunk/ext/ripper/tools/preproc.rb#L57 when /\A%type/ out << line.sub(/<\w+>/, '<val>') else + if (/^enum lex_state_(?:bits|e) \{/ =~ line)..(/^\}/ =~ line) + case line + when /^\s*(EXPR_\w+),\s+\/\*(.+)\*\// + @exprs[$1.chomp("_bit")] = $2.strip + when /^\s*(EXPR_\w+)\s+=\s+(.+)$/ + name = $1 + val = $2.chomp(",") + @exprs[name] = "equals to " + (val.start_with?("(") ? "<tt>#{val}</tt>" : "+#{val}+") + end + end out << line end end @@ -84,9 +95,12 @@ def grammar(f, out) https://github.com/ruby/ruby/blob/trunk/ext/ripper/tools/preproc.rb#L95 end def usercode(f, out) - while line = f.gets - out << line - end + require 'erb' + compiler = ERB::Compiler.new('%-') + compiler.put_cmd = compiler.insert_cmd = "out.<<" + lineno = f.lineno + src, = compiler.compile(f.read) + eval(src, binding, f.path, lineno) end main Index: parse.y =================================================================== --- parse.y (revision 59895) +++ parse.y (revision 59896) @@ -11446,6 +11446,25 @@ ripper_lineno(VALUE self) https://github.com/ruby/ruby/blob/trunk/parse.y#L11446 return INT2NUM(ruby_sourceline); } +/* + * call-seq: + * ripper.state -> Integer + * + * Return scanner state of current token. + */ +static VALUE +ripper_state(VALUE self) +{ + struct parser_params *parser; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (NIL_P(parser->parsing_thread)) return Qnil; + return INT2NUM(lex_state); +} + #ifdef RIPPER_DEBUG /* :nodoc: */ static VALUE @@ -11493,6 +11512,7 @@ InitVM_ripper(void) https://github.com/ruby/ruby/blob/trunk/parse.y#L11512 rb_define_method(Ripper, "column", ripper_column, 0); rb_define_method(Ripper, "filename", ripper_filename, 0); rb_define_method(Ripper, "lineno", ripper_lineno, 0); + rb_define_method(Ripper, "state", ripper_state, 0); rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0); rb_define_method(Ripper, "encoding", rb_parser_encoding, 0); rb_define_method(Ripper, "yydebug", rb_parser_get_yydebug, 0); @@ -11507,6 +11527,10 @@ InitVM_ripper(void) https://github.com/ruby/ruby/blob/trunk/parse.y#L11527 rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2); rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2); +<% @exprs.each do |expr, desc| -%> + /* <%=desc%> */ + rb_define_const(Ripper, "<%=expr%>", INT2NUM(<%=expr%>)); +<% end %> ripper_init_eventids1_table(Ripper); ripper_init_eventids2_table(Ripper); Index: NEWS =================================================================== --- NEWS (revision 59895) +++ NEWS (revision 59896) @@ -133,6 +133,13 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/NEWS#L133 * New constants: * RbConfig::LIMITS is added to provide the limits of C types. +* Ripper + * New method: + * Ripper#state is added to tell the state of scanner. [Feature #13686] + + * New constants: + * Ripper::EXPR_BEG and so on for Ripper#state. + * Set * Add Set#to_s as alias to #inspect [Feature #13676] -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/