ruby-changes:57060
From: Yusuke <ko1@a...>
Date: Fri, 16 Aug 2019 06:22:34 +0900 (JST)
Subject: [ruby-changes:57060] Yusuke Endoh: f71bd7477e (master): RDoc::Parser::C: Integrate do_classes and do_modules by one regexp match
https://git.ruby-lang.org/ruby.git/commit/?id=f71bd7477e From f71bd7477e84eb1cd10fa27e79b1e081ee51793a Mon Sep 17 00:00:00 2001 From: Yusuke Endoh <mame@r...> Date: Wed, 7 Aug 2019 02:14:39 +0900 Subject: RDoc::Parser::C: Integrate do_classes and do_modules by one regexp match The full scan of the C source code (`@content.scan`) is very slow. The old code invokes the scan six times in `do_classes` and `do_modules`. This change integrates the six scans into one by merging the regexps. The integrated regexp is a bit hard to maintain, but the speed up is significant: approx. 30 sec -> 20 sec in Ruby's `make rdoc`. In addition, this change omits `do_boot_defclass` unless the file name is `class.c`. `boot_defclass` is too specific to Ruby's source code, so RDoc should handle it as a special case. Before this change: TOTAL (pct) SAMPLES (pct) FRAME 858 (13.6%) 858 (13.6%) (garbage collection) 292 (4.6%) 264 (4.2%) RDoc::Parser::C#do_define_class 263 (4.2%) 250 (3.9%) RDoc::Parser::C#do_define_module 275 (4.3%) 241 (3.8%) RDoc::Parser::C#do_define_class_under 248 (3.9%) 237 (3.7%) RDoc::Parser::C#do_define_module_under 234 (3.7%) 234 (3.7%) RDoc::Parser::C#gen_body_table 219 (3.5%) 219 (3.5%) Ripper::Lexer#state_obj 217 (3.4%) 216 (3.4%) RDoc::Parser::C#do_struct_define_without_accessor 205 (3.2%) 205 (3.2%) RDoc::Parser::C#do_boot_defclass 205 (3.2%) 205 (3.2%) RDoc::Parser::C#do_singleton_class The six methods take approx. 22.2%. `do_define_class` (4.2%) + `do_define_class_under` (3.8%) + `do_define_module` (3.9%) + `do_define_module_under` (3.7%) + `do_struct_define_without_accessor` (3.4%) + `do_singleton_class` (3.2%) After this change, the methods are integrated to `do_classes_and_modules` which takes only 5.8%. 
TOTAL (pct) SAMPLES (pct) FRAME 812 (16.7%) 812 (16.7%) (garbage collection) 355 (7.3%) 284 (5.8%) RDoc::Parser::C#do_classes_and_modules 225 (4.6%) 225 (4.6%) RDoc::Parser::C#gen_body_table 429 (8.8%) 210 (4.3%) RDoc::Parser::RubyTools#get_tk 208 (4.3%) 208 (4.3%) RDoc::TokenStream#add_tokens diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb index 5cc009e..8265712 100644 --- a/lib/rdoc/parser/c.rb +++ b/lib/rdoc/parser/c.rb @@ -324,12 +324,100 @@ class RDoc::Parser::C < RDoc::Parser https://github.com/ruby/ruby/blob/trunk/lib/rdoc/parser/c.rb#L324 # Scans #content for rb_define_class, boot_defclass, rb_define_class_under # and rb_singleton_class - def do_classes - do_boot_defclass - do_define_class - do_define_class_under - do_singleton_class - do_struct_define_without_accessor + def do_classes_and_modules + do_boot_defclass if @file_name == "class.c" + + @content.scan( + %r( + (?<var_name>[\w\.]+)\s* = + \s*rb_(?: + define_(?: + class(?: # rb_define_class(class_name_1, parent_name_1) + \s*\( + \s*"(?<class_name_1>\w+)", + \s*(?<parent_name_1>\w+)\s* + \) + | + _under\s*\( # rb_define_class_under(class_under, class_name2, parent_name2...) + \s* (?<class_under>\w+), + \s* "(?<class_name_2>\w+)", + \s* + (?: + (?<parent_name_2>[\w\*\s\(\)\.\->]+) | + rb_path2class\("(?<path>[\w:]+)"\) + ) + \s*\) + ) + | + module(?: # rb_define_module(module_name_1) + \s*\( + \s*"(?<module_name_1>\w+)"\s* + \) + | + _under\s*\( # rb_define_module_under(module_under, module_name_1) + \s*(?<module_under>\w+), + \s*"(?<module_name_2>\w+)" + \s*\) + ) + ) + | + struct_define_without_accessor\s*\( # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) 
+ \s*"(?<class_name_3>\w+)", + \s*(?<parent_name_3>\w+), + \s*\w+, # Allocation function + (?:\s*"\w+",)* # Attributes + \s*NULL + \) + | + singleton_class\s*\( # rb_singleton_class(target_class_name) + \s*(?<target_class_name>\w+) + \) + ) + )mx + ) do + class_name = $~[:class_name_1] + type = :class + if class_name + # rb_define_class(class_name_1, parent_name_1) + parent_name = $~[:parent_name_1] + #under = nil + else + class_name = $~[:class_name_2] + if class_name + # rb_define_class_under(class_under, class_name2, parent_name2...) + parent_name = $~[:parent_name_2] || $~[:path] + under = $~[:class_under] + else + class_name = $~[:class_name_3] + if class_name + # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) + parent_name = $~[:parent_name_3] + #under = nil + else + type = :module + class_name = $~[:module_name_1] + #parent_name = nil + if class_name + # rb_define_module(module_name_1) + #under = nil + else + class_name = $~[:module_name_2] + if class_name + # rb_define_module_under(module_under, module_name_1) + under = $~[:module_under] + else + # rb_singleton_class(target_class_name) + target_class_name = $~[:target_class_name] + handle_singleton $~[:var_name], target_class_name + next + end + end + end + end + end + + handle_class_module($~[:var_name], type, class_name, parent_name, under) + end end ## @@ -378,65 +466,6 @@ class RDoc::Parser::C < RDoc::Parser https://github.com/ruby/ruby/blob/trunk/lib/rdoc/parser/c.rb#L466 end end - ## - # Scans #content for rb_define_class - - def do_define_class - # The '.' 
lets us handle SWIG-generated files - @content.scan(/([\w\.]+)\s* = \s*rb_define_class\s* - \( - \s*"(\w+)", - \s*(\w+)\s* - \)/mx) do |var_name, class_name, parent| - handle_class_module(var_name, :class, class_name, parent, nil) - end - end - - ## - # Scans #content for rb_define_class_under - - def do_define_class_under - @content.scan(/([\w\.]+)\s* = # var_name - \s*rb_define_class_under\s* - \( - \s* (\w+), # under - \s* "(\w+)", # class_name - \s* - (?: - ([\w\*\s\(\)\.\->]+) | # parent_name - rb_path2class\("([\w:]+)"\) # path - ) - \s* - \) - /mx) do |var_name, under, class_name, parent_name, path| - parent = path || parent_name - - handle_class_module var_name, :class, class_name, parent, under - end - end - - ## - # Scans #content for rb_define_module - - def do_define_module - @content.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do - |var_name, class_name| - handle_class_module(var_name, :module, class_name, nil, nil) - end - end - - ## - # Scans #content for rb_define_module_under - - def do_define_module_under - @content.scan(/(\w+)\s* = \s*rb_define_module_under\s* - \( - \s*(\w+), - \s*"(\w+)" - \s*\)/mx) do |var_name, in_module, class_name| - handle_class_module(var_name, :module, class_name, nil, in_module) - end - end ## # Scans #content for rb_include_module @@ -519,42 +548,6 @@ class RDoc::Parser::C < RDoc::Parser https://github.com/ruby/ruby/blob/trunk/lib/rdoc/parser/c.rb#L548 end ## - # Scans #content for rb_define_module and rb_define_module_under - - def do_modules - do_define_module - do_define_module_under - end - - ## - # Scans #content for rb_singleton_class - - def do_singleton_class - @content.scan(/([\w\.]+)\s* = \s*rb_singleton_class\s* - \( - \s*(\w+) - \s*\)/mx) do |sclass_var, class_var| - handle_singleton sclass_var, class_var - end - end - - ## - # Scans #content for struct_define_without_accessor - - def do_struct_define_without_accessor - @content.scan(/([\w\.]+)\s* = \s*rb_struct_define_without_accessor\s* - 
\( - \s*"(\w+)", # Class name - \s*(\w+), # Parent class - \s*\w+, # Allocation function - (\s*"\w+",)* # Attributes - \s*NULL - \)/mx) do |var_name, class_name, parent| - handle_class_module(var_name, :class, class_name, parent, nil) - end - end - - ## # Finds the comment for an alias on +class_name+ from +new_name+ to # +old_name+ @@ -1247,8 +1240,7 @@ class RDoc::Parser::C < RDoc::Parser https://github.com/ruby/ruby/blob/trunk/lib/rdoc/parser/c.rb#L1240 def scan remove_commented_out_lines - do_modules - do_classes + do_classes_and_modules do_missing do_constants diff --git a/test/rdoc/test_rdoc_parser_c.rb b/test/rdoc/test_rdoc_parser_c.rb index 81727ad..6601d28 100644 --- a/test/rdoc/test_rdoc_parser_c.rb +++ b/test/rdoc/test_rdoc_parser_c.rb @@ -304,32 +304,6 @@ void Init_Blah(void) { https://github.com/ruby/ruby/blob/trunk/test/rdoc/test_rdoc_parser_c.rb#L304 assert_equal 'This should show up as an alias', methods.last.commen (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/