ruby-changes:63938
From: aycabta <ko1@a...>
Date: Sat, 5 Dec 2020 03:09:41 +0900 (JST)
Subject: [ruby-changes:63938] fb2fda9a27 (master): [ruby/reline] Optimize regexp handling for width
https://git.ruby-lang.org/ruby.git/commit/?id=fb2fda9a27 From fb2fda9a27ed96d5100897e93cc31e155b9c41a8 Mon Sep 17 00:00:00 2001 From: aycabta <aycabta@g...> Date: Wed, 18 Nov 2020 15:21:57 +0900 Subject: [ruby/reline] Optimize regexp handling for width The rendering time in IRB has been reduced as follows: start = Time.now [{"_id"=>"5f9072a4589a06d2d74b6028", "index"=>0, "guid"=>"6b3051e2-dbc7-4537-bdb9-6cd7bb5358a7", "isActive"=>true, "balance"=>"$1,442.84", "picture"=>"http://placehold.it/32x32", "age"=>34, "eyeColor"=>"blue", "name"=>{"first"=>"Ward", "last"=>"Levy"}, "company"=>"HYPLEX", "email"=>"ward.levy@h...", "phone"=>"+1 (867) 568-3319", "address"=>"867 Cobek Court, Clara, Maryland, 3254", "about"=> "Exercitation eu ex aliqua sit. Pariatur aliquip incididunt sint id non consectetur ullamco Lorem ea mollit duis amet sint labore. Commodo laborum labore commodo officia in cillum adipisicing esse excepteur cupidatat adipisicing ut. Non esse incididunt voluptate aliquip cillum eu aute duis laboris sit et. Amet enim quis tempor occaecat excepteur exercitation excepteur deserunt amet cillum adipisicing.", "registered"=>"Monday, May 25, 2015 6:51 AM", "latitude"=>"16.001127", "longitude"=>"-72.377848", "tags"=>["dolore", "nostrud", "occaecat", "cillum", "nisi"], "range"=>[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], "friends"=> [{"id"=>0, "name"=>"Alison Bryant"}, {"id"=>1, "name"=>"Ester Espinoza"}, {"id"=>2, "name"=>"Sullivan Kane"}], "greeting"=>"Hello, Ward! You have 7 unread messages.", "favoriteFruit"=>"apple"}] puts "Duration: #{Time.now - start} seconds" 0.47sec -> 0.34sec start = Time.now "Exercitation eu ex aliqua sit. Pariatur aliquip incididunt sint id non consectetur ullamco Lorem ea mollit duis amet sint labore. Commodo laborum labore commodo officia in cillum adipisicing esse excepteur cupidatat adipisicing ut. Non esse incididunt voluptate aliquip cillum eu aute duis laboris sit et. 
Amet enim quis tempor occaecat excepteur exercitation excepteur deserunt amet cillum adipisicing."
puts "Duration: #{Time.now - start} seconds"

0.11sec -> 0.08sec

start = Time.now
def each_top_level_statement
  initialize_input
  catch(:TERM_INPUT) do
    loop do
      begin
        prompt
        unless l = lex
          throw :TERM_INPUT if @line == ''
        else
          @line_no += l.count("\n")
          next if l == "\n"
          @line.concat l
          if @code_block_open or @ltype or @continue or @indent > 0
            next
          end
        end
        if @line != "\n"
          @line.force_encoding(@io.encoding)
          yield @line, @exp_line_no
        end
        break if @io.eof?
        @line = ''
        @exp_line_no = @line_no
        @indent = 0
      rescue TerminateLineInput
        initialize_input
        prompt
      end
    end
  end
end
puts "Duration: #{Time.now - start} seconds"

0.40sec -> 0.33sec

Co-authored-by: NARUSE, Yui <naruse@a...>

https://github.com/ruby/reline/commit/a9e39ddcc4

diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb
index 5fcb8be..09aff69 100644
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@@ -35,11 +35,16 @@ class Reline::Unicode https://github.com/ruby/ruby/blob/trunk/lib/reline/unicode.rb#L35
   }
   EscapedChars = EscapedPairs.keys.map(&:chr)

-  CSI_REGEXP = /\e\[[\d;]*[ABCDEFGHJKSTfminsuhl]/
-  OSC_REGEXP = /\e\]\d+(?:;[^;]+)*\a/
   NON_PRINTING_START = "\1"
   NON_PRINTING_END = "\2"
-  WIDTH_SCANNER = /\G(?:#{NON_PRINTING_START}|#{NON_PRINTING_END}|#{CSI_REGEXP}|#{OSC_REGEXP}|\X)/
+  CSI_REGEXP = /\e\[[\d;]*[ABCDEFGHJKSTfminsuhl]/
+  OSC_REGEXP = /\e\]\d+(?:;[^;]+)*\a/
+  WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o
+  NON_PRINTING_START_INDEX = 0
+  NON_PRINTING_END_INDEX = 1
+  CSI_REGEXP_INDEX = 2
+  OSC_REGEXP_INDEX = 3
+  GRAPHEME_CLUSTER_INDEX = 4

   def self.get_mbchar_byte_size_by_first_char(c)
     # Checks UTF-8 character byte size
@@ -119,13 +124,14 @@ class Reline::Unicode https://github.com/ruby/ruby/blob/trunk/lib/reline/unicode.rb#L124
       rest = str.encode(Encoding::UTF_8)
       in_zero_width = false
       rest.scan(WIDTH_SCANNER) do |gc|
-        case gc
-        when NON_PRINTING_START
+        case
+        when gc[NON_PRINTING_START_INDEX]
           in_zero_width = true
-        when NON_PRINTING_END
+        when gc[NON_PRINTING_END_INDEX]
           in_zero_width = false
-        when CSI_REGEXP, OSC_REGEXP
-        else
+        when gc[CSI_REGEXP_INDEX], gc[OSC_REGEXP_INDEX]
+        when gc[GRAPHEME_CLUSTER_INDEX]
+          gc = gc[GRAPHEME_CLUSTER_INDEX]
           unless in_zero_width
             width += get_mbchar_width(gc)
           end
@@ -146,14 +152,17 @@ class Reline::Unicode https://github.com/ruby/ruby/blob/trunk/lib/reline/unicode.rb#L152
     rest = str.encode(Encoding::UTF_8)
     in_zero_width = false
     rest.scan(WIDTH_SCANNER) do |gc|
-      case gc
-      when NON_PRINTING_START
+      case
+      when gc[NON_PRINTING_START_INDEX]
         in_zero_width = true
-      when NON_PRINTING_END
+      when gc[NON_PRINTING_END_INDEX]
         in_zero_width = false
-      when CSI_REGEXP, OSC_REGEXP
-        lines.last << gc
-      else
+      when gc[CSI_REGEXP_INDEX]
+        lines.last << gc[CSI_REGEXP_INDEX]
+      when gc[OSC_REGEXP_INDEX]
+        lines.last << gc[OSC_REGEXP_INDEX]
+      when gc[GRAPHEME_CLUSTER_INDEX]
+        gc = gc[GRAPHEME_CLUSTER_INDEX]
         unless in_zero_width
           mbchar_width = get_mbchar_width(gc)
           if (width += mbchar_width) > max_width
--
cgit v0.10.2

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/