ruby-changes:36899
From: normal <ko1@a...>
Date: Thu, 25 Dec 2014 08:50:45 +0900 (JST)
Subject: [ruby-changes:36899] normal:r48980 (trunk): lib/uri: performance improvements [misc #10628]
normal 2014-12-25 08:50:37 +0900 (Thu, 25 Dec 2014) New Revision: 48980 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=48980 Log: lib/uri: performance improvements [misc #10628] * lib/uri/generic.rb (split_userinfo): fstring for 1-byte split (set_port): reduce bytecode size (check_path): reduce garbage via opt_str_freeze (query=): ditto (fragment=): ditto [misc #10628] * lib/uri/rfc3986_parser.rb (regexp): cache as attr (initialize): setup and freeze regexp attr once (split): reduce bytecode size, use opt_str_freeze (parse): minor bytecode and garbage reduction (default_regexp): rename for initialize Modified files: trunk/ChangeLog trunk/lib/uri/generic.rb trunk/lib/uri/rfc3986_parser.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 48979) +++ ChangeLog (revision 48980) @@ -1,3 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Thu Dec 25 08:42:11 2014 Eric Wong <e@8...> + + * lib/uri/generic.rb (split_userinfo): fstring for 1-byte split + (set_port): reduce bytecode size + (check_path): reduce garbage via opt_str_freeze + (query=): ditto + (fragment=): ditto + [misc #10628] + * lib/uri/rfc3986_parser.rb (regexp): cache as attr + (initialize): setup and freeze regexp attr once + (split): reduce bytecode size, use opt_str_freeze + (parse): minor bytecode and garbage reduction + (default_regexp): rename for initialize + Wed Dec 24 20:38:16 2014 Nobuyoshi Nakada <nobu@r...> * dir.c (glob_make_pattern): restrict searching case-insensitive Index: lib/uri/rfc3986_parser.rb =================================================================== --- lib/uri/rfc3986_parser.rb (revision 48979) +++ lib/uri/rfc3986_parser.rb (revision 48980) @@ -4,6 +4,11 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L4 # this regexp is modified not to host is not empty string RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)| (?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+)\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+) (?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ + attr_reader :regexp + + def initialize + @regexp = default_regexp.each_value(&:freeze).freeze + end def split(uri) #:nodoc: begin @@ -11,42 +16,52 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L16 rescue NoMethodError raise InvalidURIError, "bad URI(is not URI?): #{uri}" end - unless uri.ascii_only? + uri.ascii_only? or raise InvalidURIError, "URI must be ascii only #{uri.dump}" - end if m = RFC3986_URI.match(uri) - ary = [] - ary << m["scheme"] - if m["path-rootless"] # opaque - ary << nil # userinfo - ary << nil # host - ary << nil # port - ary << nil # registry - ary << nil # path - ary << m["path-rootless"] - ary[-1] << '?' << m["query"] if m["query"] - ary << nil # query - ary << m["fragment"] + query = m["query".freeze] + scheme = m["scheme".freeze] + opaque = m["path-rootless".freeze] + if opaque + opaque << "?#{query}" if query + [ scheme, + nil, # userinfo + nil, # host + nil, # port + nil, # registry + nil, # path + opaque, + nil, # query + m["fragment".freeze] + ] else # normal - ary << m["userinfo"] - ary << m["host"] - ary << m["port"] - ary << nil # registry - ary << (m["path-abempty"] || m["path-absolute"] || m["path-empty"]) - ary << nil # opaque - ary << m["query"] - ary << m["fragment"] + [ scheme, + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-empty".freeze]), + nil, # opaque + query, + m["fragment".freeze] + ] end elsif m = RFC3986_relative_ref.match(uri) - ary = [nil] - ary << m["userinfo"] - ary << m["host"] - ary << m["port"] - ary << nil # registry - ary << (m["path-abempty"] || m["path-absolute"] || m["path-noscheme"] || m["path-empty"]) - ary << nil # opaque - ary << m["query"] - ary << m["fragment"] + [ nil, # scheme + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry, + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-noscheme".freeze] || + m["path-empty".freeze]), + nil, # opaque + m["query".freeze], + m["fragment".freeze] + ] else raise InvalidURIError, "bad URI(is not URI?): #{uri}" end @@ -55,11 +70,11 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L70 def parse(uri) # :nodoc: scheme, userinfo, host, port, registry, path, opaque, query, fragment = self.split(uri) - - if scheme && URI.scheme_list.include?(scheme.upcase) - URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port, - registry, path, opaque, query, - fragment, self) + scheme_list = URI.scheme_list + if scheme && scheme_list.include?(uc = scheme.upcase) + scheme_list[uc].new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment, self) else Generic.new(scheme, userinfo, host, port, registry, path, opaque, query, @@ -78,7 +93,9 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L93 @@to_s.bind(self).call end - def regexp + private + + def default_regexp # :nodoc: { SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/, USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/, @@ -92,8 +109,6 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L109 } end - private - def convert_to_uri(uri) if uri.is_a?(URI::Generic) uri Index: lib/uri/generic.rb =================================================================== --- lib/uri/generic.rb (revision 48979) +++ lib/uri/generic.rb (revision 48980) @@ -543,7 +543,7 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L543 # if properly formatted as 'user:password' def split_userinfo(ui) return nil, nil unless ui - user, password = ui.split(/:/, 2) + user, password = ui.split(':'.freeze, 2) return user, password end @@ -695,13 +695,7 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L695 # see also URI::Generic.port= # def set_port(v) - unless !v || v.kind_of?(Fixnum) - if v.empty? - v = nil - else - v = v.to_i - end - end + v = v.empty? ? nil : v.to_i unless !v || v.kind_of?(Fixnum) @port = v end protected :set_port @@ -768,13 +762,14 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L762 # If scheme is ftp, path may be relative. # See RFC 1738 section 3.2.2, and RFC 2396. - if @scheme && @scheme != "ftp" - if v && v != '' && parser.regexp[:ABS_PATH] !~ v + if @scheme && @scheme != "ftp".freeze + if v && v != ''.freeze && parser.regexp[:ABS_PATH] !~ v raise InvalidComponentError, "bad component(expected absolute path component): #{v}" end else - if v && v != '' && parser.regexp[:ABS_PATH] !~ v && parser.regexp[:REL_PATH] !~ v + if v && v != ''.freeze && parser.regexp[:ABS_PATH] !~ v && + parser.regexp[:REL_PATH] !~ v raise InvalidComponentError, "bad component(expected relative path component): #{v}" end @@ -849,7 +844,7 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L844 x = v.to_str v = x.dup if x.equal? v v.encode!(Encoding::UTF_8) rescue nil - v.delete!("\t\r\n") + v.delete!("\t\r\n".freeze) v.force_encoding(Encoding::ASCII_8BIT) v.gsub!(/(?!%\h\h|[!$-&(-;=?-_a-~])./n.freeze){'%%%02X'.freeze % $&.ord} v.force_encoding(Encoding::US_ASCII) @@ -939,9 +934,9 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L934 x = v.to_str v = x.dup if x.equal? v v.encode!(Encoding::UTF_8) rescue nil - v.delete!("\t\r\n") + v.delete!("\t\r\n".freeze) v.force_encoding(Encoding::ASCII_8BIT) - v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X' % $&.ord} + v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X'.freeze % $&.ord} v.force_encoding(Encoding::US_ASCII) @fragment = v end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/