[前][次][番号順一覧][スレッド一覧]

ruby-changes:36899

From: normal <ko1@a...>
Date: Thu, 25 Dec 2014 08:50:45 +0900 (JST)
Subject: [ruby-changes:36899] normal:r48980 (trunk): lib/uri: performance improvements [misc #10628]

normal	2014-12-25 08:50:37 +0900 (Thu, 25 Dec 2014)

  New Revision: 48980

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=48980

  Log:
    lib/uri: performance improvements [misc #10628]
    
    * lib/uri/generic.rb (split_userinfo): fstring for 1-byte split
      (set_port): reduce bytecode size
      (check_path): reduce garbage via opt_str_freeze
      (query=): ditto
      (fragment=): ditto
      [misc #10628]
    * lib/uri/rfc3986_parser.rb (regexp): cache as attr
      (initialize): setup and freeze regexp attr once
      (split): reduce bytecode size, use opt_str_freeze
      (parse): minor bytecode and garbage reduction
      (default_regexp): rename for initialize

  Modified files:
    trunk/ChangeLog
    trunk/lib/uri/generic.rb
    trunk/lib/uri/rfc3986_parser.rb
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 48979)
+++ ChangeLog	(revision 48980)
@@ -1,3 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Thu Dec 25 08:42:11 2014  Eric Wong  <e@8...>
+
+	* lib/uri/generic.rb (split_userinfo): fstring for 1-byte split
+	  (set_port): reduce bytecode size
+	  (check_path): reduce garbage via opt_str_freeze
+	  (query=): ditto
+	  (fragment=): ditto
+	  [misc #10628]
+	* lib/uri/rfc3986_parser.rb (regexp): cache as attr
+	  (initialize): setup and freeze regexp attr once
+	  (split): reduce bytecode size, use opt_str_freeze
+	  (parse): minor bytecode and garbage reduction
+	  (default_regexp): rename for initialize
+
 Wed Dec 24 20:38:16 2014  Nobuyoshi Nakada  <nobu@r...>
 
 	* dir.c (glob_make_pattern): restrict searching case-insensitive
Index: lib/uri/rfc3986_parser.rb
===================================================================
--- lib/uri/rfc3986_parser.rb	(revision 48979)
+++ lib/uri/rfc3986_parser.rb	(revision 48980)
@@ -4,6 +4,11 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L4
     # this regexp is modified not to host is not empty string
     RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
     RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+)\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
+    attr_reader :regexp
+
+    def initialize
+      @regexp = default_regexp.each_value(&:freeze).freeze
+    end
 
     def split(uri) #:nodoc:
       begin
@@ -11,42 +16,52 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L16
       rescue NoMethodError
         raise InvalidURIError, "bad URI(is not URI?): #{uri}"
       end
-      unless uri.ascii_only?
+      uri.ascii_only? or
         raise InvalidURIError, "URI must be ascii only #{uri.dump}"
-      end
       if m = RFC3986_URI.match(uri)
-        ary = []
-        ary << m["scheme"]
-        if m["path-rootless"] # opaque
-          ary << nil # userinfo
-          ary << nil # host
-          ary << nil # port
-          ary << nil # registry
-          ary << nil # path
-          ary << m["path-rootless"]
-          ary[-1] << '?' << m["query"] if m["query"]
-          ary << nil # query
-          ary << m["fragment"]
+        query = m["query".freeze]
+        scheme = m["scheme".freeze]
+        opaque = m["path-rootless".freeze]
+        if opaque
+          opaque << "?#{query}" if query
+          [ scheme,
+            nil, # userinfo
+            nil, # host
+            nil, # port
+            nil, # registry
+            nil, # path
+            opaque,
+            nil, # query
+            m["fragment".freeze]
+          ]
         else # normal
-          ary << m["userinfo"]
-          ary << m["host"]
-          ary << m["port"]
-          ary << nil # registry
-          ary << (m["path-abempty"] || m["path-absolute"] || m["path-empty"])
-          ary << nil # opaque
-          ary << m["query"]
-          ary << m["fragment"]
+          [ scheme,
+            m["userinfo".freeze],
+            m["host".freeze],
+            m["port".freeze],
+            nil, # registry
+            (m["path-abempty".freeze] ||
+             m["path-absolute".freeze] ||
+             m["path-empty".freeze]),
+            nil, # opaque
+            query,
+            m["fragment".freeze]
+          ]
         end
       elsif m = RFC3986_relative_ref.match(uri)
-        ary = [nil]
-        ary << m["userinfo"]
-        ary << m["host"]
-        ary << m["port"]
-        ary << nil # registry
-        ary << (m["path-abempty"] || m["path-absolute"] || m["path-noscheme"] || m["path-empty"])
-        ary << nil # opaque
-        ary << m["query"]
-        ary << m["fragment"]
+        [ nil, # scheme
+          m["userinfo".freeze],
+          m["host".freeze],
+          m["port".freeze],
+          nil, # registry,
+          (m["path-abempty".freeze] ||
+           m["path-absolute".freeze] ||
+           m["path-noscheme".freeze] ||
+           m["path-empty".freeze]),
+          nil, # opaque
+          m["query".freeze],
+          m["fragment".freeze]
+        ]
       else
         raise InvalidURIError, "bad URI(is not URI?): #{uri}"
       end
@@ -55,11 +70,11 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L70
     def parse(uri) # :nodoc:
       scheme, userinfo, host, port,
         registry, path, opaque, query, fragment = self.split(uri)
-
-      if scheme && URI.scheme_list.include?(scheme.upcase)
-        URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
-                                           registry, path, opaque, query,
-                                           fragment, self)
+      scheme_list = URI.scheme_list
+      if scheme && scheme_list.include?(uc = scheme.upcase)
+        scheme_list[uc].new(scheme, userinfo, host, port,
+                            registry, path, opaque, query,
+                            fragment, self)
       else
         Generic.new(scheme, userinfo, host, port,
                     registry, path, opaque, query,
@@ -78,7 +93,9 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L93
       @@to_s.bind(self).call
     end
 
-    def regexp
+    private
+
+    def default_regexp # :nodoc:
       {
         SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/,
         USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/,
@@ -92,8 +109,6 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/rfc3986_parser.rb#L109
       }
     end
 
-    private
-
     def convert_to_uri(uri)
       if uri.is_a?(URI::Generic)
         uri
Index: lib/uri/generic.rb
===================================================================
--- lib/uri/generic.rb	(revision 48979)
+++ lib/uri/generic.rb	(revision 48980)
@@ -543,7 +543,7 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L543
     # if properly formatted as 'user:password'
     def split_userinfo(ui)
       return nil, nil unless ui
-      user, password = ui.split(/:/, 2)
+      user, password = ui.split(':'.freeze, 2)
 
       return user, password
     end
@@ -695,13 +695,7 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L695
     # see also URI::Generic.port=
     #
     def set_port(v)
-      unless !v || v.kind_of?(Fixnum)
-        if v.empty?
-          v = nil
-        else
-          v = v.to_i
-        end
-      end
+      v = v.empty? ? nil : v.to_i unless !v || v.kind_of?(Fixnum)
       @port = v
     end
     protected :set_port
@@ -768,13 +762,14 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L762
 
       # If scheme is ftp, path may be relative.
       # See RFC 1738 section 3.2.2, and RFC 2396.
-      if @scheme && @scheme != "ftp"
-        if v && v != '' && parser.regexp[:ABS_PATH] !~ v
+      if @scheme && @scheme != "ftp".freeze
+        if v && v != ''.freeze && parser.regexp[:ABS_PATH] !~ v
           raise InvalidComponentError,
             "bad component(expected absolute path component): #{v}"
         end
       else
-        if v && v != '' && parser.regexp[:ABS_PATH] !~ v && parser.regexp[:REL_PATH] !~ v
+        if v && v != ''.freeze && parser.regexp[:ABS_PATH] !~ v &&
+           parser.regexp[:REL_PATH] !~ v
           raise InvalidComponentError,
             "bad component(expected relative path component): #{v}"
         end
@@ -849,7 +844,7 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L844
       x = v.to_str
       v = x.dup if x.equal? v
       v.encode!(Encoding::UTF_8) rescue nil
-      v.delete!("\t\r\n")
+      v.delete!("\t\r\n".freeze)
       v.force_encoding(Encoding::ASCII_8BIT)
       v.gsub!(/(?!%\h\h|[!$-&(-;=?-_a-~])./n.freeze){'%%%02X'.freeze % $&.ord}
       v.force_encoding(Encoding::US_ASCII)
@@ -939,9 +934,9 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L934
       x = v.to_str
       v = x.dup if x.equal? v
       v.encode!(Encoding::UTF_8) rescue nil
-      v.delete!("\t\r\n")
+      v.delete!("\t\r\n".freeze)
       v.force_encoding(Encoding::ASCII_8BIT)
-      v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X' % $&.ord}
+      v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X'.freeze % $&.ord}
       v.force_encoding(Encoding::US_ASCII)
       @fragment = v
     end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]