[前][次][番号順一覧][スレッド一覧]

ruby-changes:71805

From: Jeremy <ko1@a...>
Date: Thu, 12 May 2022 14:54:52 +0900 (JST)
Subject: [ruby-changes:71805] fbebfe1697 (master): [ruby/uri] Add URI::Generic#decoded_#{user, password}

https://git.ruby-lang.org/ruby.git/commit/?id=fbebfe1697

From fbebfe1697938a684f460cd28af36cf1f056513c Mon Sep 17 00:00:00 2001
From: Jeremy Evans <code@j...>
Date: Thu, 4 Mar 2021 14:05:18 -0800
Subject: [ruby/uri] Add URI::Generic#decoded_#{user,password}

URI::Generic#{user,password} return the encoded values, which are
not that useful if you want to do authentication with them.
Automatic decoding by default would break backwards compatibility.
Optional automatic decoding via a keyword to URI.parse would
require threading the option through at least 3 other methods, and
would make semantics confusing (user= takes encoded or unencoded
password?) or require more work.  Thus, adding this as a separate
method seemed the simplest approach.

Unfortunately, URI lacks a method for correct decoding.  Unlike in
www form components, + in earlier parts of the URI such as the
userinfo section is treated verbatim and not as an encoded space.
Add URI.#{en,de}code_uri_component methods, which are almost the
same as URI.#{en,de}code_www_form_component, but without the
special SP => + handling.

Implements [Feature #9045]

https://github.com/ruby/uri/commit/16cfc4e92f
---
 lib/uri/common.rb       | 41 ++++++++++++++++++++++++++++++--------
 lib/uri/generic.rb      | 14 +++++++++++--
 test/uri/test_common.rb | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 test/uri/test_parser.rb |  9 +++++++++
 4 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/lib/uri/common.rb b/lib/uri/common.rb
index d592fdc9ba..a6d08aa26f 100644
--- a/lib/uri/common.rb
+++ b/lib/uri/common.rb
@@ -295,6 +295,7 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/common.rb#L295
   256.times do |i|
     TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
   end
+  TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
   TBLENCWWWCOMP_[' '] = '+'
   TBLENCWWWCOMP_.freeze
   TBLDECWWWCOMP_ = {} # :nodoc:
@@ -320,6 +321,33 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/common.rb#L321
   #
   # See URI.decode_www_form_component, URI.encode_www_form.
   def self.encode_www_form_component(str, enc=nil)
+    _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
+  end
+
+  # Decodes given +str+ of URL-encoded form data.
+  #
+  # This decodes + to SP.
+  #
+  # See URI.encode_www_form_component, URI.decode_www_form.
+  def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+    _decode_uri_component(/\+|%\h\h/, str, enc)
+  end
+
+  # Encodes +str+ using URL encoding
+  #
+  # This encodes SP to %20 instead of +.
+  def self.encode_uri_component(str, enc=nil)
+    _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
+  end
+
+  # Decodes given +str+ of URL-encoded data.
+  #
+  # This does not decode + to SP.
+  def self.decode_uri_component(str, enc=Encoding::UTF_8)
+    _decode_uri_component(/%\h\h/, str, enc)
+  end
+
+  def self._encode_uri_component(regexp, table, str, enc)
     str = str.to_s.dup
     if str.encoding != Encoding::ASCII_8BIT
       if enc && enc != Encoding::ASCII_8BIT
@@ -328,19 +356,16 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/common.rb#L356
       end
       str.force_encoding(Encoding::ASCII_8BIT)
     end
-    str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
+    str.gsub!(regexp, table)
     str.force_encoding(Encoding::US_ASCII)
   end
+  private_class_method :_encode_uri_component
 
-  # Decodes given +str+ of URL-encoded form data.
-  #
-  # This decodes + to SP.
-  #
-  # See URI.encode_www_form_component, URI.decode_www_form.
-  def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+  def self._decode_uri_component(regexp, str, enc)
     raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
-    str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
+    str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
   end
+  private_class_method :_decode_uri_component
 
   # Generates URL-encoded form data from given +enum+.
   #
diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb
index cfa0de6b74..69698c4e2d 100644
--- a/lib/uri/generic.rb
+++ b/lib/uri/generic.rb
@@ -564,16 +564,26 @@ module URI https://github.com/ruby/ruby/blob/trunk/lib/uri/generic.rb#L564
       end
     end
 
-    # Returns the user component.
+    # Returns the user component (without URI decoding).
     def user
       @user
     end
 
-    # Returns the password component.
+    # Returns the password component (without URI decoding).
     def password
       @password
     end
 
+    # Returns the user component after URI decoding.
+    def decoded_user
+      URI.decode_uri_component(@user) if @user
+    end
+
+    # Returns the password component after URI decoding.
+    def decoded_password
+      URI.decode_uri_component(@password) if @password
+    end
+
     #
     # Checks the host +v+ component for RFC2396 compliance
     # and against the URI::Parser Regexp for :HOST.
diff --git a/test/uri/test_common.rb b/test/uri/test_common.rb
index 0fa7e8ac70..8cb23fe167 100644
--- a/test/uri/test_common.rb
+++ b/test/uri/test_common.rb
@@ -130,6 +130,58 @@ class TestCommon < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/uri/test_common.rb#L130
     assert_nothing_raised(ArgumentError){URI.decode_www_form_component("x"*(1024*1024))}
   end
 
+  def test_encode_uri_component
+    assert_equal("%00%20%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
+                 "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
+                 URI.encode_uri_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
+    assert_equal("%95A", URI.encode_uri_component(
+                   "\x95\x41".force_encoding(Encoding::Shift_JIS)))
+    assert_equal("0B", URI.encode_uri_component(
+                   "\x30\x42".force_encoding(Encoding::UTF_16BE)))
+    assert_equal("%1B%24B%24%22%1B%28B", URI.encode_uri_component(
+                   "\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))
+
+    assert_equal("%E3%81%82", URI.encode_uri_component(
+                   "\u3042", Encoding::ASCII_8BIT))
+    assert_equal("%82%A0", URI.encode_uri_component(
+                   "\u3042", Encoding::Windows_31J))
+    assert_equal("%E3%81%82", URI.encode_uri_component(
+                   "\u3042", Encoding::UTF_8))
+
+    assert_equal("%82%A0", URI.encode_uri_component(
+                   "\u3042".encode("sjis"), Encoding::ASCII_8BIT))
+    assert_equal("%A4%A2", URI.encode_uri_component(
+                   "\u3042".encode("sjis"), Encoding::EUC_JP))
+    assert_equal("%E3%81%82", URI.encode_uri_component(
+                   "\u3042".encode("sjis"), Encoding::UTF_8))
+    assert_equal("B0", URI.encode_uri_component(
+                   "\u3042".encode("sjis"), Encoding::UTF_16LE))
+    assert_equal("%26%23730%3B", URI.encode_uri_component(
+                   "\u02DA", Encoding::WINDOWS_1252))
+
+    # invalid
+    assert_equal("%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
+                   "\xE3\x81\xFF", "utf-8"))
+    assert_equal("%E6%9F%8A%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
+                   "\x95\x41\xff\xff".force_encoding(Encoding::Shift_JIS), "utf-8"))
+  end
+
+  def test_decode_uri_component
+    assert_equal(" +!\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
+                 URI.decode_uri_component(
+                   "%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
+                   "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
+    assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
+                 URI.decode_uri_component("%A1%A2", "EUC-JP"))
+    assert_equal("\xE3\x81\x82\xE3\x81\x82".force_encoding("UTF-8"),
+                 URI.decode_uri_component("\xE3\x81\x82%E3%81%82".force_encoding("UTF-8")))
+
+    assert_raise(ArgumentError){URI.decode_uri_component("%")}
+    assert_raise(ArgumentError){URI.decode_uri_component("%a")}
+    assert_raise(ArgumentError){URI.decode_uri_component("x%a_")}
+    assert_nothing_raised(ArgumentError){URI.decode_uri_component("x"*(1024*1024))}
+  end
+
   def test_encode_www_form
     assert_equal("a=1", URI.encode_www_form("a" => "1"))
     assert_equal("a=1", URI.encode_www_form(a: 1))
diff --git a/test/uri/test_parser.rb b/test/uri/test_parser.rb
index 03de137788..f8e9299d09 100644
--- a/test/uri/test_parser.rb
+++ b/test/uri/test_parser.rb
@@ -50,6 +50,15 @@ class URI::TestParser < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/uri/test_parser.rb#L50
     assert_raise(URI::InvalidURIError) { URI.parse('https://www.example.com/search?q=%XX') }
   end
 
+  def test_parse_auth
+    str = "http://al%40ice:p%40s%25sword@example.com/dir%2Fname/subdir?foo=bar%40example.com"
+    uri = URI.parse(str)
+    assert_equal "al%40ice", uri.user
+    assert_equal "p%40s%25sword", uri.password
+    assert_equal "al@ice", uri.decoded_user
+    assert_equal "p@s%sword", uri.decoded_password
+  end
+
   def test_raise_bad_uri_for_integer
     assert_raise(URI::InvalidURIError) do
       URI.parse(1)
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]