[前][次][番号順一覧][スレッド一覧]

ruby-changes:15124

From: naruse <ko1@a...>
Date: Sun, 21 Mar 2010 21:36:32 +0900 (JST)
Subject: [ruby-changes:15124] Ruby:r27001 (trunk): * lib/uri/common.rb (URI.encode_www_form_component):

naruse	2010-03-21 21:36:14 +0900 (Sun, 21 Mar 2010)

  New Revision: 27001

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=27001

  Log:
    * lib/uri/common.rb (URI.encode_www_form_component):
      convert strings of HTML5 ASCII incompatible encoding
      to UTF-8.
    
    * lib/uri/common.rb (URI.encode_www_form_component):
      "\x83\x41" of Shift_JIS should be encoded as "%83A".
      This follows real implementations.
    
    * lib/uri/common.rb (URI.decode_www_form_component):
      use given encoding for force_encoding. [ruby-dev:40721]

  Modified files:
    trunk/ChangeLog
    trunk/lib/uri/common.rb
    trunk/test/uri/test_common.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 27000)
+++ ChangeLog	(revision 27001)
@@ -1,3 +1,16 @@
+Sun Mar 21 00:46:29 2010  NARUSE, Yui  <naruse@r...>
+
+	* lib/uri/common.rb (URI.encode_www_form_component):
+	  convert strings of HTML5 ASCII incompatible encoding
+	  to UTF-8.
+
+	* lib/uri/common.rb (URI.encode_www_form_component):
+	  "\x83\x41" of Shift_JIS should be encoded as "%83A".
+	  This follows real implementations.
+
+	* lib/uri/common.rb (URI.decode_www_form_component):
+	  use given encoding for force_encoding. [ruby-dev:40721]
+
 Sun Mar 21 21:09:17 2010  Tanaka Akira  <akr@f...>
 
 	* lib/resolv-replace.rb: specify super class for rdoc.
Index: lib/uri/common.rb
===================================================================
--- lib/uri/common.rb	(revision 27000)
+++ lib/uri/common.rb	(revision 27001)
@@ -722,6 +722,10 @@
   # :nodoc:
   TBLDECWWWCOMP_ = {}
 
+  # :nodoc:
+  HTML5ASCIIINCOMPAT = [Encoding::UTF_7, Encoding::UTF_16BE, Encoding::UTF_16LE,
+    Encoding::UTF_32BE, Encoding::UTF_32LE]
+
   # Encode given +str+ to URL-encoded form data.
   #
   # This doesn't convert *, -, ., 0-9, A-Z, _, a-z,
@@ -733,35 +737,19 @@
   def self.encode_www_form_component(str)
     if TBLENCWWWCOMP_.empty?
       256.times do |i|
-        case i
-        when 0x20
-          TBLENCWWWCOMP_[' '] = '+'
-        # when 0x2A, 0x2D, 0x2E, 0x30..0x39, 0x41..0x5A, 0x5F, 0x61..0x7A
-        else
-          TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
-        end
+        TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
       end
+      TBLENCWWWCOMP_[' '] = '+'
       TBLENCWWWCOMP_.freeze
     end
     str = str.to_s
-    case str.encoding
-    when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::UTF_8
-      str = str.dup.force_encoding(Encoding::ASCII_8BIT)
-      str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
-    when Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE
-      reg = Regexp.new('[^*\-.0-9A-Z_a-z]+'.encode(str.encoding))
-      str = str.gsub(reg){
-        $&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_).
-        force_encoding(str.encoding)
-      }
+    if HTML5ASCIIINCOMPAT.include?(str.encoding)
+      str = str.encode(Encoding::UTF_8)
     else
-      if str.encoding.ascii_compatible?
-        str = str.gsub(/[^*\-.0-9A-Z_a-z]+/){
-          $&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)}
-      else
-        str = str.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)
-      end
+      str = str.dup
     end
+    str.force_encoding(Encoding::ASCII_8BIT)
+    str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
     str.force_encoding(Encoding::US_ASCII)
   end
 
@@ -778,11 +766,11 @@
         TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
         TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
         TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
-        TBLDECWWWCOMP_['+'] = ' ' if i == 0x20
       end
+      TBLDECWWWCOMP_['+'] = ' '
       TBLDECWWWCOMP_.freeze
     end
-    str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(Encoding::UTF_8)
+    str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
   end
 
   # Generate URL-encoded form data from given +enum+.
@@ -794,7 +782,8 @@
   #
   # This doesn't convert encodings of give items, so convert them before call
   # this method if you want to send data as other than original encoding or
-  # mixed encoding data.
+  # mixed encoding data. (Strings encoded in an HTML5 ASCII-incompatible
+  # encoding are converted to UTF-8.)
   #
   # This doesn't treat files. When you send a file, use multipart/form-data.
   #
Index: test/uri/test_common.rb
===================================================================
--- test/uri/test_common.rb	(revision 27000)
+++ test/uri/test_common.rb	(revision 27001)
@@ -54,12 +54,12 @@
     assert_equal("%00+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
                  "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
                  URI.encode_www_form_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
-    assert_equal("%95%41", URI.encode_www_form_component(
+    assert_equal("%95A", URI.encode_www_form_component(
                    "\x95\x41".force_encoding(Encoding::Shift_JIS)))
-    assert_equal("%30%42", URI.encode_www_form_component(
+    assert_equal("%E3%81%82", URI.encode_www_form_component(
                    "\x30\x42".force_encoding(Encoding::UTF_16BE)))
-    assert_equal("%30%42", URI.encode_www_form_component(
-                   "\x30\x42".force_encoding(Encoding::ISO_2022_JP)))
+    assert_equal("%1B%24B%24%22%1B%28B", URI.encode_www_form_component(
+                   "\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))
   end
 
   def test_decode_www_form_component
@@ -67,6 +67,8 @@
                  URI.decode_www_form_component(
                    "%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
                    "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
+    assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
+                 URI.decode_www_form_component("%A1%A2", "EUC-JP"))
   end
 
   def test_encode_www_form

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]