ruby-changes:18333

nobu	2010-12-25 17:32:27 +0900 (Sat, 25 Dec 2010)

  New Revision: 30356

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=30356

  Log:
    * lib/csv.rb (CSV#init_separators): use IO#gets with length
      parameter to get rid of wrong conversion.

  Modified files:
    trunk/ChangeLog
    trunk/lib/csv.rb
    trunk/test/csv/test_encodings.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 30355)
+++ ChangeLog	(revision 30356)
@@ -1,5 +1,8 @@
-Sat Dec 25 16:04:34 2010  Nobuyoshi Nakada  <nobu@r...>
+Sat Dec 25 17:32:24 2010  Nobuyoshi Nakada  <nobu@r...>
 
+	* lib/csv.rb (CSV#init_separators): use IO#gets with length
+	  parameter to get rid of wrong conversion.
+
 	* lib/csv.rb (CSV::foreach, CSV#initialize): directly use encoding
 
 	* lib/csv.rb, test/csv: should not assume $, invariant.
Index: lib/csv.rb
===================================================================
--- lib/csv.rb	(revision 30355)
+++ lib/csv.rb	(revision 30356)
@@ -1573,10 +1573,7 @@
     # if we can transcode the needed characters
     #
     @re_esc   =   "\\".encode(@encoding) rescue ""
-    @re_chars =   %w[ \\ .  [  ]  -  ^  $  ?
-                      *  +  {  }  (  )  |  #
-                      \  \r \n \t \f \v ].
-                  map { |s| s.encode(@encoding) rescue nil }.compact
+    @re_chars =   /#{%"[-][\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding, fallback: proc{""})}/
 
     init_separators(options)
     init_parsers(options)
@@ -2025,15 +2022,13 @@
             # if we run out of data, it's probably a single line
             # (use a sensible default)
             #
-            if @io.eof?
+            unless sample = @io.gets(nil, 1024)
               @row_sep = $INPUT_RECORD_SEPARATOR
               break
             end
 
             # read ahead a bit
-            sample =  read_to_char(1024)
-            sample += read_to_char(1) if sample[-1..-1] == encode_str("\r") and
-                                         not @io.eof?
+            sample << (@io.gets(nil, 1) || "") if sample.end_with?(encode_str("\r"))
             # try to find a standard separator
             if sample =~ encode_re("\r\n?|\n")
               @row_sep = $&
@@ -2267,7 +2262,7 @@
   # a backslash cannot be transcoded.
   #
   def escape_re(str)
-    str.chars.map { |c| @re_chars.include?(c) ? @re_esc + c : c }.join('')
+    str.gsub(@re_chars) {|c| @re_esc + c}
   end
 
   #
@@ -2286,31 +2281,6 @@
     chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
   end
 
-  #
-  # Reads at least +bytes+ from <tt>@io</tt>, but will read up 10 bytes ahead if
-  # needed to ensure the data read is valid in the ecoding of that data.  This
-  # should ensure that it is safe to use regular expressions on the read data,
-  # unless it is actually a broken encoding.  The read data will be returned in
-  # <tt>@encoding</tt>.
-  #
-  def read_to_char(bytes)
-    return "" if @io.eof?
-    data = read_io(bytes)
-    begin
-      raise unless data.valid_encoding?
-      encoded = encode_str(data)
-      raise unless encoded.valid_encoding?
-      return encoded
-    rescue  # encoding error or my invalid data raise
-      if @io.eof? or data.size >= bytes + 10
-        return data
-      else
-        data += read_io(1)
-        retry
-      end
-    end
-  end
-
   private
 
   def raw_encoding
@@ -2324,10 +2294,6 @@
       Encoding::ASCII_8BIT
     end
   end
-
-  def read_io(bytes)
-    @io.read(bytes).force_encoding(raw_encoding)
-  end
 end
 
 # Another name for CSV::instance().
Index: test/csv/test_encodings.rb
===================================================================
--- test/csv/test_encodings.rb	(revision 30355)
+++ test/csv/test_encodings.rb	(revision 30356)
@@ -238,12 +238,28 @@
   
   def assert_parses(fields, encoding, options = { })
     encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
+    orig_fields = fields
     fields   = encode_ary(fields, encoding)
-    parsed   = CSV.parse(ary_to_data(fields, options), options)
+    data = ary_to_data(fields, options)
+    parsed   = CSV.parse(data, options)
     assert_equal(fields, parsed)
     parsed.flatten.each_with_index do |field, i|
       assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
     end
+    File.open(@temp_csv_path, "wb") {|f| f.print(data)}
+    CSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv|
+      csv.each_with_index do |row, i|
+        assert_equal(fields[i], row)
+      end
+    end
+    begin
+      CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv|
+        csv.each_with_index do |row, i|
+          assert_equal(orig_fields[i], row)
+        end
+      end unless encoding == __ENCODING__
+    rescue Encoding::ConverterNotFoundError
+    end
   end
   
   def encode_ary(ary, encoding)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/