ruby-changes:18333
From: nobu <ko1@a...>
Date: Sat, 25 Dec 2010 17:33:03 +0900 (JST)
Subject: [ruby-changes:18333] Ruby:r30356 (trunk): * lib/csv.rb (CSV#init_separators): use IO#gets with length
nobu 2010-12-25 17:32:27 +0900 (Sat, 25 Dec 2010) New Revision: 30356 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=30356 Log: * lib/csv.rb (CSV#init_separators): use IO#gets with length parameter to get rid of wrong convertion. Modified files: trunk/ChangeLog trunk/lib/csv.rb trunk/test/csv/test_encodings.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 30355) +++ ChangeLog (revision 30356) @@ -1,5 +1,8 @@ -Sat Dec 25 16:04:34 2010 Nobuyoshi Nakada <nobu@r...> +Sat Dec 25 17:32:24 2010 Nobuyoshi Nakada <nobu@r...> + * lib/csv.rb (CSV#init_separators): use IO#gets with length + parameter to get rid of wrong convertion. + * lib/csv.rb (CSV::foreach, CSV#initialize): directly use encoding * lib/csv.rb, test/csv: should not assume $, invariant. Index: lib/csv.rb =================================================================== --- lib/csv.rb (revision 30355) +++ lib/csv.rb (revision 30356) @@ -1573,10 +1573,7 @@ # if we can transcode the needed characters # @re_esc = "\\".encode(@encoding) rescue "" - @re_chars = %w[ \\ . [ ] - ^ $ ? - * + { } ( ) | # - \ \r \n \t \f \v ]. - map { |s| s.encode(@encoding) rescue nil }.compact + @re_chars = /#{%"[-][\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding, fallback: proc{""})}/ init_separators(options) init_parsers(options) @@ -2025,15 +2022,13 @@ # if we run out of data, it's probably a single line # (use a sensible default) # - if @io.eof? + unless sample = @io.gets(nil, 1024) @row_sep = $INPUT_RECORD_SEPARATOR break end # read ahead a bit - sample = read_to_char(1024) - sample += read_to_char(1) if sample[-1..-1] == encode_str("\r") and - not @io.eof? + sample << (@io.gets(nil, 1) || "") if sample.end_with?(encode_str("\r")) # try to find a standard separator if sample =~ encode_re("\r\n?|\n") @row_sep = $& @@ -2267,7 +2262,7 @@ # a backslash cannot be transcoded. # def escape_re(str) - str.chars.map { |c| @re_chars.include?(c) ? @re_esc + c : c }.join('') + str.gsub(@re_chars) {|c| @re_esc + c} end # @@ -2286,31 +2281,6 @@ chunks.map { |chunk| chunk.encode(@encoding.name) }.join('') end - # - # Reads at least +bytes+ from <tt>@io</tt>, but will read up 10 bytes ahead if - # needed to ensure the data read is valid in the ecoding of that data. This - # should ensure that it is safe to use regular expressions on the read data, - # unless it is actually a broken encoding. The read data will be returned in - # <tt>@encoding</tt>. - # - def read_to_char(bytes) - return "" if @io.eof? - data = read_io(bytes) - begin - raise unless data.valid_encoding? - encoded = encode_str(data) - raise unless encoded.valid_encoding? - return encoded - rescue # encoding error or my invalid data raise - if @io.eof? or data.size >= bytes + 10 - return data - else - data += read_io(1) - retry - end - end - end - private def raw_encoding @@ -2324,10 +2294,6 @@ Encoding::ASCII_8BIT end end - - def read_io(bytes) - @io.read(bytes).force_encoding(raw_encoding) - end end # Another name for CSV::instance(). Index: test/csv/test_encodings.rb =================================================================== --- test/csv/test_encodings.rb (revision 30355) +++ test/csv/test_encodings.rb (revision 30356) @@ -238,12 +238,28 @@ def assert_parses(fields, encoding, options = { }) encoding = Encoding.find(encoding) unless encoding.is_a? Encoding + orig_fields = fields fields = encode_ary(fields, encoding) - parsed = CSV.parse(ary_to_data(fields, options), options) + data = ary_to_data(fields, options) + parsed = CSV.parse(data, options) assert_equal(fields, parsed) parsed.flatten.each_with_index do |field, i| assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.") end + File.open(@temp_csv_path, "wb") {|f| f.print(data)} + CSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv| + csv.each_with_index do |row, i| + assert_equal(fields[i], row) + end + end + begin + CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv| + csv.each_with_index do |row, i| + assert_equal(orig_fields[i], row) + end + end unless encoding == __ENCODING__ + rescue Encoding::ConverterNotFoundError + end end def encode_ary(ary, encoding) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/