ruby-changes:41329
From: jeg2 <ko1@a...>
Date: Fri, 1 Jan 2016 11:44:56 +0900 (JST)
Subject: [ruby-changes:41329] jeg2:r53401 (trunk): Adding a liberal_parsing option to CSV. Patch by Braden Anderson.
jeg2 2016-01-01 11:44:48 +0900 (Fri, 01 Jan 2016) New Revision: 53401 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=53401 Log: Adding a liberal_parsing option to CSV. Patch by Braden Anderson. Modified files: trunk/ChangeLog trunk/lib/csv.rb trunk/test/csv/test_features.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 53400) +++ ChangeLog (revision 53401) @@ -1,3 +1,9 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Fri Jan 1 11:42:57 2016 James Edward Gray II <james@g...> + + * lib/csv.rb (CSV): Add a liberal_parsing option. + Patch by Braden Anderson. [#11839] + * test/csv/test_features.rb: test liberal_parsing + Fri Jan 1 10:27:28 2016 Nobuyoshi Nakada <nobu@r...> * tool/mkconfig.rb (RbConfig): prefix SDKROOT to oldincludedir Index: lib/csv.rb =================================================================== --- lib/csv.rb (revision 53400) +++ lib/csv.rb (revision 53401) @@ -1019,6 +1019,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1019 # <b><tt>:skip_blanks</tt></b>:: +false+ # <b><tt>:force_quotes</tt></b>:: +false+ # <b><tt>:skip_lines</tt></b>:: +nil+ + # <b><tt>:liberal_parsing</tt></b>:: +false+ # DEFAULT_OPTIONS = { col_sep: ",", @@ -1033,6 +1034,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1034 skip_blanks: false, force_quotes: false, skip_lines: nil, + liberal_parsing: false, }.freeze # @@ -1499,6 +1501,10 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1501 # a comment. If the passed object does # not respond to <tt>match</tt>, # <tt>ArgumentError</tt> is thrown. + # <b><tt>:liberal_parsing</tt></b>:: When set to a +true+ value, CSV will + # attempt to parse input not conformant + # with RFC 4180, such as double quotes + # in unquoted fields. # # See CSV::DEFAULT_OPTIONS for the default settings. # @@ -1622,6 +1628,8 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1628 def skip_blanks?() @skip_blanks end # Returns +true+ if all output fields are quoted. See CSV::new for details. def force_quotes?() @force_quotes end + # Returns +true+ if illegal input is handled. See CSV::new for details. + def liberal_parsing?() @liberal_parsing end # # The Encoding CSV is parsing or writing in. This will be the Encoding you @@ -1860,12 +1868,12 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1868 end elsif part[0] == @quote_char # If we are starting a new quoted column - if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0 + if part.count(@quote_char) % 2 != 0 # start an extended column csv << part[1..-1] csv.last << @col_sep in_extended_col = true - else + elsif part[-1] == @quote_char # regular quoted column csv << part[1..-2] if csv.last =~ @parsers[:stray_quote] @@ -1873,6 +1881,11 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1881 "Missing or stray quote in line #{lineno + 1}" end csv.last.gsub!(@quote_char * 2, @quote_char) + elsif @liberal_parsing + csv << part + else + raise MalformedCSVError, + "Missing or stray quote in line #{lineno + 1}" end elsif part =~ @parsers[:quote_or_nl] # Unquoted field with bad characters. @@ -1880,7 +1893,11 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1893 raise MalformedCSVError, "Unquoted fields do not allow " + "\\r or \\n (line #{lineno + 1})." else - raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}." + if @liberal_parsing + csv << part + else + raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}." + end end else # Regular ole unquoted field. @@ -1945,7 +1962,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1962 str << " encoding:" << @encoding.name # show other attributes %w[ lineno col_sep row_sep - quote_char skip_blanks ].each do |attr_name| + quote_char skip_blanks liberal_parsing ].each do |attr_name| if a = instance_variable_get("@#{attr_name}") str << " " << attr_name << ":" << a.inspect end @@ -2079,6 +2096,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L2096 # store the parser behaviors @skip_blanks = options.delete(:skip_blanks) @field_size_limit = options.delete(:field_size_limit) + @liberal_parsing = options.delete(:liberal_parsing) # prebuild Regexps for faster parsing esc_row_sep = escape_re(@row_sep) Index: test/csv/test_features.rb =================================================================== --- test/csv/test_features.rb (revision 53400) +++ test/csv/test_features.rb (revision 53401) @@ -142,6 +142,29 @@ class TestCSV::Features < TestCSV https://github.com/ruby/ruby/blob/trunk/test/csv/test_features.rb#L142 assert_equal(3, count) end + def test_liberal_parsing + input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson' + assert_raise(CSV::MalformedCSVError) do + CSV.parse_line(input) + end + assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'], + CSV.parse_line(input, liberal_parsing: true)) + + input = '"quoted" field' + assert_raise(CSV::MalformedCSVError) do + CSV.parse_line(input) + end + assert_equal(['"quoted" field'], + CSV.parse_line(input, liberal_parsing: true)) + + assert_raise(CSV::MalformedCSVError) do + CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) + end + + assert_equal(["is", 'this "three', ' or four"', "fields"], + CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true)) + end + def test_csv_behavior_readers %w[ unconverted_fields return_headers write_headers skip_blanks force_quotes ].each do |behavior| -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/