[前][次][番号順一覧][スレッド一覧]

ruby-changes:41329

From: jeg2 <ko1@a...>
Date: Fri, 1 Jan 2016 11:44:56 +0900 (JST)
Subject: [ruby-changes:41329] jeg2:r53401 (trunk): Adding a liberal_parsing option to CSV. Patch by Braden Anderson.

jeg2	2016-01-01 11:44:48 +0900 (Fri, 01 Jan 2016)

  New Revision: 53401

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=53401

  Log:
    Adding a liberal_parsing option to CSV.  Patch by Braden Anderson.

  Modified files:
    trunk/ChangeLog
    trunk/lib/csv.rb
    trunk/test/csv/test_features.rb
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 53400)
+++ ChangeLog	(revision 53401)
@@ -1,3 +1,9 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Fri Jan  1 11:42:57 2016  James Edward Gray II  <james@g...>
+
+	* lib/csv.rb (CSV): Add a liberal_parsing option.
+	  Patch by Braden Anderson. [#11839]
+	* test/csv/test_features.rb: Test liberal_parsing.
+
 Fri Jan  1 10:27:28 2016  Nobuyoshi Nakada  <nobu@r...>
 
 	* tool/mkconfig.rb (RbConfig): prefix SDKROOT to oldincludedir
Index: lib/csv.rb
===================================================================
--- lib/csv.rb	(revision 53400)
+++ lib/csv.rb	(revision 53401)
@@ -1019,6 +1019,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1019
   # <b><tt>:skip_blanks</tt></b>::        +false+
   # <b><tt>:force_quotes</tt></b>::       +false+
   # <b><tt>:skip_lines</tt></b>::         +nil+
+  # <b><tt>:liberal_parsing</tt></b>::    +false+
   #
   DEFAULT_OPTIONS = {
     col_sep:            ",",
@@ -1033,6 +1034,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1034
     skip_blanks:        false,
     force_quotes:       false,
     skip_lines:         nil,
+    liberal_parsing:    false,
   }.freeze
 
   #
@@ -1499,6 +1501,10 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1501
   #                                       a comment. If the passed object does
   #                                       not respond to <tt>match</tt>,
   #                                       <tt>ArgumentError</tt> is thrown.
+  # <b><tt>:liberal_parsing</tt></b>::    When set to a +true+ value, CSV will
+  #                                       attempt to parse input not conformant
+  #                                       with RFC 4180, such as double quotes
+  #                                       in unquoted fields.
   #
   # See CSV::DEFAULT_OPTIONS for the default settings.
   #
@@ -1622,6 +1628,8 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1628
   def skip_blanks?()        @skip_blanks        end
   # Returns +true+ if all output fields are quoted. See CSV::new for details.
   def force_quotes?()       @force_quotes       end
+  # Returns +true+ if input not conformant with RFC 4180 is parsed liberally. See CSV::new for details.
+  def liberal_parsing?()    @liberal_parsing    end
 
   #
   # The Encoding CSV is parsing or writing in.  This will be the Encoding you
@@ -1860,12 +1868,12 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1868
           end
         elsif part[0] == @quote_char
           # If we are starting a new quoted column
-          if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0
+          if part.count(@quote_char) % 2 != 0
             # start an extended column
             csv             << part[1..-1]
             csv.last        << @col_sep
             in_extended_col =  true
-          else
+          elsif part[-1] == @quote_char
             # regular quoted column
             csv << part[1..-2]
             if csv.last =~ @parsers[:stray_quote]
@@ -1873,6 +1881,11 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1881
                     "Missing or stray quote in line #{lineno + 1}"
             end
             csv.last.gsub!(@quote_char * 2, @quote_char)
+          elsif @liberal_parsing
+            csv << part
+          else
+            raise MalformedCSVError,
+                  "Missing or stray quote in line #{lineno + 1}"
           end
         elsif part =~ @parsers[:quote_or_nl]
           # Unquoted field with bad characters.
@@ -1880,7 +1893,11 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1893
             raise MalformedCSVError, "Unquoted fields do not allow " +
                                      "\\r or \\n (line #{lineno + 1})."
           else
-            raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
+            if @liberal_parsing
+              csv << part
+            else
+              raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
+            end
           end
         else
           # Regular ole unquoted field.
@@ -1945,7 +1962,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L1962
     str << " encoding:" << @encoding.name
     # show other attributes
     %w[ lineno     col_sep     row_sep
-        quote_char skip_blanks ].each do |attr_name|
+        quote_char skip_blanks liberal_parsing ].each do |attr_name|
       if a = instance_variable_get("@#{attr_name}")
         str << " " << attr_name << ":" << a.inspect
       end
@@ -2079,6 +2096,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv.rb#L2096
     # store the parser behaviors
     @skip_blanks      = options.delete(:skip_blanks)
     @field_size_limit = options.delete(:field_size_limit)
+    @liberal_parsing  = options.delete(:liberal_parsing)
 
     # prebuild Regexps for faster parsing
     esc_row_sep = escape_re(@row_sep)
Index: test/csv/test_features.rb
===================================================================
--- test/csv/test_features.rb	(revision 53400)
+++ test/csv/test_features.rb	(revision 53401)
@@ -142,6 +142,29 @@ class TestCSV::Features < TestCSV https://github.com/ruby/ruby/blob/trunk/test/csv/test_features.rb#L142
     assert_equal(3, count)
   end
 
+  def test_liberal_parsing
+    input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'
+    assert_raise(CSV::MalformedCSVError) do
+        CSV.parse_line(input)
+    end
+    assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
+                 CSV.parse_line(input, liberal_parsing: true))
+
+    input = '"quoted" field'
+    assert_raise(CSV::MalformedCSVError) do
+        CSV.parse_line(input)
+    end
+    assert_equal(['"quoted" field'],
+                 CSV.parse_line(input, liberal_parsing: true))
+
+    assert_raise(CSV::MalformedCSVError) do
+      CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)
+    end
+
+    assert_equal(["is", 'this "three', ' or four"', "fields"],
+      CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true))
+  end
+
   def test_csv_behavior_readers
     %w[ unconverted_fields return_headers write_headers
         skip_blanks        force_quotes ].each do |behavior|

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]