ruby-changes:69366
From: Joakim <ko1@a...>
Date: Sun, 24 Oct 2021 05:57:54 +0900 (JST)
Subject: [ruby-changes:69366] 7f3dd601c8 (master): [ruby/csv] Changed line ending handling to consider the combination \r\n as a single entry when row is faulty (https://github.com/ruby/csv/pull/220)
https://git.ruby-lang.org/ruby.git/commit/?id=7f3dd601c8

From 7f3dd601c895354c041988251a0be05a8a423664 Mon Sep 17 00:00:00 2001
From: Joakim Antman <antmanj@g...>
Date: Sun, 3 Oct 2021 22:10:48 +0300
Subject: [ruby/csv] Changed line ending handling to consider the combination \r\n as a single entry when row is faulty (https://github.com/ruby/csv/pull/220)

https://github.com/ruby/csv/commit/29cef9ea9d
---
 lib/csv/parser.rb              |  8 ++++----
 test/csv/parse/test_invalid.rb | 13 +++++++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 2fb3b0a46e..d0b02a6423 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -526,7 +526,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L526
   @cr = "\r".encode(@encoding)
   @lf = "\n".encode(@encoding)
-  @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
+  @line_end = Regexp.new("\r\n|\n|\r".encode(@encoding))
   @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
 end
@@ -914,7 +914,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L914
   message = "Any value after quoted field isn't allowed"
   raise MalformedCSVError.new(message, @lineno)
 elsif @unquoted_column_value and
-      (new_line = @scanner.scan(@cr_or_lf))
+      (new_line = @scanner.scan(@line_end))
   ignore_broken_line
   message = "Unquoted fields do not allow new line " +
             "<#{new_line.inspect}>"
@@ -923,7 +923,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L923
   ignore_broken_line
   message = "Illegal quoting"
   raise MalformedCSVError.new(message, @lineno)
- elsif (new_line = @scanner.scan(@cr_or_lf))
+ elsif (new_line = @scanner.scan(@line_end))
   ignore_broken_line
   message = "New line must be <#{@row_separator.inspect}> " +
             "not <#{new_line.inspect}>"
@@ -1089,7 +1089,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L1089
 def ignore_broken_line
   @scanner.scan_all(@not_line_end)
-  @scanner.scan_all(@cr_or_lf)
+  @scanner.scan_all(@line_end)
   @lineno += 1
 end
diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb
index 9dfd081380..ddb59e2b9a 100644
--- a/test/csv/parse/test_invalid.rb
+++ b/test/csv/parse/test_invalid.rb
@@ -36,4 +36,17 @@ ggg,hhh,iii https://github.com/ruby/ruby/blob/trunk/test/csv/parse/test_invalid.rb#L36
                  csv.shift)
     assert_equal(true, csv.eof?)
   end
+
+  def test_ignore_invalid_line_cr_lf
+    data = <<-CSV
+"1","OK"\r
+"2",""NOT" OK"\r
+"3","OK"\r
+CSV
+    csv = CSV.new(data)
+
+    assert_equal(['1', 'OK'], csv.shift)
+    assert_raise(CSV::MalformedCSVError) { csv.shift }
+    assert_equal(['3', 'OK'], csv.shift)
+  end
 end
--
cgit v1.2.1

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/