ruby-changes:62280
From: Sutou <ko1@a...>
Date: Mon, 20 Jul 2020 03:35:24 +0900 (JST)
Subject: [ruby-changes:62280] 814bfc8adc (master): [ruby/csv] Fix a parse bug when split character exists in middle of column value
https://git.ruby-lang.org/ruby.git/commit/?id=814bfc8adc

From 814bfc8adc128ed050f2b60a423beb86e00fc6ec Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@c...>
Date: Wed, 25 Dec 2019 06:59:43 +0900
Subject: [ruby/csv] Fix a parse bug when split character exists in middle of column value

GitHub: fix #115

Reported by TOMITA Masahiro. Thanks!!!

https://github.com/ruby/csv/commit/398b3564c5

diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index f30cfc6..924ca46 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -446,6 +446,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L446
       @strip = @options[:strip]
       @escaped_strip = nil
       @strip_value = nil
+      @rstrip_value = nil
       if @strip.is_a?(String)
         case @strip.length
         when 0
@@ -460,6 +461,8 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L461
         if @quote_character
           @strip_value = Regexp.new(@escaped_strip +
                                     "+".encode(@encoding))
+          @rstrip_value = Regexp.new(@escaped_strip +
+                                     "+\\z".encode(@encoding))
         end
         @need_robust_parsing = true
       elsif @strip
@@ -467,6 +470,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L470
         @escaped_strip = strip_values.encode(@encoding)
         if @quote_character
           @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
+          @rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding))
         end
         @need_robust_parsing = true
       end
@@ -561,9 +565,6 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L565
           unless @liberal_parsing
             no_unquoted_values << @escaped_quote_character
           end
-          if @escaped_strip
-            no_unquoted_values << @escaped_strip
-          end
           @unquoted_value = Regexp.new("[^".encode(@encoding) +
                                        no_unquoted_values +
                                        "]+".encode(@encoding))
@@ -939,6 +940,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L940
         if @liberal_parsing
           quoted_value = parse_quoted_column_value
           if quoted_value
+            @scanner.scan_all(@strip_value) if @strip_value
             unquoted_value = parse_unquoted_column_value
             if unquoted_value
               if @double_quote_outside_quote
@@ -986,6 +988,9 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L988
           end
         end
         value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
+        if @rstrip_value
+          value.gsub!(@rstrip_value, "")
+        end
         value
       end
 
diff --git a/test/csv/parse/test_strip.rb b/test/csv/parse/test_strip.rb
index 0255bb9..3564fcb 100644
--- a/test/csv/parse/test_strip.rb
+++ b/test/csv/parse/test_strip.rb
@@ -21,6 +21,11 @@ class TestCSVParseStrip < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/csv/parse/test_strip.rb#L21
                  CSV.parse_line(%Q{a ,b }, strip: true))
   end
 
+  def test_middle
+    assert_equal(["a b"],
+                 CSV.parse_line(%Q{a b}, strip: true))
+  end
+
   def test_quoted
     assert_equal([" a ", " b "],
                  CSV.parse_line(%Q{" a "," b "}, strip: true))
-- 
cgit v0.10.2

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/