[前][次][番号順一覧][スレッド一覧]

ruby-changes:70507

From: Sutou <ko1@a...>
Date: Fri, 24 Dec 2021 14:41:17 +0900 (JST)
Subject: [ruby-changes:70507] 4a5d372ca8 (master): [ruby/csv] parser: fix a keep bug that some texts may be dropped unexpectedly

https://git.ruby-lang.org/ruby.git/commit/?id=4a5d372ca8

From 4a5d372ca8902a649928eb0689aca7edcfaa07b6 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@c...>
Date: Fri, 24 Dec 2021 10:18:18 +0900
Subject: [ruby/csv] parser: fix a keep bug that some texts may be dropped
 unexpectedly

Ruby: [Bug #18245] [ruby-core:105587]

Reported by Hassan Abdul Rehman.

https://github.com/ruby/csv/commit/5c6523da0a
---
 lib/csv/parser.rb                     | 85 +++++++++++++++++++++--------------
 test/csv/parse/test_inputs_scanner.rb | 37 +++++++++++++++
 2 files changed, 88 insertions(+), 34 deletions(-)
 create mode 100644 test/csv/parse/test_inputs_scanner.rb

diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index ef33a694781..e1fe559a41b 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -166,6 +166,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L166
       end
 
       def keep_start
+        adjust_last_keep
         @keeps.push([@scanner.pos, nil])
       end
 
@@ -196,7 +197,17 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L197
       end
 
       def keep_drop
-        @keeps.pop
+        _, buffer = @keeps.pop
+        return unless buffer
+
+        last_keep = @keeps.last
+        return unless last_keep
+
+        if last_keep[1]
+          last_keep[1] << buffer
+        else
+          last_keep[1] = buffer
+        end
       end
 
       def rest
@@ -204,24 +215,30 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L215
       end
 
       private
+      def adjust_last_keep
+        keep = @keeps.last
+        return if keep.nil?
+
+        keep_start = keep[0]
+        return if @scanner.pos == keep_start
+
+        string = @scanner.string
+        keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
+        if keep_data
+          keep_buffer = keep[1]
+          if keep_buffer
+            keep_buffer << keep_data
+          else
+            keep[1] = keep_data.dup
+          end
+        end
+        keep[0] = 0
+      end
+
       def read_chunk
         return false if @last_scanner
 
-        unless @keeps.empty?
-          keep = @keeps.last
-          keep_start = keep[0]
-          string = @scanner.string
-          keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
-          if keep_data
-            keep_buffer = keep[1]
-            if keep_buffer
-              keep_buffer << keep_data
-            else
-              keep[1] = keep_data.dup
-            end
-          end
-          keep[0] = 0
-        end
+        adjust_last_keep
 
         input = @inputs.first
         case input
@@ -728,28 +745,26 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L745
       sample[0, 128].index(@quote_character)
     end
 
-    SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
-    if SCANNER_TEST
-      class UnoptimizedStringIO
-        def initialize(string)
-          @io = StringIO.new(string, "rb:#{string.encoding}")
-        end
+    class UnoptimizedStringIO # :nodoc:
+      def initialize(string)
+        @io = StringIO.new(string, "rb:#{string.encoding}")
+      end
 
-        def gets(*args)
-          @io.gets(*args)
-        end
+      def gets(*args)
+        @io.gets(*args)
+      end
 
-        def each_line(*args, &block)
-          @io.each_line(*args, &block)
-        end
+      def each_line(*args, &block)
+        @io.each_line(*args, &block)
+      end
 
-        def eof?
-          @io.eof?
-        end
+      def eof?
+        @io.eof?
       end
+    end
 
-      SCANNER_TEST_CHUNK_SIZE =
-        Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
+    SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
+    if SCANNER_TEST
       def build_scanner
         inputs = @samples.collect do |sample|
           UnoptimizedStringIO.new(sample)
@@ -759,9 +774,11 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L774
         else
           inputs << @input
         end
+        chunk_size =
+          Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
         InputsScanner.new(inputs,
                           @encoding,
-                          chunk_size: SCANNER_TEST_CHUNK_SIZE)
+                          chunk_size: chunk_size)
       end
     else
       def build_scanner
diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb
new file mode 100644
index 00000000000..dd0a64cc455
--- /dev/null
+++ b/test/csv/parse/test_inputs_scanner.rb
@@ -0,0 +1,37 @@ https://github.com/ruby/ruby/blob/trunk/test/csv/parse/test_inputs_scanner.rb#L1
+require_relative "../helper"
+
+class TestCSVParseInputsScanner < Test::Unit::TestCase
+  include Helper
+
+  def test_keep_over_chunks_nested_back
+    input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+    scanner = CSV::Parser::InputsScanner.new([input],
+                                             Encoding::UTF_8,
+                                             nil,
+                                             chunk_size: 2)
+    scanner.keep_start
+    assert_equal("abc", scanner.scan_all(/[a-c]+/))
+    scanner.keep_start
+    assert_equal("def", scanner.scan_all(/[d-f]+/))
+    scanner.keep_back
+    scanner.keep_back
+    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+  end
+
+
+  def test_keep_over_chunks_nested_drop_back
+    input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+    scanner = CSV::Parser::InputsScanner.new([input],
+                                             Encoding::UTF_8,
+                                             nil,
+                                             chunk_size: 3)
+    scanner.keep_start
+    assert_equal("ab", scanner.scan(/../))
+    scanner.keep_start
+    assert_equal("c", scanner.scan(/./))
+    assert_equal("d", scanner.scan(/./))
+    scanner.keep_drop
+    scanner.keep_back
+    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+  end
+end
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]