[前][次][番号順一覧][スレッド一覧]

ruby-changes:70516

From: Sutou <ko1@a...>
Date: Fri, 24 Dec 2021 14:41:29 +0900 (JST)
Subject: [ruby-changes:70516] 22ef4f6445 (master): [ruby/csv] Revert "parser: fix a keep bug that some texts may be dropped unexpectedly"

https://git.ruby-lang.org/ruby.git/commit/?id=22ef4f6445

From 22ef4f6445376b992b2725124594dad1c77a185e Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@c...>
Date: Fri, 24 Dec 2021 10:44:11 +0900
Subject: [ruby/csv] Revert "parser: fix a keep bug that some texts may be
 dropped unexpectedly"

This reverts commit https://github.com/ruby/csv/commit/5c6523da0a61.

This introduces another problem. We should try again later.

https://github.com/ruby/csv/commit/43a1d6fff1
---
 lib/csv/parser.rb                     | 85 ++++++++++++++---------------------
 test/csv/parse/test_inputs_scanner.rb | 37 ---------------
 2 files changed, 34 insertions(+), 88 deletions(-)
 delete mode 100644 test/csv/parse/test_inputs_scanner.rb

diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 78ff363d692..7e943acf214 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -167,7 +167,6 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L167
       end
 
       def keep_start
-        adjust_last_keep
         @keeps.push([@scanner.pos, nil])
       end
 
@@ -198,17 +197,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L197
       end
 
       def keep_drop
-        _, buffer = @keeps.pop
-        return unless buffer
-
-        last_keep = @keeps.last
-        return unless last_keep
-
-        if last_keep[1]
-          last_keep[1] << buffer
-        else
-          last_keep[1] = buffer
-        end
+        @keeps.pop
       end
 
       def rest
@@ -216,30 +205,24 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L205
       end
 
       private
-      def adjust_last_keep
-        keep = @keeps.last
-        return if keep.nil?
-
-        keep_start = keep[0]
-        return if @scanner.pos == keep_start
-
-        string = @scanner.string
-        keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
-        if keep_data
-          keep_buffer = keep[1]
-          if keep_buffer
-            keep_buffer << keep_data
-          else
-            keep[1] = keep_data.dup
-          end
-        end
-        keep[0] = 0
-      end
-
       def read_chunk
         return false if @last_scanner
 
-        adjust_last_keep
+        unless @keeps.empty?
+          keep = @keeps.last
+          keep_start = keep[0]
+          string = @scanner.string
+          keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
+          if keep_data
+            keep_buffer = keep[1]
+            if keep_buffer
+              keep_buffer << keep_data
+            else
+              keep[1] = keep_data.dup
+            end
+          end
+          keep[0] = 0
+        end
 
         input = @inputs.first
         case input
@@ -746,26 +729,28 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L729
       sample[0, 128].index(@quote_character)
     end
 
-    class UnoptimizedStringIO # :nodoc:
-      def initialize(string)
-        @io = StringIO.new(string, "rb:#{string.encoding}")
-      end
+    SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
+    if SCANNER_TEST
+      class UnoptimizedStringIO
+        def initialize(string)
+          @io = StringIO.new(string, "rb:#{string.encoding}")
+        end
 
-      def gets(*args)
-        @io.gets(*args)
-      end
+        def gets(*args)
+          @io.gets(*args)
+        end
 
-      def each_line(*args, &block)
-        @io.each_line(*args, &block)
-      end
+        def each_line(*args, &block)
+          @io.each_line(*args, &block)
+        end
 
-      def eof?
-        @io.eof?
+        def eof?
+          @io.eof?
+        end
       end
-    end
 
-    SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
-    if SCANNER_TEST
+      SCANNER_TEST_CHUNK_SIZE =
+        Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
       def build_scanner
         inputs = @samples.collect do |sample|
           UnoptimizedStringIO.new(sample)
@@ -775,12 +760,10 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L760
         else
           inputs << @input
         end
-        chunk_size =
-          Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
         InputsScanner.new(inputs,
                           @encoding,
                           @row_separator,
-                          chunk_size: chunk_size)
+                          chunk_size: SCANNER_TEST_CHUNK_SIZE)
       end
     else
       def build_scanner
diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb
deleted file mode 100644
index dd0a64cc455..00000000000
--- a/test/csv/parse/test_inputs_scanner.rb
+++ /dev/null
@@ -1,37 +0,0 @@ https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L0
-require_relative "../helper"
-
-class TestCSVParseInputsScanner < Test::Unit::TestCase
-  include Helper
-
-  def test_keep_over_chunks_nested_back
-    input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
-    scanner = CSV::Parser::InputsScanner.new([input],
-                                             Encoding::UTF_8,
-                                             nil,
-                                             chunk_size: 2)
-    scanner.keep_start
-    assert_equal("abc", scanner.scan_all(/[a-c]+/))
-    scanner.keep_start
-    assert_equal("def", scanner.scan_all(/[d-f]+/))
-    scanner.keep_back
-    scanner.keep_back
-    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
-  end
-
-
-  def test_keep_over_chunks_nested_drop_back
-    input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
-    scanner = CSV::Parser::InputsScanner.new([input],
-                                             Encoding::UTF_8,
-                                             nil,
-                                             chunk_size: 3)
-    scanner.keep_start
-    assert_equal("ab", scanner.scan(/../))
-    scanner.keep_start
-    assert_equal("c", scanner.scan(/./))
-    assert_equal("d", scanner.scan(/./))
-    scanner.keep_drop
-    scanner.keep_back
-    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
-  end
-end
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]