ruby-changes:70516
From: Sutou <ko1@a...>
Date: Fri, 24 Dec 2021 14:41:29 +0900 (JST)
Subject: [ruby-changes:70516] 22ef4f6445 (master): [ruby/csv] Revert "parser: fix a keep bug that some texts may be dropped unexpectedly"
https://git.ruby-lang.org/ruby.git/commit/?id=22ef4f6445 From 22ef4f6445376b992b2725124594dad1c77a185e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei <kou@c...> Date: Fri, 24 Dec 2021 10:44:11 +0900 Subject: [ruby/csv] Revert "parser: fix a keep bug that some texts may be dropped unexpectedly" This reverts commit https://github.com/ruby/csv/commit/5c6523da0a61. This introduces another problem. We should try again later. https://github.com/ruby/csv/commit/43a1d6fff1 --- lib/csv/parser.rb | 85 ++++++++++++++--------------------- test/csv/parse/test_inputs_scanner.rb | 37 --------------- 2 files changed, 34 insertions(+), 88 deletions(-) delete mode 100644 test/csv/parse/test_inputs_scanner.rb diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 78ff363d692..7e943acf214 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -167,7 +167,6 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L167 end def keep_start - adjust_last_keep @keeps.push([@scanner.pos, nil]) end @@ -198,17 +197,7 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L197 end def keep_drop - _, buffer = @keeps.pop - return unless buffer - - last_keep = @keeps.last - return unless last_keep - - if last_keep[1] - last_keep[1] << buffer - else - last_keep[1] = buffer - end + @keeps.pop end def rest @@ -216,30 +205,24 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L205 end private - def adjust_last_keep - keep = @keeps.last - return if keep.nil? - - keep_start = keep[0] - return if @scanner.pos == keep_start - - string = @scanner.string - keep_data = string.byteslice(keep_start, @scanner.pos - keep_start) - if keep_data - keep_buffer = keep[1] - if keep_buffer - keep_buffer << keep_data - else - keep[1] = keep_data.dup - end - end - keep[0] = 0 - end - def read_chunk return false if @last_scanner - adjust_last_keep + unless @keeps.empty? 
+ keep = @keeps.last + keep_start = keep[0] + string = @scanner.string + keep_data = string.byteslice(keep_start, @scanner.pos - keep_start) + if keep_data + keep_buffer = keep[1] + if keep_buffer + keep_buffer << keep_data + else + keep[1] = keep_data.dup + end + end + keep[0] = 0 + end input = @inputs.first case input @@ -746,26 +729,28 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L729 sample[0, 128].index(@quote_character) end - class UnoptimizedStringIO # :nodoc: - def initialize(string) - @io = StringIO.new(string, "rb:#{string.encoding}") - end + SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") + if SCANNER_TEST + class UnoptimizedStringIO + def initialize(string) + @io = StringIO.new(string, "rb:#{string.encoding}") + end - def gets(*args) - @io.gets(*args) - end + def gets(*args) + @io.gets(*args) + end - def each_line(*args, &block) - @io.each_line(*args, &block) - end + def each_line(*args, &block) + @io.each_line(*args, &block) + end - def eof? - @io.eof? + def eof? + @io.eof? 
+ end end - end - SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") - if SCANNER_TEST + SCANNER_TEST_CHUNK_SIZE = + Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10) def build_scanner inputs = @samples.collect do |sample| UnoptimizedStringIO.new(sample) @@ -775,12 +760,10 @@ class CSV https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L760 else inputs << @input end - chunk_size = - Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10) InputsScanner.new(inputs, @encoding, @row_separator, - chunk_size: chunk_size) + chunk_size: SCANNER_TEST_CHUNK_SIZE) end else def build_scanner diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb deleted file mode 100644 index dd0a64cc455..00000000000 --- a/test/csv/parse/test_inputs_scanner.rb +++ /dev/null @@ -1,37 +0,0 @@ https://github.com/ruby/ruby/blob/trunk/lib/csv/parser.rb#L0 -require_relative "../helper" - -class TestCSVParseInputsScanner < Test::Unit::TestCase - include Helper - - def test_keep_over_chunks_nested_back - input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") - scanner = CSV::Parser::InputsScanner.new([input], - Encoding::UTF_8, - nil, - chunk_size: 2) - scanner.keep_start - assert_equal("abc", scanner.scan_all(/[a-c]+/)) - scanner.keep_start - assert_equal("def", scanner.scan_all(/[d-f]+/)) - scanner.keep_back - scanner.keep_back - assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) - end - - - def test_keep_over_chunks_nested_drop_back - input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") - scanner = CSV::Parser::InputsScanner.new([input], - Encoding::UTF_8, - nil, - chunk_size: 3) - scanner.keep_start - assert_equal("ab", scanner.scan(/../)) - scanner.keep_start - assert_equal("c", scanner.scan(/./)) - assert_equal("d", scanner.scan(/./)) - scanner.keep_drop - scanner.keep_back - assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) - end -end -- cgit v1.2.1 -- ML: ruby-changes@q... 
Info: http://www.atdot.net/~ko1/quickml/