ruby-changes:70978
From: Seth <ko1@a...>
Date: Sat, 22 Jan 2022 10:01:06 +0900 (JST)
Subject: [ruby-changes:70978] c1a6ff046d (master): [ruby/psych] Add strict_integer option to parse numbers with commas as strings
https://git.ruby-lang.org/ruby.git/commit/?id=c1a6ff046d From c1a6ff046d4f27c972adf96f9a6724abc2f0647a Mon Sep 17 00:00:00 2001 From: Seth Boyles <sethboyles@g...> Date: Fri, 14 Jan 2022 19:58:20 +0000 Subject: [ruby/psych] Add strict_integer option to parse numbers with commas as strings Authored-by: Seth Boyles <sethboyles@g...> https://github.com/ruby/psych/commit/75bebb37b8 --- ext/psych/lib/psych.rb | 13 +++++++------ ext/psych/lib/psych/nodes/node.rb | 4 ++-- ext/psych/lib/psych/scalar_scanner.rb | 22 +++++++++++++++------- ext/psych/lib/psych/visitors/to_ruby.rb | 4 ++-- test/psych/test_numeric.rb | 11 +++++++++++ test/psych/test_scalar_scanner.rb | 25 +++++++++++++++++++++++++ 6 files changed, 62 insertions(+), 17 deletions(-) diff --git a/ext/psych/lib/psych.rb b/ext/psych/lib/psych.rb index 1a95408ca07..42d79efb832 100644 --- a/ext/psych/lib/psych.rb +++ b/ext/psych/lib/psych.rb @@ -268,10 +268,10 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych.rb#L268 # YAML documents that are supplied via user input. Instead, please use the # load method or the safe_load method. 
# - def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false + def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false, strict_integer: false result = parse(yaml, filename: filename) return fallback unless result - result.to_ruby(symbolize_names: symbolize_names, freeze: freeze) + result.to_ruby(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer) end ### @@ -319,13 +319,13 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych.rb#L319 # Psych.safe_load("---\n foo: bar") # => {"foo"=>"bar"} # Psych.safe_load("---\n foo: bar", symbolize_names: true) # => {:foo=>"bar"} # - def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false + def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false result = parse(yaml, filename: filename) return fallback unless result class_loader = ClassLoader::Restricted.new(permitted_classes.map(&:to_s), permitted_symbols.map(&:to_s)) - scanner = ScalarScanner.new class_loader + scanner = ScalarScanner.new class_loader, strict_integer: strict_integer visitor = if aliases Visitors::ToRuby.new scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze else @@ -365,14 +365,15 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych.rb#L365 # Raises a TypeError when `yaml` parameter is NilClass. This method is # similar to `safe_load` except that `Symbol` objects are allowed by default. 
# - def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false + def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false safe_load yaml, permitted_classes: permitted_classes, permitted_symbols: permitted_symbols, aliases: aliases, filename: filename, fallback: fallback, symbolize_names: symbolize_names, - freeze: freeze + freeze: freeze, + strict_integer: strict_integer end ### diff --git a/ext/psych/lib/psych/nodes/node.rb b/ext/psych/lib/psych/nodes/node.rb index 1f841625ca4..f44fce5f053 100644 --- a/ext/psych/lib/psych/nodes/node.rb +++ b/ext/psych/lib/psych/nodes/node.rb @@ -46,8 +46,8 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/nodes/node.rb#L46 # Convert this node to Ruby. # # See also Psych::Visitors::ToRuby - def to_ruby(symbolize_names: false, freeze: false) - Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze).accept(self) + def to_ruby(symbolize_names: false, freeze: false, strict_integer: false) + Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer).accept(self) end alias :transform :to_ruby diff --git a/ext/psych/lib/psych/scalar_scanner.rb b/ext/psych/lib/psych/scalar_scanner.rb index 604cb3e3998..58deea3baa5 100644 --- a/ext/psych/lib/psych/scalar_scanner.rb +++ b/ext/psych/lib/psych/scalar_scanner.rb @@ -12,24 +12,32 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/scalar_scanner.rb#L12 FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10))$/x # Taken from http://yaml.org/type/int.html - INTEGER = /^(?:[-+]?0b[0-1_,]+ (?# base 2) - |[-+]?0[0-7_,]+ (?# base 8) - |[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10) - |[-+]?0x[0-9a-fA-F_,]+ (?# base 16))$/x + INTEGER_STRICT = 
/^(?:[-+]?0b[0-1_]+ (?# base 2) + |[-+]?0[0-7_]+ (?# base 8) + |[-+]?(0|[1-9][0-9_]*) (?# base 10) + |[-+]?0x[0-9a-fA-F_]+ (?# base 16))$/x + + # Same as above, but allows commas. + # Not to YML spec, but kept for backwards compatibility + INTEGER_LEGACY = /^(?:[-+]?0b[0-1_,]+ (?# base 2) + |[-+]?0[0-7_,]+ (?# base 8) + |[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10) + |[-+]?0x[0-9a-fA-F_,]+ (?# base 16))$/x attr_reader :class_loader # Create a new scanner - def initialize class_loader + def initialize class_loader, strict_integer: false @symbol_cache = {} @class_loader = class_loader + @strict_integer = strict_integer end # Tokenize +string+ returning the Ruby object def tokenize string return nil if string.empty? return @symbol_cache[string] if @symbol_cache.key?(string) - + integer_regex = @strict_integer ? INTEGER_STRICT : INTEGER_LEGACY # Check for a String type, being careful not to get caught by hash keys, hex values, and # special floats (e.g., -.inf). if string.match?(%r{^[^\d.:-]?[[:alpha:]_\s!@#$%\^&*(){}<>|/\\~;=]+}) || string.match?(/\n/) @@ -89,7 +97,7 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/scalar_scanner.rb#L97 else Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1')) end - elsif string.match?(INTEGER) + elsif string.match?(integer_regex) parse_int string else string diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb index 4c1f5610703..935bc74f21c 100644 --- a/ext/psych/lib/psych/visitors/to_ruby.rb +++ b/ext/psych/lib/psych/visitors/to_ruby.rb @@ -12,9 +12,9 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/visitors/to_ruby.rb#L12 ### # This class walks a YAML AST, converting each node to Ruby class ToRuby < Psych::Visitors::Visitor - def self.create(symbolize_names: false, freeze: false) + def self.create(symbolize_names: false, freeze: false, strict_integer: false) class_loader = ClassLoader.new - scanner = ScalarScanner.new class_loader + 
scanner = ScalarScanner.new class_loader, strict_integer: strict_integer new(scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze) end diff --git a/test/psych/test_numeric.rb b/test/psych/test_numeric.rb index 8c3dcd173c1..9c75c016cd4 100644 --- a/test/psych/test_numeric.rb +++ b/test/psych/test_numeric.rb @@ -43,5 +43,16 @@ module Psych https://github.com/ruby/ruby/blob/trunk/test/psych/test_numeric.rb#L43 str = Psych.load('--- 1.1.1') assert_equal '1.1.1', str end + + # This behavior is not to YML spec, but is kept for backwards compatibility + def test_string_with_commas + number = Psych.load('--- 12,34,56') + assert_equal 123456, number + end + + def test_string_with_commas_with_strict_integer + str = Psych.load('--- 12,34,56', strict_integer: true) + assert_equal '12,34,56', str + end end end diff --git a/test/psych/test_scalar_scanner.rb b/test/psych/test_scalar_scanner.rb index ebc9fbdcd2e..145db58fd95 100644 --- a/test/psych/test_scalar_scanner.rb +++ b/test/psych/test_scalar_scanner.rb @@ -149,6 +149,31 @@ module Psych https://github.com/ruby/ruby/blob/trunk/test/psych/test_scalar_scanner.rb#L149 assert_equal 0x123456789abcdef, ss.tokenize('0x12_,34,_56,_789abcdef__') end + def test_scan_strict_int_commas_and_underscores + # this test is to ensure adherence to YML spec using the 'strict_integer' option + scanner = Psych::ScalarScanner.new ClassLoader.new, strict_integer: true + assert_equal 123_456_789, scanner.tokenize('123_456_789') + assert_equal '123,456,789', scanner.tokenize('123,456,789') + assert_equal '1_2,3,4_5,6_789', scanner.tokenize('1_2,3,4_5,6_789') + + assert_equal 1, scanner.tokenize('1') + assert_equal 1, scanner.tokenize('+1') + assert_equal(-1, scanner.tokenize('-1')) + + assert_equal 0b010101010, scanner.tokenize('0b010101010') + assert_equal 0b010101010, scanner.tokenize('0b01_01_01_010') + assert_equal '0b0,1_0,1_,0,1_01,0', scanner.tokenize('0b0,1_0,1_,0,1_01,0') + + assert (... truncated) -- ML: ruby-changes@q... 
Info: http://www.atdot.net/~ko1/quickml/