[前][次][番号順一覧][スレッド一覧]

ruby-changes:70978

From: Seth <ko1@a...>
Date: Sat, 22 Jan 2022 10:01:06 +0900 (JST)
Subject: [ruby-changes:70978] c1a6ff046d (master): [ruby/psych] Add strict_integer option to parse numbers with commas as strings

https://git.ruby-lang.org/ruby.git/commit/?id=c1a6ff046d

From c1a6ff046d4f27c972adf96f9a6724abc2f0647a Mon Sep 17 00:00:00 2001
From: Seth Boyles <sethboyles@g...>
Date: Fri, 14 Jan 2022 19:58:20 +0000
Subject: [ruby/psych] Add strict_integer option to parse numbers with commas
 as strings

Authored-by: Seth Boyles <sethboyles@g...>

https://github.com/ruby/psych/commit/75bebb37b8
---
 ext/psych/lib/psych.rb                  | 13 +++++++------
 ext/psych/lib/psych/nodes/node.rb       |  4 ++--
 ext/psych/lib/psych/scalar_scanner.rb   | 22 +++++++++++++++-------
 ext/psych/lib/psych/visitors/to_ruby.rb |  4 ++--
 test/psych/test_numeric.rb              | 11 +++++++++++
 test/psych/test_scalar_scanner.rb       | 25 +++++++++++++++++++++++++
 6 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/ext/psych/lib/psych.rb b/ext/psych/lib/psych.rb
index 1a95408ca07..42d79efb832 100644
--- a/ext/psych/lib/psych.rb
+++ b/ext/psych/lib/psych.rb
@@ -268,10 +268,10 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych.rb#L268
   # YAML documents that are supplied via user input.  Instead, please use the
   # load method or the safe_load method.
   #
-  def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false
+  def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false, strict_integer: false
     result = parse(yaml, filename: filename)
     return fallback unless result
-    result.to_ruby(symbolize_names: symbolize_names, freeze: freeze)
+    result.to_ruby(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer)
   end
 
   ###
@@ -319,13 +319,13 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych.rb#L319
   #   Psych.safe_load("---\n foo: bar")                         # => {"foo"=>"bar"}
   #   Psych.safe_load("---\n foo: bar", symbolize_names: true)  # => {:foo=>"bar"}
   #
-  def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false
+  def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false
     result = parse(yaml, filename: filename)
     return fallback unless result
 
     class_loader = ClassLoader::Restricted.new(permitted_classes.map(&:to_s),
                                                permitted_symbols.map(&:to_s))
-    scanner      = ScalarScanner.new class_loader
+    scanner      = ScalarScanner.new class_loader, strict_integer: strict_integer
     visitor = if aliases
                 Visitors::ToRuby.new scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze
               else
@@ -365,14 +365,15 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych.rb#L365
   # Raises a TypeError when `yaml` parameter is NilClass.  This method is
   # similar to `safe_load` except that `Symbol` objects are allowed by default.
   #
-  def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false
+  def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false
     safe_load yaml, permitted_classes: permitted_classes,
                     permitted_symbols: permitted_symbols,
                     aliases: aliases,
                     filename: filename,
                     fallback: fallback,
                     symbolize_names: symbolize_names,
-                    freeze: freeze
+                    freeze: freeze,
+                    strict_integer: strict_integer
   end
 
   ###
diff --git a/ext/psych/lib/psych/nodes/node.rb b/ext/psych/lib/psych/nodes/node.rb
index 1f841625ca4..f44fce5f053 100644
--- a/ext/psych/lib/psych/nodes/node.rb
+++ b/ext/psych/lib/psych/nodes/node.rb
@@ -46,8 +46,8 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/nodes/node.rb#L46
       # Convert this node to Ruby.
       #
       # See also Psych::Visitors::ToRuby
-      def to_ruby(symbolize_names: false, freeze: false)
-        Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze).accept(self)
+      def to_ruby(symbolize_names: false, freeze: false, strict_integer: false)
+        Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer).accept(self)
       end
       alias :transform :to_ruby
 
diff --git a/ext/psych/lib/psych/scalar_scanner.rb b/ext/psych/lib/psych/scalar_scanner.rb
index 604cb3e3998..58deea3baa5 100644
--- a/ext/psych/lib/psych/scalar_scanner.rb
+++ b/ext/psych/lib/psych/scalar_scanner.rb
@@ -12,24 +12,32 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/scalar_scanner.rb#L12
     FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10))$/x
 
     # Taken from http://yaml.org/type/int.html
-    INTEGER = /^(?:[-+]?0b[0-1_,]+                        (?# base 2)
-                  |[-+]?0[0-7_,]+                         (?# base 8)
-                  |[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10)
-                  |[-+]?0x[0-9a-fA-F_,]+                  (?# base 16))$/x
+    INTEGER_STRICT = /^(?:[-+]?0b[0-1_]+                  (?# base 2)
+                         |[-+]?0[0-7_]+                   (?# base 8)
+                         |[-+]?(0|[1-9][0-9_]*)           (?# base 10)
+                         |[-+]?0x[0-9a-fA-F_]+            (?# base 16))$/x
+
+    # Same as above, but allows commas.
+    # Not to YML spec, but kept for backwards compatibility
+    INTEGER_LEGACY = /^(?:[-+]?0b[0-1_,]+                        (?# base 2)
+                         |[-+]?0[0-7_,]+                         (?# base 8)
+                         |[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10)
+                         |[-+]?0x[0-9a-fA-F_,]+                  (?# base 16))$/x
 
     attr_reader :class_loader
 
     # Create a new scanner
-    def initialize class_loader
+    def initialize class_loader, strict_integer: false
       @symbol_cache = {}
       @class_loader = class_loader
+      @strict_integer = strict_integer
     end
 
     # Tokenize +string+ returning the Ruby object
     def tokenize string
       return nil if string.empty?
       return @symbol_cache[string] if @symbol_cache.key?(string)
-
+      integer_regex = @strict_integer ? INTEGER_STRICT : INTEGER_LEGACY
       # Check for a String type, being careful not to get caught by hash keys, hex values, and
       # special floats (e.g., -.inf).
       if string.match?(%r{^[^\d.:-]?[[:alpha:]_\s!@#$%\^&*(){}<>|/\\~;=]+}) || string.match?(/\n/)
@@ -89,7 +97,7 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/scalar_scanner.rb#L97
         else
           Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1'))
         end
-      elsif string.match?(INTEGER)
+      elsif string.match?(integer_regex)
         parse_int string
       else
         string
diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb
index 4c1f5610703..935bc74f21c 100644
--- a/ext/psych/lib/psych/visitors/to_ruby.rb
+++ b/ext/psych/lib/psych/visitors/to_ruby.rb
@@ -12,9 +12,9 @@ module Psych https://github.com/ruby/ruby/blob/trunk/ext/psych/lib/psych/visitors/to_ruby.rb#L12
     ###
     # This class walks a YAML AST, converting each node to Ruby
     class ToRuby < Psych::Visitors::Visitor
-      def self.create(symbolize_names: false, freeze: false)
+      def self.create(symbolize_names: false, freeze: false, strict_integer: false)
         class_loader = ClassLoader.new
-        scanner      = ScalarScanner.new class_loader
+        scanner      = ScalarScanner.new class_loader, strict_integer: strict_integer
         new(scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze)
       end
 
diff --git a/test/psych/test_numeric.rb b/test/psych/test_numeric.rb
index 8c3dcd173c1..9c75c016cd4 100644
--- a/test/psych/test_numeric.rb
+++ b/test/psych/test_numeric.rb
@@ -43,5 +43,16 @@ module Psych https://github.com/ruby/ruby/blob/trunk/test/psych/test_numeric.rb#L43
       str = Psych.load('--- 1.1.1')
       assert_equal '1.1.1', str
     end
+
+    # This behavior is not to YML spec, but is kept for backwards compatibility
+    def test_string_with_commas
+      number = Psych.load('--- 12,34,56')
+      assert_equal 123456, number
+    end
+
+    def test_string_with_commas_with_strict_integer
+      str = Psych.load('--- 12,34,56', strict_integer: true)
+      assert_equal '12,34,56', str
+    end
   end
 end
diff --git a/test/psych/test_scalar_scanner.rb b/test/psych/test_scalar_scanner.rb
index ebc9fbdcd2e..145db58fd95 100644
--- a/test/psych/test_scalar_scanner.rb
+++ b/test/psych/test_scalar_scanner.rb
@@ -149,6 +149,31 @@ module Psych https://github.com/ruby/ruby/blob/trunk/test/psych/test_scalar_scanner.rb#L149
       assert_equal 0x123456789abcdef, ss.tokenize('0x12_,34,_56,_789abcdef__')
     end
 
+    def test_scan_strict_int_commas_and_underscores
+      # this test is to ensure adherence to YML spec using the 'strict_integer' option
+      scanner = Psych::ScalarScanner.new ClassLoader.new, strict_integer: true
+      assert_equal 123_456_789, scanner.tokenize('123_456_789')
+      assert_equal '123,456,789', scanner.tokenize('123,456,789')
+      assert_equal '1_2,3,4_5,6_789', scanner.tokenize('1_2,3,4_5,6_789')
+
+      assert_equal 1, scanner.tokenize('1')
+      assert_equal 1, scanner.tokenize('+1')
+      assert_equal(-1, scanner.tokenize('-1'))
+
+      assert_equal 0b010101010, scanner.tokenize('0b010101010')
+      assert_equal 0b010101010, scanner.tokenize('0b01_01_01_010')
+      assert_equal '0b0,1_0,1_,0,1_01,0', scanner.tokenize('0b0,1_0,1_,0,1_01,0')
+
+      assert (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]