[前][次][番号順一覧][スレッド一覧]

ruby-changes:65784

From: usa <ko1@a...>
Date: Mon, 5 Apr 2021 20:45:49 +0900 (JST)
Subject: [ruby-changes:65784] 4870620450 (ruby_2_5): REXML 3.1.7.4

https://git.ruby-lang.org/ruby.git/commit/?id=4870620450

From 48706204503ee83a9925f2a482bcf37ddcc7fa48 Mon Sep 17 00:00:00 2001
From: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Mon, 5 Apr 2021 11:45:31 +0000
Subject: REXML 3.1.7.4

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@67937 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 lib/rexml/doctype.rb                               |  50 +++-
 lib/rexml/parsers/baseparser.rb                    | 323 ++++++++++++++++-----
 lib/rexml/rexml.rb                                 |   2 +-
 test/rexml/parse/test_document_type_declaration.rb | 193 +++++++++++-
 test/rexml/parse/test_notation_declaration.rb      | 181 ++++++++++++
 test/rexml/parser/test_tree.rb                     |   2 +-
 test/rexml/parser/test_ultra_light.rb              |   1 -
 test/rexml/test_core.rb                            |  17 +-
 test/rexml/test_doctype.rb                         | 151 +++++++---
 version.h                                          |   8 +-
 10 files changed, 782 insertions(+), 146 deletions(-)

diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
index 1eb1f5b..cb9bf57 100644
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@@ -7,6 +7,39 @@ require 'rexml/attlistdecl' https://github.com/ruby/ruby/blob/trunk/lib/rexml/doctype.rb#L7
 require 'rexml/xmltokens'
 
 module REXML
+  class ReferenceWriter
+    def initialize(id_type,
+                   public_id_literal,
+                   system_literal)
+      @id_type = id_type
+      @public_id_literal = public_id_literal
+      @system_literal = system_literal
+      @default_quote = "\""
+    end
+
+    def write(output)
+      output << " #{@id_type}"
+      if @public_id_literal
+        if @public_id_literal.include?("'")
+          quote = "\""
+        else
+          quote = @default_quote
+        end
+        output << " #{quote}#{@public_id_literal}#{quote}"
+      end
+      if @system_literal
+        if @system_literal.include?("'")
+          quote = "\""
+        elsif @system_literal.include?("\"")
+          quote = "'"
+        else
+          quote = @default_quote
+        end
+        output << " #{quote}#{@system_literal}#{quote}"
+      end
+    end
+  end
+
   # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
   # ... >.  DOCTYPES can be used to declare the DTD of a document, as well as
   # being used to declare entities used in the document.
@@ -50,6 +83,8 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/doctype.rb#L83
         super( parent )
         @name = first.name
         @external_id = first.external_id
+        @long_name = first.instance_variable_get(:@long_name)
+        @uri = first.instance_variable_get(:@uri)
       elsif first.kind_of? Array
         super( parent )
         @name = first[0]
@@ -112,9 +147,12 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/doctype.rb#L147
       output << START
       output << ' '
       output << @name
-      output << " #@external_id" if @external_id
-      output << " #{@long_name.inspect}" if @long_name
-      output << " #{@uri.inspect}" if @uri
+      if @external_id
+        reference_writer = ReferenceWriter.new(@external_id,
+                                               @long_name,
+                                               @uri)
+        reference_writer.write(output)
+      end
       unless @children.empty?
         output << ' ['
         @children.each { |child|
@@ -249,9 +287,9 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/doctype.rb#L287
     end
 
     def to_s
-      notation = "<!NOTATION #{@name} #{@middle}"
-      notation << " #{@public.inspect}" if @public
-      notation << " #{@system.inspect}" if @system
+      notation = "<!NOTATION #{@name}"
+      reference_writer = ReferenceWriter.new(@middle, @public, @system)
+      reference_writer.write(notation)
       notation << ">"
       notation
     end
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 80eeb0f..e7ef695 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,4 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L1
 # frozen_string_literal: false
+
+require "strscan"
+
 require 'rexml/parseexception'
 require 'rexml/undefinednamespaceexception'
 require 'rexml/source'
@@ -32,8 +35,12 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L35
       COMBININGCHAR = '' # TODO
       EXTENDER = ''      # TODO
 
-      NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
-      NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+      NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
+      QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+      QNAME = /(#{QNAME_STR})/
+
+      # Just for backward compatibility. For example, kramdown uses this.
+      # It's not used in REXML.
       UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
 
       NAMECHAR = '[\-\w\.:]'
@@ -45,8 +52,7 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L52
 
       DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
       DOCTYPE_END = /\A\s*\]\s*>/um
-      DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
-      ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
+      ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
       COMMENT_START = /\A<!--/u
       COMMENT_PATTERN = /<!--(.*?)-->/um
       CDATA_START = /\A<!\[CDATA\[/u
@@ -56,15 +62,14 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L62
       XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
       INSTRUCTION_START = /\A<\?/u
       INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
-      TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
-      CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+      TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
+      CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
 
       VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
       ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
       STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
 
       ENTITY_START = /\A\s*<!ENTITY/
-      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
       ELEMENTDECL_START = /\A\s*<!ELEMENT/um
       ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
       SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@@ -78,9 +83,6 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L83
       ATTDEF_RE = /#{ATTDEF}/
       ATTLISTDECL_START = /\A\s*<!ATTLIST/um
       ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
-      NOTATIONDECL_START = /\A\s*<!NOTATION/um
-      PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
-      SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
 
       TEXT_PATTERN = /\A([^<]*)/um
 
@@ -98,6 +100,11 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L100
       GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
       ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
 
+      NOTATIONDECL_START = /\A\s*<!NOTATION/um
+      EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
+      EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
+      PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
+
       EREFERENCE = /&(?!#{NAME};)/
 
       DEFAULT_ENTITIES = {
@@ -112,7 +119,7 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L119
       # These are patterns to identify common markup errors, to make the
       # error messages more informative.
       ######################################################################
-      MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
+      MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um
 
       def initialize( source )
         self.stream = source
@@ -197,11 +204,9 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L204
         return [ :end_document ] if empty?
         return @stack.shift if @stack.size > 0
         #STDERR.puts @source.encoding
-        @source.read if @source.buffer.size<2
         #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
         if @document_status == nil
-          #@source.consume( /^\s*/um )
-          word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+          word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
           word = word[1] unless word.nil?
           #STDERR.puts "WORD = #{word.inspect}"
           case word
@@ -226,38 +231,49 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/parsers/baseparser.rb#L231
           when INSTRUCTION_START
             return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
           when DOCTYPE_START
-            md = @source.match( DOCTYPE_PATTERN, true )
+            base_error_message = "Malformed DOCTYPE"
+            @source.match(DOCTYPE_START, true)
             @nsstack.unshift(curr_ns=Set.new)
-            identity = md[1]
-            close = md[2]
-            identity =~ IDENTITY
-            name = $1
-            raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
-            pub_sys = $2.nil? ? nil : $2.strip
-            long_name = $4.nil? ? nil : $4.strip
-            uri = $6.nil? ? nil : $6.strip
-            args = [ :start_doctype, name, pub_sys, long_name, uri ]
-            if close == ">"
+            name = parse_name(base_error_message)
+            if @source.match(/\A\s*\[/um, true)
+              id = [nil, nil, nil]
+              @document_status = :in_doctype
+            elsif @source.match(/\A\s*>/um, true)
+              id = [nil, nil, nil]
               @document_status = :after_doctype
-              @source.read if @source.buffer.size<2
- (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]