ruby-changes:33074
From: kou <ko1@a...>
Date: Sun, 23 Feb 2014 18:01:40 +0900 (JST)
Subject: [ruby-changes:33074] kou:r45153 (trunk): * lib/rexml/xmltokens.rb: Add missing non ASCII valid characters
kou 2014-02-23 18:01:32 +0900 (Sun, 23 Feb 2014) New Revision: 45153 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=45153 Log: * lib/rexml/xmltokens.rb: Add missing non ASCII valid characters to element name characters. Now, REXML name tokens exactly match "[5] Name" in the XML spec and "[4] NCName" in the Namespaces in XML spec. See comment about the details. [Bug #9539] [ruby-core:60901] Reported by Mario Barcala. Thanks!!! * test/rexml/xpath/test_node.rb: Add tests for the above case. Added files: trunk/test/rexml/xpath/test_node.rb Modified files: trunk/ChangeLog trunk/lib/rexml/xmltokens.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 45152) +++ ChangeLog (revision 45153) @@ -1,3 +1,14 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Sun Feb 23 17:55:50 2014 Kouhei Sutou <kou@c...> + + * lib/rexml/xmltokens.rb: Add missing non ASCII valid characters + to element name characters. Now, REXML name tokens exactly + match "[5] Name" in the XML spec and "[4] NCName" in the + Namespaces in XML spec. See comment about the details. + [Bug #9539] [ruby-core:60901] + Reported by Mario Barcala. Thanks!!! + + * test/rexml/xpath/test_node.rb: Add tests for the above case. + Sun Feb 23 12:18:54 2014 Nobuyoshi Nakada <nobu@r...> * ext/socket/raddrinfo.c (inet_pton): use rb_w32_inet_pton, instead of Index: lib/rexml/xmltokens.rb =================================================================== --- lib/rexml/xmltokens.rb (revision 45152) +++ lib/rexml/xmltokens.rb (revision 45153) @@ -2,12 +2,78 @@ module REXML https://github.com/ruby/ruby/blob/trunk/lib/rexml/xmltokens.rb#L2 # Defines a number of tokens used for parsing XML. Not for general # consumption. 
module XMLTokens - NCNAME_STR= '[\w:][\-\w.]*' - NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + # From http://www.w3.org/TR/REC-xml/#sec-common-syn + # + # [4] NameStartChar ::= + # ":" | + # [A-Z] | + # "_" | + # [a-z] | + # [#xC0-#xD6] | + # [#xD8-#xF6] | + # [#xF8-#x2FF] | + # [#x370-#x37D] | + # [#x37F-#x1FFF] | + # [#x200C-#x200D] | + # [#x2070-#x218F] | + # [#x2C00-#x2FEF] | + # [#x3001-#xD7FF] | + # [#xF900-#xFDCF] | + # [#xFDF0-#xFFFD] | + # [#x10000-#xEFFFF] + name_start_chars = [ + ":", + "A-Z", + "_", + "a-z", + "\\u00C0-\\u00D6", + "\\u00D8-\\u00F6", + "\\u00F8-\\u02FF", + "\\u0370-\\u037D", + "\\u037F-\\u1FFF", + "\\u200C-\\u200D", + "\\u2070-\\u218F", + "\\u2C00-\\u2FEF", + "\\u3001-\\uD7FF", + "\\uF900-\\uFDCF", + "\\uFDF0-\\uFFFD", + "\\u{10000}-\\u{EFFFF}", + ] + # From http://www.w3.org/TR/REC-xml/#sec-common-syn + # + # [4a] NameChar ::= + # NameStartChar | + # "-" | + # "." | + # [0-9] | + # #xB7 | + # [#x0300-#x036F] | + # [#x203F-#x2040] + name_chars = name_start_chars + [ + "\\-", + "\\.", + "0-9", + "\\u00B7", + "\\u0300-\\u036F", + "\\u203F-\\u2040", + ] + NAME_START_CHAR = "[#{name_start_chars.join('')}]" + NAME_CHAR = "[#{name_chars.join('')}]" + NAMECHAR = NAME_CHAR # deprecated. Use NAME_CHAR instead. 
- NAMECHAR = '[\-\w\.:]' - NAME = "([\\w:]#{NAMECHAR}*)" - NMTOKEN = "(?:#{NAMECHAR})+" + # From http://www.w3.org/TR/xml-names11/#NT-NCName + # + # [6] NCNameStartChar ::= NameStartChar - ':' + ncname_start_chars = name_start_chars - [":"] + # From http://www.w3.org/TR/xml-names11/#NT-NCName + # + # [5] NCNameChar ::= NameChar - ':' + ncname_chars = name_chars - [":"] + NCNAME_STR = "[#{ncname_start_chars.join('')}][#{ncname_chars.join('')}]*" + NAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + + NAME = "(#{NAME_START_CHAR}#{NAME_CHAR}*)" + NMTOKEN = "(?:#{NAME_CHAR})+" NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*" REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)" Index: test/rexml/xpath/test_node.rb =================================================================== --- test/rexml/xpath/test_node.rb (revision 0) +++ test/rexml/xpath/test_node.rb (revision 45153) @@ -0,0 +1,40 @@ https://github.com/ruby/ruby/blob/trunk/test/rexml/xpath/test_node.rb#L1 +# -*- coding: utf-8 -*- + +require_relative "../rexml_test_utils" + +require "rexml/document" + +class TestXPathNode < Test::Unit::TestCase + def matches(xml, xpath) + document = REXML::Document.new(xml) + REXML::XPath.each(document, xpath).collect(&:to_s) + end + + class TestQName < self + def test_ascii + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <ascii> + <child>child</child> + </ascii> +</root> + XML + assert_equal(["<child>child</child>"], + matches(xml, "/root/ascii/child")) + end + + def test_non_ascii + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <non-àscii> + <child>child</child> + </non-àscii> +</root> + XML + assert_equal(["<child>child</child>"], + matches(xml, "/root/non-àscii/child")) + end + end +end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/