ruby-changes:2104
From: ko1@a...
Date: 2 Oct 2007 10:46:45 +0900
Subject: [ruby-changes:2104] ser - Ruby:r13595 (trunk): r1366@bean: ser | 2007-10-01 21:24:33 -0400
ser 2007-10-02 10:46:32 +0900 (Tue, 02 Oct 2007) New Revision: 13595 Modified directories: trunk/lib/rexml/ Modified files: trunk/lib/rexml/attribute.rb trunk/lib/rexml/element.rb trunk/lib/rexml/parsers/baseparser.rb trunk/lib/rexml/parsers/treeparser.rb Log: r1366@bean: ser | 2007-10-01 21:24:33 -0400 r1352@bean: ser | 2007-07-29 11:33:07 -0400 Implements namespace validation in the baseparser. This means that, as per the XML namespace spec, unbound prefixes generate UndefinedNamespaceException. Also, as per the namespace spec, the 'xml' prefix must be bound to http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared in the XML. Modified: trunk/lib/rexml/ http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/treeparser.rb?r1=13595&r2=13594 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/attribute.rb?r1=13595&r2=13594 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml?r1=13595&r2=13594 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/baseparser.rb?r1=13595&r2=13594 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/element.rb?r1=13595&r2=13594 Index: lib/rexml/parsers/baseparser.rb =================================================================== --- lib/rexml/parsers/baseparser.rb (revision 13594) +++ lib/rexml/parsers/baseparser.rb (revision 13595) @@ -1,5 +1,6 @@ require 'rexml/parseexception' require 'rexml/source' +require 'set' module REXML module Parsers @@ -24,7 +25,8 @@ # Nat Price gave me some good ideas for the API. 
class BaseParser NCNAME_STR= '[\w:][\-\w\d.]*' - NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAMECHAR = '[\-\w\d\.:]' NAME = "([\\w:]#{NAMECHAR}*)" @@ -35,7 +37,7 @@ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um - ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um + ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A<!--/u COMMENT_PATTERN = /<!--(.*?)-->/um CDATA_START = /\A<!\[CDATA\[/u @@ -45,7 +47,7 @@ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um + TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um @@ -133,6 +135,7 @@ @tags = [] @stack = [] @entities = [] + @nsstack = [] end def position @@ -213,6 +216,7 @@ return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START md = @source.match( DOCTYPE_PATTERN, true ) + @nsstack.unshift(curr_ns=Set.new) identity = md[1] close = md[2] identity =~ IDENTITY @@ -288,6 +292,9 @@ val = attdef[3] val = attdef[4] if val == "#FIXED " pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end end end return [ :attlistdecl, element, pairs, contents ] @@ -312,6 +319,7 @@ begin if @source.buffer[0] == ?< if @source.buffer[1] == ?/ + @nsstack.shift last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) @@ -345,19 +353,47 @@ raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("malformed XML: missing tag start", @source) end - attrs = [] - if md[2].size > 0 - attrs = 
md[2].scan( ATTRIBUTE_PATTERN ) + attributes = {} + prefixes = Set.new + prefixes << md[2] if md[2] + @nsstack.unshift(curr_ns=Set.new) + if md[4].size > 0 + attrs = md[4].scan( ATTRIBUTE_PATTERN ) raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 + attrs.each { |a,b,c,d,e| + if b == "xmlns" + if c == "xml" + if d != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif c == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << c + elsif b + prefixes << b unless b == "xml" + end + attributes[a] = e + } end - if md[4] + # Verify that all of the prefixes have been defined + for prefix in prefixes + unless @nsstack.find{|k| k.member?(prefix)} + raise UndefinedNamespaceException.new(prefix,@source,self) + end + end + + if md[6] @closed = md[1] + @nsstack.shift else @tags.push( md[1] ) end - attributes = {} - attrs.each { |a,b,c| attributes[a] = c } return [ :start_element, md[1], attributes ] end else @@ -371,6 +407,8 @@ # return PullEvent.new( :text, md[1], unnormalized ) return [ :text, md[1] ] end + rescue REXML::UndefinedNamespaceException + raise rescue REXML::ParseException raise rescue Exception, NameError => error Index: lib/rexml/parsers/treeparser.rb =================================================================== --- lib/rexml/parsers/treeparser.rb (revision 13594) +++ lib/rexml/parsers/treeparser.rb (revision 13595) @@ -29,8 +29,7 @@ return when :start_element tag_stack.push(event[1]) - # find the observers for namespaces - @build_context = @build_context.add_element( event[1], event[2] ) + el = @build_context = @build_context.add_element( event[1], event[2] ) when 
:end_element tag_stack.pop @build_context = @build_context.parent @@ -86,6 +85,8 @@ end rescue REXML::Validation::ValidationException raise + rescue REXML::UndefinedNamespaceException + raise rescue raise ParseException.new( $!.message, @parser.source, @parser, $! ) end Index: lib/rexml/element.rb =================================================================== --- lib/rexml/element.rb (revision 13594) +++ lib/rexml/element.rb (revision 13595) @@ -855,15 +855,15 @@ # Source (see Element.initialize). If not supplied or nil, a # new, default Element will be constructed # Returns:: the added Element - # a = Element.new 'a' - # a.elements.add Element.new 'b' #-> <a><b/></a> - # a.elements.add 'c' #-> <a><b/><c/></a> + # a = Element.new('a') + # a.elements.add(Element.new('b')) #-> <a><b/></a> + # a.elements.add('c') #-> <a><b/><c/></a> def add element=nil rv = nil if element.nil? - Element.new "", self, @element.context + Element.new("", self, @element.context) elsif not element.kind_of?(Element) - Element.new element, self, @element.context + Element.new(element, self, @element.context) else @element << element element.context = @element.context Index: lib/rexml/attribute.rb =================================================================== --- lib/rexml/attribute.rb (revision 13594) +++ lib/rexml/attribute.rb (revision 13595) @@ -50,7 +50,7 @@ @element = first.element end elsif first.kind_of? String - @element = parent if parent.kind_of? 
Element + @element = parent self.name = first @normalized = second.to_s else Property changes on: lib/rexml ___________________________________________________________________ Name: svk:merge - 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml-trunk/src/rexml:1287 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml/trunk/src/rexml:1346 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-1.8.6:1304 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-trunk:1365 877f57f0-f5bd-0310-8c13-bb9e5bdefd87:/branches/3.1.7/src/rexml:1274 + 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml-trunk/src/rexml:1287 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml/trunk/src/rexml:1352 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-1.8.6:1304 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-trunk:1366 877f57f0-f5bd-0310-8c13-bb9e5bdefd87:/branches/3.1.7/src/rexml:1274 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml