ruby-changes:2104
From: ko1@a...
Date: 2 Oct 2007 10:46:45 +0900
Subject: [ruby-changes:2104] ser - Ruby:r13595 (trunk): r1366@bean: ser | 2007-10-01 21:24:33 -0400
ser 2007-10-02 10:46:32 +0900 (Tue, 02 Oct 2007)
New Revision: 13595
Modified directories:
trunk/lib/rexml/
Modified files:
trunk/lib/rexml/attribute.rb
trunk/lib/rexml/element.rb
trunk/lib/rexml/parsers/baseparser.rb
trunk/lib/rexml/parsers/treeparser.rb
Log:
r1366@bean: ser | 2007-10-01 21:24:33 -0400
r1352@bean: ser | 2007-07-29 11:33:07 -0400
Implements namespace validation in the baseparser. This means that, as per
the XML namespace spec, unbound prefixes generate UndefinedNamespaceException.
Also, as per the namespace spec, the 'xml' prefix must be bound to
http://www.w3.org/XML/1998/namespace, and the 'xmlns' prefix must not be declared
in the XML.
Modified: trunk/lib/rexml/
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/treeparser.rb?r1=13595&r2=13594
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/attribute.rb?r1=13595&r2=13594
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml?r1=13595&r2=13594
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/baseparser.rb?r1=13595&r2=13594
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/element.rb?r1=13595&r2=13594
Index: lib/rexml/parsers/baseparser.rb
===================================================================
--- lib/rexml/parsers/baseparser.rb (revision 13594)
+++ lib/rexml/parsers/baseparser.rb (revision 13595)
@@ -1,5 +1,6 @@
require 'rexml/parseexception'
require 'rexml/source'
+require 'set'
module REXML
module Parsers
@@ -24,7 +25,8 @@
# Nat Price gave me some good ideas for the API.
class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*'
- NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+ NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
@@ -35,7 +37,7 @@
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
- ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
@@ -45,7 +47,7 @@
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@@ -133,6 +135,7 @@
@tags = []
@stack = []
@entities = []
+ @nsstack = []
end
def position
@@ -213,6 +216,7 @@
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true )
+ @nsstack.unshift(curr_ns=Set.new)
identity = md[1]
close = md[2]
identity =~ IDENTITY
@@ -288,6 +292,9 @@
val = attdef[3]
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
end
end
return [ :attlistdecl, element, pairs, contents ]
@@ -312,6 +319,7 @@
begin
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
+ @nsstack.shift
last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true )
@@ -345,19 +353,47 @@
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
- attrs = []
- if md[2].size > 0
- attrs = md[2].scan( ATTRIBUTE_PATTERN )
+ attributes = {}
+ prefixes = Set.new
+ prefixes << md[2] if md[2]
+ @nsstack.unshift(curr_ns=Set.new)
+ if md[4].size > 0
+ attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ attrs.each { |a,b,c,d,e|
+ if b == "xmlns"
+ if c == "xml"
+ if d != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif c == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << c
+ elsif b
+ prefixes << b unless b == "xml"
+ end
+ attributes[a] = e
+ }
end
- if md[4]
+ # Verify that all of the prefixes have been defined
+ for prefix in prefixes
+ unless @nsstack.find{|k| k.member?(prefix)}
+ raise UndefinedNamespaceException.new(prefix,@source,self)
+ end
+ end
+
+ if md[6]
@closed = md[1]
+ @nsstack.shift
else
@tags.push( md[1] )
end
- attributes = {}
- attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ]
end
else
@@ -371,6 +407,8 @@
# return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ]
end
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue REXML::ParseException
raise
rescue Exception, NameError => error
Index: lib/rexml/parsers/treeparser.rb
===================================================================
--- lib/rexml/parsers/treeparser.rb (revision 13594)
+++ lib/rexml/parsers/treeparser.rb (revision 13595)
@@ -29,8 +29,7 @@
return
when :start_element
tag_stack.push(event[1])
- # find the observers for namespaces
- @build_context = @build_context.add_element( event[1], event[2] )
+ el = @build_context = @build_context.add_element( event[1], event[2] )
when :end_element
tag_stack.pop
@build_context = @build_context.parent
@@ -86,6 +85,8 @@
end
rescue REXML::Validation::ValidationException
raise
+ rescue REXML::UndefinedNamespaceException
+ raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end
Index: lib/rexml/element.rb
===================================================================
--- lib/rexml/element.rb (revision 13594)
+++ lib/rexml/element.rb (revision 13595)
@@ -855,15 +855,15 @@
# Source (see Element.initialize). If not supplied or nil, a
# new, default Element will be constructed
# Returns:: the added Element
- # a = Element.new 'a'
- # a.elements.add Element.new 'b' #-> <a><b/></a>
- # a.elements.add 'c' #-> <a><b/><c/></a>
+ # a = Element.new('a')
+ # a.elements.add(Element.new('b')) #-> <a><b/></a>
+ # a.elements.add('c') #-> <a><b/><c/></a>
def add element=nil
rv = nil
if element.nil?
- Element.new "", self, @element.context
+ Element.new("", self, @element.context)
elsif not element.kind_of?(Element)
- Element.new element, self, @element.context
+ Element.new(element, self, @element.context)
else
@element << element
element.context = @element.context
Index: lib/rexml/attribute.rb
===================================================================
--- lib/rexml/attribute.rb (revision 13594)
+++ lib/rexml/attribute.rb (revision 13595)
@@ -50,7 +50,7 @@
@element = first.element
end
elsif first.kind_of? String
- @element = parent if parent.kind_of? Element
+ @element = parent
self.name = first
@normalized = second.to_s
else
Property changes on: lib/rexml
___________________________________________________________________
Name: svk:merge
- 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml-trunk/src/rexml:1287
3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml/trunk/src/rexml:1346
3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-1.8.6:1304
3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-trunk:1365
877f57f0-f5bd-0310-8c13-bb9e5bdefd87:/branches/3.1.7/src/rexml:1274
+ 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml-trunk/src/rexml:1287
3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml/trunk/src/rexml:1352
3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-1.8.6:1304
3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-trunk:1366
877f57f0-f5bd-0310-8c13-bb9e5bdefd87:/branches/3.1.7/src/rexml:1274
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml