ruby-changes:3333
From: ko1@a...
Date: 1 Jan 2008 14:44:10 +0900
Subject: [ruby-changes:3333] matz - Ruby:r14826 (trunk): * lib/rexml: 1.9 patch from Sam Ruby mentioned in his blog:
matz 2008-01-01 14:43:50 +0900 (Tue, 01 Jan 2008) New Revision: 14826 Modified files: trunk/ChangeLog trunk/lib/rexml/doctype.rb trunk/lib/rexml/element.rb trunk/lib/rexml/encoding.rb trunk/lib/rexml/entity.rb trunk/lib/rexml/formatters/pretty.rb trunk/lib/rexml/functions.rb trunk/lib/rexml/parsers/baseparser.rb trunk/lib/rexml/parsers/sax2parser.rb trunk/lib/rexml/parsers/treeparser.rb trunk/lib/rexml/parsers/xpathparser.rb trunk/lib/rexml/source.rb trunk/lib/rexml/syncenumerator.rb trunk/lib/rexml/text.rb trunk/lib/rexml/validation/validation.rb trunk/lib/rexml/xpath_parser.rb Log: * lib/rexml: 1.9 patch from Sam Ruby mentioned in his blog: <http://intertwingly.net/blog/2007/12/31/Porting-REXML-to-Ruby-1-9> [ruby-core:14639] http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/treeparser.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/formatters/pretty.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/text.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/validation/validation.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/xpathparser.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/syncenumerator.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/entity.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/sax2parser.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/xpath_parser.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/doctype.rb?r1=14826&r2=14825&diff_format=u 
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/baseparser.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/element.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/source.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/functions.rb?r1=14826&r2=14825&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/encoding.rb?r1=14826&r2=14825&diff_format=u Index: ChangeLog =================================================================== --- ChangeLog (revision 14825) +++ ChangeLog (revision 14826) @@ -1,3 +1,9 @@ +Tue Jan 1 14:41:56 2008 Yukihiro Matsumoto <matz@r...> + + * lib/rexml: 1.9 patch from Sam Ruby mentioned in his blog: + <http://intertwingly.net/blog/2007/12/31/Porting-REXML-to-Ruby-1-9> + [ruby-core:14639] + Tue Jan 1 14:15:04 2008 Yukihiro Matsumoto <matz@r...> * string.c (rb_str_substr): offset movement bug. a patch from Index: lib/rexml/parsers/xpathparser.rb =================================================================== --- lib/rexml/parsers/xpathparser.rb (revision 14825) +++ lib/rexml/parsers/xpathparser.rb (revision 14826) @@ -332,12 +332,12 @@ predicates << expr[1..-2] if expr end #puts "PREDICATES = #{predicates.inspect}" - predicates.each{ |expr| - #puts "ORING #{expr}" + predicates.each{ |pred| + #puts "ORING #{pred}" preds = [] parsed << :predicate parsed << preds - OrExpr(expr, preds) + OrExpr(pred, preds) } #puts "PREDICATES = #{predicates.inspect}" path Index: lib/rexml/parsers/baseparser.rb =================================================================== --- lib/rexml/parsers/baseparser.rb (revision 14825) +++ lib/rexml/parsers/baseparser.rb (revision 14826) @@ -242,6 +242,11 @@ @document_status = :after_doctype @source.read if @source.buffer.size<2 md = @source.match(/\s*/um, true) + if @source.encoding == "UTF-8" + if @source.buffer.respond_to? 
:force_encoding + @source.buffer.force_encoding(Encoding::UTF_8) + end + end end end if @document_status == :in_doctype Index: lib/rexml/parsers/sax2parser.rb =================================================================== --- lib/rexml/parsers/sax2parser.rb (revision 14825) +++ lib/rexml/parsers/sax2parser.rb (revision 14826) @@ -149,17 +149,26 @@ procs = get_procs( :end_prefix_mapping, event[1] ) listeners = get_listeners( :end_prefix_mapping, event[1] ) if procs or listeners - namespace_mapping.each do |prefix, uri| + namespace_mapping.each do |ns_prefix, ns_uri| # notify observers of namespaces - procs.each { |ob| ob.call( prefix ) } if procs - listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners + procs.each { |ob| ob.call( ns_prefix ) } if procs + listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners end end when :text #normalized = @parser.normalize( event[1] ) #handle( :characters, normalized ) copy = event[1].clone - @entities.each { |key, value| copy = copy.gsub("&#{key};", value) } + + esub = proc { |match| + if @entities.has_key?($1) + @entities[$1].gsub(Text::REFERENCE, &esub) + else + match + end + } + + copy.gsub!( Text::REFERENCE, &esub ) copy.gsub!( Text::NUMERICENTITY ) {|m| m=$1 m = "0#{m}" if m[0] == ?x Index: lib/rexml/parsers/treeparser.rb =================================================================== --- lib/rexml/parsers/treeparser.rb (revision 14825) +++ lib/rexml/parsers/treeparser.rb (revision 14826) @@ -30,7 +30,10 @@ return when :start_element tag_stack.push(event[1]) - el = @build_context = @build_context.add_element( event[1], event[2] ) + el = @build_context = @build_context.add_element( event[1] ) + event[2].each do |key, value| + el.attributes[key]=Attribute.new(key,value,self) + end when :end_element tag_stack.pop @build_context = @build_context.parent Index: lib/rexml/validation/validation.rb =================================================================== --- 
lib/rexml/validation/validation.rb (revision 14825) +++ lib/rexml/validation/validation.rb (revision 14826) @@ -33,8 +33,8 @@ sattr = [:start_attribute, nil] eattr = [:end_attribute] text = [:text, nil] - k,v = event[2].find { |k,v| - sattr[1] = k + k,v = event[2].find { |key,value| + sattr[1] = key #puts "Looking for #{sattr.inspect}" m = @current.next( sattr ) #puts "Got #{m.inspect}" @@ -47,7 +47,7 @@ @current = m else #puts "Didn't get end" - text[1] = v + text[1] = value #puts "Looking for #{text.inspect}" m = m.next( text ) #puts "Got #{m.inspect}" Index: lib/rexml/element.rb =================================================================== --- lib/rexml/element.rb (revision 14825) +++ lib/rexml/element.rb (revision 14826) @@ -296,7 +296,7 @@ raise "First argument must be either an element name, or an Element object" if element.nil? el = @elements.add(element) attrs.each do |key, value| - el.attributes[key]=Attribute.new(key,value,self) + el.attributes[key]=value end if attrs.kind_of? Hash el end @@ -552,7 +552,11 @@ def attribute( name, namespace=nil ) prefix = nil - prefix = namespaces.index(namespace) if namespace + if namespaces.respond_to? :key + prefix = namespaces.key(namespace) if namespace + else + prefix = namespaces.index(namespace) if namespace + end prefix = nil if prefix == 'xmlns' attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) end @@ -704,7 +708,6 @@ # A private helper method def each_with_something( test, max=0, name=nil ) num = 0 - child=nil @elements.each( name ){ |child| yield child if test.call(child) and num += 1 return if max>0 and num == max @@ -754,7 +757,6 @@ raise "index (#{index}) must be >= 1" if index < 1 name = literalize(name) if name num = 0 - child = nil @element.find { |child| child.kind_of? Element and (name.nil? ? 
true : child.has_name?( name )) and @@ -1217,7 +1219,8 @@ def get_attribute_ns(namespace, name) each_attribute() { |attribute| if name == attribute.name && - namespace == attribute.namespace() + namespace == attribute.namespace() && + ( !namespace.empty? || !attribute.fully_expanded_name.index(':') ) return attribute end } Index: lib/rexml/source.rb =================================================================== --- lib/rexml/source.rb (revision 14825) +++ lib/rexml/source.rb (revision 14826) @@ -59,6 +59,9 @@ @to_utf = true else @to_utf = false + if @buffer.respond_to? :force_encoding + @buffer.force_encoding Encoding::UTF_8 + end end end @@ -147,13 +150,13 @@ str = @source.read( 2 ) if encoding self.encoding = encoding - elsif 0xfe == str[0] && 0xff == str[1] + elsif str[0,2] == "\xfe\xff" @line_break = "\000>" - elsif 0xff == str[0] && 0xfe == str[1] + elsif str[0,2] == "\xff\xfe" @line_break = ">\000" - elsif 0xef == str[0] && 0xbb == str[1] + elsif str[0,2] == "\xef\xbb" str += @source.read(1) - str = '' if (0xbf == str[2]) + str = '' if (str[2,1] == "\xBF") @line_break = ">" else @line_break = ">" @@ -193,6 +196,9 @@ str = @source.readline(@line_break) str = decode(str) if @to_utf and str @buffer << str + if not @to_utf and @buffer.respond_to? :force_encoding + @buffer.force_encoding Encoding::UTF_8 + end rescue Exception, NameError @source = nil end Index: lib/rexml/doctype.rb =================================================================== --- lib/rexml/doctype.rb (revision 14825) +++ lib/rexml/doctype.rb (revision 14826) @@ -117,7 +117,6 @@ unless @children.empty? 
next_indent = indent + 1 output << ' [' - child = nil # speed @children.each { |child| output << "\n" f.write( child, output ) Index: lib/rexml/functions.rb =================================================================== --- lib/rexml/functions.rb (revision 14825) +++ lib/rexml/functions.rb (revision 14826) @@ -256,9 +256,15 @@ end } - string(string).unpack('U*').collect { |c| - if map.has_key? c then map[c] else c end - }.compact.pack('U*') + if ''.respond_to? :chars + string(string).chars.collect { |c| + if map.has_key? c then map[c] else c end + }.compact.join + else + string(string).unpack('U*').collect { |c| + if map.has_key? c then map[c] else c end + }.compact.pack('U*') + end end # UNTESTED Index: lib/rexml/entity.rb =================================================================== --- lib/rexml/entity.rb (revision 14825) +++ lib/rexml/entity.rb (revision 14826) @@ -139,7 +139,7 @@ if @parent matches.each do |entity_reference| entity_value = @parent.entity( entity_reference[0] ) - rv.gsub!( /%#{entity_reference};/um, entity_value ) + rv.gsub!( /%#{entity_reference.join};/um, entity_value ) end end return rv Index: lib/rexml/syncenumerator.rb =================================================================== --- lib/rexml/syncenumerator.rb (revision 14825) +++ lib/rexml/syncenumerator.rb (revision 14826) @@ -6,8 +6,7 @@ # Enumerable objects. def initialize(*enums) @gens = enums - @biggest = @gens[0] - @gens.each {|x| @biggest = x if x.size > @biggest.size } + @length = @gens.collect {|x| x.size }.max end # Returns the number of enumerated Enumerable objects, i.e. the size @@ -24,8 +23,8 @@ # Enumerates rows of the Enumerable objects. 
def each - @biggest.zip( *@gens ) {|a| - yield(*a[1..-1]) + @length.times {|i| + yield @gens.collect {|x| x[i]} } self end Index: lib/rexml/text.rb =================================================================== --- lib/rexml/text.rb (revision 14825) +++ lib/rexml/text.rb (revision 14826) @@ -308,37 +308,24 @@ # Unescapes all possible entities def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil ) - rv = string.clone - rv.gsub!( /\r\n?/, "\n" ) - matches = rv.scan( REFERENCE ) - return rv if matches.size == 0 - rv.gsub!( NUMERICENTITY ) {|m| - m=$1 - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') - } - matches.collect!{|x|x[0]}.compact! - if matches.size > 0 - if doctype - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = doctype.entity( entity_reference ) - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value ) if entity_value - end + string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { |ref| + if ref[1] == ?# + if ref[2] == ?x + [ref[3...-1].to_i(16)].pack('U*') + else + [ref[2...-1].to_i].pack('U*') end + elsif ref == '&amp;' + '&' + elsif filter and filter.include?( ref[1...-1] ) + ref + elsif doctype + doctype.entity( ref[1...-1] ) or ref else - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ] - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value.value ) if entity_value - end - end + entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ] + entity_value ? 
entity_value.value : ref end - rv.gsub!( /&amp;/, '&' ) - end - rv + } end end end Index: lib/rexml/formatters/pretty.rb =================================================================== --- lib/rexml/formatters/pretty.rb (revision 14825) +++ lib/rexml/formatters/pretty.rb (revision 14826) @@ -31,6 +31,7 @@ @level = 0 @ie_hack = ie_hack @width = 80 + @compact = false end protected Index: lib/rexml/encoding.rb =================================================================== --- lib/rexml/encoding.rb (revision 14825) +++ lib/rexml/encoding.rb (revision 14826) @@ -56,14 +56,14 @@ def check_encoding str # We have to recognize UTF-16, LSB UTF-16, and UTF-8 - if str[0] == 0xfe && str[1] == 0xff + if str[0,2] == "\xfe\xff" str[0,2] = "" return UTF_16 - elsif str[0] == 0xff && str[1] == 0xfe + elsif str[0,2] == "\xff\xfe" str[0,2] = "" return UNILE end - str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um + str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m return $3.upcase if $3 return UTF_8 end Index: lib/rexml/xpath_parser.rb =================================================================== --- lib/rexml/xpath_parser.rb (revision 14825) +++ lib/rexml/xpath_parser.rb (revision 14826) @@ -222,7 +222,7 @@ when :child new_nodeset = [] nt = nil - for node in nodeset + nodeset.each do |node| nt = node.node_type new_nodeset += node.children if nt == :element or nt == :document end @@ -266,7 +266,7 @@ when :ancestor new_nodeset = [] - for node in nodeset + nodeset.each do |node| while node.parent node = node.parent new_nodeset << node unless new_nodeset.include? 
node @@ -277,7 +277,7 @@ when :ancestor_or_self new_nodeset = [] - for node in nodeset + nodeset.each do |node| if node.node_type == :element new_nodeset << node while ( node.parent ) @@ -341,7 +341,7 @@ when :descendant results = [] nt = nil - for node in nodeset + nodeset.each do |node| nt = node.node_type results += expr( path_stack.dclone.unshift( :descendant_or_self ), node.children ) if nt == :element or nt == :document @@ -376,7 +376,7 @@ when :preceding new_nodeset = [] - for node in nodeset + nodeset.each do |node| new_nodeset += preceding( node ) end #puts "NEW NODESET => #{new_nodeset.inspect}" @@ -385,7 +385,7 @@ when :following new_nodeset = [] - for node in nodeset + nodeset.each do |node| new_nodeset += following( node ) end nodeset = new_nodeset @@ -395,7 +395,7 @@ #puts "In :namespace" new_nodeset = [] prefix = path_stack.shift - for node in nodeset + nodeset.each do |node| if (node.node_type == :element or node.node_type == :attribute) if @namespaces namespaces = @namespaces -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml