ruby-changes:2325
From: ko1@a...
Date: 4 Nov 2007 13:59:57 +0900
Subject: [ruby-changes:2325] ser - Ruby:r13816 (trunk): Fixes ticket:110 (more UTF-16 problems)
ser 2007-11-04 13:52:08 +0900 (Sun, 04 Nov 2007) New Revision: 13816 Modified directories: trunk/lib/rexml/ Modified files: trunk/lib/rexml/encoding.rb trunk/lib/rexml/parsers/baseparser.rb trunk/lib/rexml/parsers/treeparser.rb trunk/lib/rexml/source.rb Log: Fixes ticket:110 (more UTF-16 problems) Missing include for UndefinedNamespaceException was causing errors in some cases. Modified: trunk/lib/rexml/ http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/treeparser.rb?r1=13816&r2=13815 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml?r1=13816&r2=13815 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/parsers/baseparser.rb?r1=13816&r2=13815 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/source.rb?r1=13816&r2=13815 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/rexml/encoding.rb?r1=13816&r2=13815 Index: lib/rexml/parsers/baseparser.rb =================================================================== --- lib/rexml/parsers/baseparser.rb (revision 13815) +++ lib/rexml/parsers/baseparser.rb (revision 13816) @@ -1,4 +1,5 @@ require 'rexml/parseexception' +require 'rexml/undefinednamespaceexception' require 'rexml/source' require 'set' @@ -191,6 +192,7 @@ end return [ :end_document ] if empty? 
return @stack.shift if @stack.size > 0 + #STDERR.puts @source.encoding @source.read if @source.buffer.size<2 #STDERR.puts "BUFFER = #{@source.buffer.inspect}" if @document_status == nil Index: lib/rexml/parsers/treeparser.rb =================================================================== --- lib/rexml/parsers/treeparser.rb (revision 13815) +++ lib/rexml/parsers/treeparser.rb (revision 13816) @@ -1,4 +1,5 @@ require 'rexml/validation/validationexception' +require 'rexml/undefinednamespaceexception' module REXML module Parsers Index: lib/rexml/source.rb =================================================================== --- lib/rexml/source.rb (revision 13815) +++ lib/rexml/source.rb (revision 13816) @@ -135,6 +135,7 @@ def initialize(arg, block_size=500, encoding=nil) @er_source = @source = arg @to_utf = false + # Determining the encoding is a deceptively difficult issue to resolve. # First, we check the first two bytes for UTF-16. Then we # assume that the encoding is at least ASCII enough for the '>', and @@ -146,13 +147,16 @@ str = @source.read( 2 ) if encoding self.encoding = encoding - elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str - self.encoding = check_encoding( str ) - elsif (0xef == str[0] && 0xbb == str[1]) + elsif 0xfe == str[0] && 0xff == str[1] + @line_break = "\000>" + elsif 0xff == str[0] && 0xfe == str[1] + @line_break = ">\000" + elsif 0xef == str[0] && 0xbb == str[1] str += @source.read(1) str = '' if (0xbf == str[2]) + @line_break = ">" else - @line_break = '>' + @line_break = ">" end super str+@source.readline( @line_break ) end Index: lib/rexml/encoding.rb =================================================================== --- lib/rexml/encoding.rb (revision 13815) +++ lib/rexml/encoding.rb (revision 13816) @@ -56,8 +56,13 @@ def check_encoding str # We have to recognize UTF-16, LSB UTF-16, and UTF-8 - return UTF_16 if /\A\xfe\xff/n =~ str - return UNILE if /\A\xff\xfe/n =~ str + if str[0] == 0xfe && str[1] == 0xff + str[0,2] = "" + return UTF_16 + elsif 
str[0] == 0xff && str[1] == 0xfe + str[0,2] = "" + return UNILE + end str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um return $3.upcase if $3 return UTF_8 Property changes on: lib/rexml ___________________________________________________________________ Name: svk:merge - 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml-trunk/src/rexml:1287 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml/trunk/src/rexml:1356 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-1.8.6:1304 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-trunk:1368 877f57f0-f5bd-0310-8c13-bb9e5bdefd87:/branches/3.1.7/src/rexml:1274 + 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml-trunk/src/rexml:1287 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/rexml/trunk/src/rexml:1433 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-1.8.6:1304 3a3bbbf4-582e-0410-a78b-8bf2211dae1f:/local/ruby-trunk:1435 877f57f0-f5bd-0310-8c13-bb9e5bdefd87:/branches/3.1.7/src/rexml:1274 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml