ruby-changes:15065
From: akr <ko1@a...>
Date: Mon, 15 Mar 2010 21:25:37 +0900 (JST)
Subject: [ruby-changes:15065] Ruby:r26941 (trunk): * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding optional argument.
akr 2010-03-15 21:25:20 +0900 (Mon, 15 Mar 2010) New Revision: 26941 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=26941 Log: * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding optional argument. * enc/trans/single_byte.trans use valid_encoding argument for transcode_tblgen. * enc/trans/chinese.trans: ditto. Modified files: trunk/ChangeLog trunk/enc/trans/chinese.trans trunk/enc/trans/single_byte.trans trunk/tool/transcode-tblgen.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 26940) +++ ChangeLog (revision 26941) @@ -1,3 +1,13 @@ +Mon Mar 15 21:22:49 2010 Tanaka Akira <akr@f...> + + * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding + optional argument. + + * enc/trans/single_byte.trans use valid_encoding argument for + transcode_tblgen. + + * enc/trans/chinese.trans: ditto. + Mon Mar 15 18:33:36 2010 Nobuyoshi Nakada <nobu@r...> * random.c (default_rand): removed initial buffer. 
Index: enc/trans/chinese.trans =================================================================== --- enc/trans/chinese.trans (revision 26940) +++ enc/trans/chinese.trans (revision 26941) @@ -1,16 +1,18 @@ #include "transcode_data.h" <% - set_valid_byte_pattern 'GB2312', 'EUC-KR' - set_valid_byte_pattern 'GB12345', 'EUC-KR' + gb2312_valid_byte_pattern = ValidEncoding['EUC-KR'] + gb12345_valid_byte_pattern = ValidEncoding['EUC-KR'] transcode_tblgen "GB2312", "UTF-8", [["{00-7f}", :nomap]] + - citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS") + citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS"), + gb2312_valid_byte_pattern transcode_tblgen "GB12345", "UTF-8", [["{00-7f}", :nomap]] + - citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS") + citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS"), + gb12345_valid_byte_pattern transcode_tblgen "UTF-8", "GB2312", [["{00-7f}", :nomap]] + Index: enc/trans/single_byte.trans =================================================================== --- enc/trans/single_byte.trans (revision 26940) +++ enc/trans/single_byte.trans (revision 26941) @@ -22,9 +22,8 @@ require(name.downcase + "-tbl") control1_if_needed = (name =~ /^ISO-8859/) ? 
CONTROL1_TO_UCS_TBL : [] tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL") - set_valid_byte_pattern(name, '1byte') code = '' - code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }]) + code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }], '{00-ff}') code << "\n" code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) code Index: tool/transcode-tblgen.rb =================================================================== --- tool/transcode-tblgen.rb (revision 26940) +++ tool/transcode-tblgen.rb (revision 26941) @@ -165,7 +165,7 @@ end def self.build_tree(rects) - expand("", rects) {|actions| + expand("", rects) {|prefix, actions| unambiguous_action(actions) } end @@ -186,12 +186,12 @@ all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] } } - tree = expand("", all_rects) {|actions| + tree = expand("", all_rects) {|prefix, actions| args = Array.new(rects_list.length) { [] } actions.each {|i, action| args[i] << action } - yield(args) + yield(prefix, *args) } self.new("", tree) @@ -213,7 +213,7 @@ end if has_empty actions = rects.map {|min, max, action| action }.uniq - act = block.call(actions) + act = block.call(prefix, actions) tree = Action.new(act) else tree = [] @@ -649,18 +649,22 @@ r end -def transcode_compile_tree(name, from, map) +def transcode_compile_tree(name, from, map, valid_encoding=nil) map = encode_utf8(map) h = {} map.each {|k, v| h[k] = v unless h[k] # use first mapping } - if valid_encoding = ValidEncoding[from] + valid_encoding = ValidEncoding[from] if valid_encoding == nil + if valid_encoding rects = ActionMap.parse_to_rects(h) undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef) - am = ActionMap.merge(rects, undef_rects) {|a1, a2| - a1 = a1.empty? ? nil : ActionMap.unambiguous_action(a1) - a2 = a2.empty? ? 
nil : ActionMap.unambiguous_action(a2) + am = ActionMap.merge(rects, undef_rects) {|prefix, as1, as2| + a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1) + a2 = as2.empty? ? nil : ActionMap.unambiguous_action(as2) + if !a2 + raise "invalid mapping: #{prefix}" + end a1 || a2 } else @@ -675,7 +679,7 @@ TRANSCODERS = [] TRANSCODE_GENERATED_TRANSCODER_CODE = '' -def transcode_tbl_only(from, to, map) +def transcode_tbl_only(from, to, map, valid_encoding=nil) if VERBOSE_MODE if from.empty? || to.empty? STDERR.puts "converter for #{from.empty? ? to : from}" @@ -692,12 +696,12 @@ else tree_name = "from_#{id_from}_to_#{id_to}" end - real_tree_name, max_input = transcode_compile_tree(tree_name, from, map) + real_tree_name, max_input = transcode_compile_tree(tree_name, from, map, valid_encoding) return map, tree_name, real_tree_name, max_input end -def transcode_tblgen(from, to, map) - map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map) +def transcode_tblgen(from, to, map, valid_encoding=nil) + map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding) transcoder_name = "rb_#{tree_name}" TRANSCODERS << transcoder_name input_unit_length = UnitLength[from] -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/