[前][次][番号順一覧][スレッド一覧]

ruby-changes:15065

From: akr <ko1@a...>
Date: Mon, 15 Mar 2010 21:25:37 +0900 (JST)
Subject: [ruby-changes:15065] Ruby:r26941 (trunk): * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding

akr	2010-03-15 21:25:20 +0900 (Mon, 15 Mar 2010)

  New Revision: 26941

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=26941

  Log:
    * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
      optional argument.
    
    * enc/trans/single_byte.trans: use valid_encoding argument for
      transcode_tblgen.
    
    * enc/trans/chinese.trans: ditto.

  Modified files:
    trunk/ChangeLog
    trunk/enc/trans/chinese.trans
    trunk/enc/trans/single_byte.trans
    trunk/tool/transcode-tblgen.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 26940)
+++ ChangeLog	(revision 26941)
@@ -1,3 +1,13 @@
+Mon Mar 15 21:22:49 2010  Tanaka Akira  <akr@f...>
+
+	* tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
+	  optional argument.
+
+	* enc/trans/single_byte.trans use valid_encoding argument for
+	  transcode_tblgen.
+
+	* enc/trans/chinese.trans: ditto.
+
 Mon Mar 15 18:33:36 2010  Nobuyoshi Nakada  <nobu@r...>
 
 	* random.c (default_rand): removed initial buffer.
Index: enc/trans/chinese.trans
===================================================================
--- enc/trans/chinese.trans	(revision 26940)
+++ enc/trans/chinese.trans	(revision 26941)
@@ -1,16 +1,18 @@
 #include "transcode_data.h"
 
 <%
-  set_valid_byte_pattern 'GB2312', 'EUC-KR'
-  set_valid_byte_pattern 'GB12345', 'EUC-KR'
+  gb2312_valid_byte_pattern = ValidEncoding['EUC-KR']
+  gb12345_valid_byte_pattern = ValidEncoding['EUC-KR']
 
   transcode_tblgen "GB2312", "UTF-8",
   [["{00-7f}", :nomap]] +
-  citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS")
+  citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS"),
+  gb2312_valid_byte_pattern
 
   transcode_tblgen "GB12345", "UTF-8",
   [["{00-7f}", :nomap]] +
-  citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS")
+  citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS"),
+  gb12345_valid_byte_pattern
 
   transcode_tblgen "UTF-8", "GB2312",
   [["{00-7f}", :nomap]] +
Index: enc/trans/single_byte.trans
===================================================================
--- enc/trans/single_byte.trans	(revision 26940)
+++ enc/trans/single_byte.trans	(revision 26941)
@@ -22,9 +22,8 @@
     require(name.downcase + "-tbl")
     control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : []
     tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL")
-    set_valid_byte_pattern(name, '1byte')
     code = ''
-    code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }])
+    code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }], '{00-ff}')
     code << "\n"
     code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
     code
Index: tool/transcode-tblgen.rb
===================================================================
--- tool/transcode-tblgen.rb	(revision 26940)
+++ tool/transcode-tblgen.rb	(revision 26941)
@@ -165,7 +165,7 @@
   end
 
   def self.build_tree(rects)
-    expand("", rects) {|actions|
+    expand("", rects) {|prefix, actions|
       unambiguous_action(actions)
     }
   end
@@ -186,12 +186,12 @@
       all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] }
     }
 
-    tree = expand("", all_rects) {|actions|
+    tree = expand("", all_rects) {|prefix, actions|
       args = Array.new(rects_list.length) { [] }
       actions.each {|i, action|
         args[i] << action
       }
-      yield(args)
+      yield(prefix, *args)
     }
 
     self.new("", tree)
@@ -213,7 +213,7 @@
     end
     if has_empty
       actions = rects.map {|min, max, action| action }.uniq
-      act = block.call(actions)
+      act = block.call(prefix, actions)
       tree = Action.new(act)
     else
       tree = []
@@ -649,18 +649,22 @@
   r
 end
 
-def transcode_compile_tree(name, from, map)
+def transcode_compile_tree(name, from, map, valid_encoding=nil)
   map = encode_utf8(map)
   h = {}
   map.each {|k, v|
     h[k] = v unless h[k] # use first mapping
   }
-  if valid_encoding = ValidEncoding[from]
+  valid_encoding = ValidEncoding[from] if valid_encoding == nil
+  if valid_encoding
     rects = ActionMap.parse_to_rects(h)
     undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef)
-    am = ActionMap.merge(rects, undef_rects) {|a1, a2|
-      a1 = a1.empty? ? nil : ActionMap.unambiguous_action(a1)
-      a2 = a2.empty? ? nil : ActionMap.unambiguous_action(a2)
+    am = ActionMap.merge(rects, undef_rects) {|prefix, as1, as2|
+      a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1)
+      a2 = as2.empty? ? nil : ActionMap.unambiguous_action(as2)
+      if !a2
+        raise "invalid mapping: #{prefix}"
+      end
       a1 || a2
     }
   else
@@ -675,7 +679,7 @@
 TRANSCODERS = []
 TRANSCODE_GENERATED_TRANSCODER_CODE = ''
 
-def transcode_tbl_only(from, to, map)
+def transcode_tbl_only(from, to, map, valid_encoding=nil)
   if VERBOSE_MODE
     if from.empty? || to.empty?
       STDERR.puts "converter for #{from.empty? ? to : from}"
@@ -692,12 +696,12 @@
   else
     tree_name = "from_#{id_from}_to_#{id_to}"
   end
-  real_tree_name, max_input = transcode_compile_tree(tree_name, from, map)
+  real_tree_name, max_input = transcode_compile_tree(tree_name, from, map, valid_encoding)
   return map, tree_name, real_tree_name, max_input
 end
 
-def transcode_tblgen(from, to, map)
-  map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map)
+def transcode_tblgen(from, to, map, valid_encoding=nil)
+  map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding)
   transcoder_name = "rb_#{tree_name}"
   TRANSCODERS << transcoder_name
   input_unit_length = UnitLength[from]

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]