ruby-changes:7486
From: akr <ko1@a...>
Date: Mon, 1 Sep 2008 02:35:27 +0900 (JST)
Subject: [ruby-changes:7486] Ruby:r19006 (trunk): * tool/transcode-tblgen.rb (transcode_generated_code): defined for
akr 2008-09-01 02:35:00 +0900 (Mon, 01 Sep 2008) New Revision: 19006 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19006 Log: * tool/transcode-tblgen.rb (transcode_generated_code): defined for generating table at once. (transcode_tblgen): returns an empty string. (transcode_generate_node): ditto. * enc/trans/newline.trans: use transcode_generated_code. * enc/trans/iso2022.trans: ditto. * enc/trans/single_byte.trans: ditto. * enc/trans/utf_16_32.trans: ditto. * enc/trans/japanese.trans: ditto. * enc/trans/korean.trans: ditto. Modified files: trunk/ChangeLog trunk/enc/trans/iso2022.trans trunk/enc/trans/japanese.trans trunk/enc/trans/korean.trans trunk/enc/trans/newline.trans trunk/enc/trans/single_byte.trans trunk/enc/trans/utf_16_32.trans trunk/tool/transcode-tblgen.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 19005) +++ ChangeLog (revision 19006) @@ -1,3 +1,22 @@ +Mon Sep 1 02:31:16 2008 Tanaka Akira <akr@f...> + + * tool/transcode-tblgen.rb (transcode_generated_code): defined for + generating table at once. + (transcode_tblgen): returns an empty string. + (transcode_generate_node): ditto. + + * enc/trans/newline.trans: use transcode_generated_code. + + * enc/trans/iso2022.trans: ditto. + + * enc/trans/single_byte.trans: ditto. + + * enc/trans/utf_16_32.trans: ditto. + + * enc/trans/japanese.trans: ditto. + + * enc/trans/korean.trans: ditto. + Mon Sep 1 02:10:03 2008 Tanaka Akira <akr@f...> * tool/transcode-tblgen.rb (citrus_decode_mapsrc): print logging Index: enc/trans/iso2022.trans =================================================================== --- enc/trans/iso2022.trans (revision 19005) +++ enc/trans/iso2022.trans (revision 19006) @@ -10,10 +10,22 @@ map_jisx0208_rest = {} map_jisx0208_rest["{21-7e}"] = :func_so + + transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") + transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") + + map_eucjp = { + "{0e,0f,1b}" => :undef, + "{00-0d,10-1a,1c-7f}" => :func_so, + "{a1-fe}{a1-fe}" => :func_so, + "8e{a1-fe}" => :undef, + "8f{a1-fe}{a1-fe}" => :undef, + } + + transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %> -<%= transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") %> -<%= transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") %> +<%= transcode_generated_code %> static VALUE fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) @@ -65,18 +77,6 @@ NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp }; -<% - map_eucjp = { - "{0e,0f,1b}" => :undef, - "{00-0d,10-1a,1c-7f}" => :func_so, - "{a1-fe}{a1-fe}" => :func_so, - "8e{a1-fe}" => :undef, - "8f{a1-fe}{a1-fe}" => :undef, - } -%> - -<%= transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %> - static int fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o) { Index: enc/trans/newline.trans =================================================================== --- enc/trans/newline.trans (revision 19005) +++ enc/trans/newline.trans (revision 19006) @@ -3,9 +3,23 @@ <% map_normalize = {} map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") + + map_crlf = {} + map_crlf["{00-09,0b-ff}"] = :nomap + map_crlf["0a"] = "0d0a" + + transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline") + + map_cr = {} + map_cr["{00-09,0b-ff}"] = :nomap + map_cr["0a"] = "0d" + + transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") %> -<%= transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") %> +<%= transcode_generated_code %> static int fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) @@ -48,14 +62,6 @@ NULL, NULL, NULL, fun_so_universal_newline }; -<% - map_crlf = {} - map_crlf["{00-09,0b-ff}"] = :nomap - map_crlf["0a"] = "0d0a" -%> - -<%= transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline") %> - static const rb_transcoder rb_crlf_newline = { "", "crlf_newline", &crlf_newline, @@ -66,14 +72,6 @@ NULL, NULL, NULL, NULL }; -<% - map_cr = {} - map_cr["{00-09,0b-ff}"] = :nomap - map_cr["0a"] = "0d" -%> - -<%= transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") %> - static const rb_transcoder rb_cr_newline = { "", "cr_newline", &cr_newline, Index: enc/trans/single_byte.trans =================================================================== --- enc/trans/single_byte.trans (revision 19005) +++ enc/trans/single_byte.trans (revision 19006) @@ -20,14 +20,11 @@ require 'iso-8859-14-tbl' require 'iso-8859-15-tbl' -%> + transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map + transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map + transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map + transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map -<%= transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map %> -<%= transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map %> -<%= transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map %> -<%= transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map %> - -<% def transcode_tblgen_iso8859(name, tbl_to_ucs) tbl_to_ucs = CONTROL1_TO_UCS_TBL + tbl_to_ucs name_ident = name.tr('-','_') @@ -37,22 +34,24 @@ code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) code end + + transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) + transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %> +<%= transcode_generated_code %> void Init_single_byte(void) Index: enc/trans/utf_16_32.trans =================================================================== --- enc/trans/utf_16_32.trans (revision 19005) +++ enc/trans/utf_16_32.trans (revision 19006) @@ -1,5 +1,42 @@ #include "transcode_data.h" +<% + map = {} + map["{00-ff}{00-d7,e0-ff}0000"] = :func_so + map["{00-ff}{00-ff}{01-10}00"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE") + + map = {} + map["{00-d7,e0-ff}{00-ff}"] = :func_so + map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE") + + map = {} + map["{00-7f}"] = :func_so + map["{c2-df}{80-bf}"] = :func_so + map["e0{a0-bf}{80-bf}"] = :func_so + map["{e1-ec}{80-bf}{80-bf}"] = :func_so + map["ed{80-9f}{80-bf}"] = :func_so + map["{ee-ef}{80-bf}{80-bf}"] = :func_so + map["f0{90-bf}{80-bf}{80-bf}"] = :func_so + map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so + map["f4{80-8f}{80-bf}{80-bf}"] = :func_so + am = ActionMap.parse(map) + transcode_generate_node(am, "to_UTF_16BE") + + map = {} + map["{00-ff}{00-d7,e0-ff}"] = :func_so + map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE") + + map = {} + map["0000{00-d7,e0-ff}{00-ff}"] = :func_so + map["00{01-10}{00-ff}{00-ff}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE") +%> + +<%= transcode_generated_code %> + static int fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) { @@ -222,13 +259,6 @@ return 4; } -<%= - map = {} - map["{00-d7,e0-ff}{00-ff}"] = :func_so - map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE") -%> - static const rb_transcoder rb_from_UTF_16BE = { "UTF-16BE", "UTF-8", &from_UTF_16BE, @@ -239,21 +269,6 @@ NULL, NULL, NULL, &fun_so_from_utf_16be }; -<%= - map = {} - map["{00-7f}"] = :func_so - map["{c2-df}{80-bf}"] = :func_so - map["e0{a0-bf}{80-bf}"] = :func_so - map["{e1-ec}{80-bf}{80-bf}"] = :func_so - map["ed{80-9f}{80-bf}"] = :func_so - map["{ee-ef}{80-bf}{80-bf}"] = :func_so - map["f0{90-bf}{80-bf}{80-bf}"] = :func_so - map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so - map["f4{80-8f}{80-bf}{80-bf}"] = :func_so - am = ActionMap.parse(map) - transcode_generate_node(am, "to_UTF_16BE") -%> - static const rb_transcoder rb_to_UTF_16BE = { "UTF-8", "UTF-16BE", &to_UTF_16BE, @@ -264,13 +279,6 @@ NULL, NULL, NULL, &fun_so_to_utf_16be }; -<%= - map = {} - map["{00-ff}{00-d7,e0-ff}"] = :func_so - map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE") -%> - static const rb_transcoder rb_from_UTF_16LE = { "UTF-16LE", "UTF-8", &from_UTF_16LE, @@ -291,13 +299,6 @@ NULL, NULL, NULL, &fun_so_to_utf_16le }; -<%= - map = {} - map["0000{00-d7,e0-ff}{00-ff}"] = :func_so - map["00{01-10}{00-ff}{00-ff}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE") -%> - static const rb_transcoder rb_from_UTF_32BE = { "UTF-32BE", "UTF-8", &from_UTF_32BE, @@ -318,13 +319,6 @@ NULL, NULL, NULL, &fun_so_to_utf_32be }; -<%= - map = {} - map["{00-ff}{00-d7,e0-ff}0000"] = :func_so - map["{00-ff}{00-ff}{01-10}00"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE") -%> - static const rb_transcoder rb_from_UTF_32LE = { "UTF-32LE", "UTF-8", &from_UTF_32LE, Index: enc/trans/korean.trans =================================================================== --- enc/trans/korean.trans (revision 19005) +++ enc/trans/korean.trans (revision 19006) @@ -3,12 +3,14 @@ <% require "euckr-tbl" require "cp949-tbl" + + transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] + transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] + transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] + transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %> -<%= transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] %> -<%= transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] %> -<%= transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] %> -<%= transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %> +<%= transcode_generated_code %> void Init_korean(void) Index: enc/trans/japanese.trans =================================================================== --- enc/trans/japanese.trans (revision 19005) +++ enc/trans/japanese.trans (revision 19006) @@ -1,65 +1,69 @@ #include "transcode_data.h" -<%= transcode_tblgen "Shift_JIS", "UTF-8", [ +<% + transcode_tblgen "Shift_JIS", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("mskanji", 2, "JISX0208:1990/UCS"), - ] %> -<%= transcode_tblgen "Windows-31J", "UTF-8", [ + ] + transcode_tblgen "Windows-31J", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("mskanji", 2, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,CP932VDC:IBM/UCS,CP932UDA/UCS,JISX0208:MS/UCS"), - ] %> + ] -<%= transcode_tblgen "UTF-8", "Shift_JIS", [ + transcode_tblgen "UTF-8", "Shift_JIS", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("mskanji", 2, "UCS/JISX0208:1990"), - ] %> -<%= transcode_tblgen "UTF-8", "Windows-31J", [ + ] + transcode_tblgen "UTF-8", "Windows-31J", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("mskanji", 2, "UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/CP932VDC:IBM,UCS/CP932UDA,UCS/JISX0208:MS"), - ] %> + ] -<%= transcode_tblgen "EUC-JP", "UTF-8", [ + transcode_tblgen "EUC-JP", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "JISX0208:1990/UCS"), *citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("euc", 0x8000, "JISX0212/UCS"), - ] %> -<%= transcode_tblgen "EUC-JP-MS", "UTF-8", [ + ] + transcode_tblgen "EUC-JP-MS", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,JISX0208UDC/UCS,JISX0208:MS/UCS"), *citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"), *citrus_decode_mapsrc("euc", 0x8000, "JISX0212VDC:IBM/UCS,JISX0212UDC/UCS,JISX0212:MS/UCS"), - ] %> -<%= transcode_tblgen "CP51932", "UTF-8", [ + ] + transcode_tblgen "CP51932", "UTF-8", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,JISX0208:MS/UCS"), *citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"), - ] %> + ] -<%= transcode_tblgen "UTF-8", "EUC-JP", [ + transcode_tblgen "UTF-8", "EUC-JP", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:1990"), *citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212"), - ] %> -<%= transcode_tblgen "UTF-8", "EUC-JP-MS", [ + ] + transcode_tblgen "UTF-8", "EUC-JP-MS", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/JISX0208UDC,UCS/JISX0208:MS"), *citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"), *citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212VDC:IBM,UCS/JISX0212UDC,UCS/JISX0212:MS"), - ] %> -<%= transcode_tblgen "UTF-8", "CP51932", [ + ] + transcode_tblgen "UTF-8", "CP51932", [ ["{00-7f}", :nomap], *citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/JISX0208:MS"), *citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"), - ] %> + ] +%> +<%= transcode_generated_code %> + void Init_japanese(void) { Index: tool/transcode-tblgen.rb =================================================================== --- tool/transcode-tblgen.rb (revision 19005) +++ tool/transcode-tblgen.rb (revision 19006) @@ -389,6 +389,12 @@ code << generate_lookup_node(name_hint, table) name_hint end + + def gennode(name_hint=nil, valid_encoding=nil) + code = '' + name = generate_node(code, name_hint, valid_encoding) + return name, code + end end def citrus_mskanji_cstomb(csid, index) @@ -529,12 +535,12 @@ valid_encoding = nil end - code = '' - defined_name = am.generate_node(code, name, valid_encoding) + defined_name, code = am.gennode(name, valid_encoding) return defined_name, code, max_input end TRANSCODERS = [] +TRANSCODE_GENERATED_CODE = '' def transcode_tblgen(from, to, map) STDERR.puts "converter from #{from} to #{to}" if VERBOSE_MODE @@ -565,16 +571,21 @@ NULL, NULL, NULL }; End - tree_code + "\n" + transcoder_code + TRANSCODE_GENERATED_CODE << tree_code + "\n" + transcoder_code + '' end def transcode_generate_node(am, name_hint=nil) STDERR.puts "converter for #{name_hint}" if VERBOSE_MODE - code = '' - am.generate_node(code, name_hint) - code + name, code = am.gennode(name_hint) + TRANSCODE_GENERATED_CODE << code + '' end +def transcode_generated_code + TRANSCODE_GENERATED_CODE +end + def transcode_register_code code = '' TRANSCODERS.each {|transcoder_name| -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/