ruby-changes:34185
From: nobu <ko1@a...>
Date: Sat, 31 May 2014 08:50:01 +0900 (JST)
Subject: [ruby-changes:34185] nobu:r46266 (trunk): case-folding.rb: conversion script
nobu 2014-05-31 08:49:54 +0900 (Sat, 31 May 2014) New Revision: 46266 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?revision=46266&view=revision Log: case-folding.rb: conversion script * enc/unicode/case-folding.rb: script to convert CaseFolding.txt, translated from CaseFolding.py. Added files: trunk/enc/unicode/case-folding.rb Modified files: trunk/ChangeLog Index: ChangeLog =================================================================== --- ChangeLog (revision 46265) +++ ChangeLog (revision 46266) @@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Sat May 31 08:49:52 2014 Nobuyoshi Nakada <nobu@r...> + + * enc/unicode/case-folding.rb: script to convert CaseFolding.txt, + translated from CaseFolding.py. + Sat May 31 08:31:41 2014 Tanaka Akira <akr@f...> * test/lib/minitest/unit.rb: Check Tempfile leaks for each test method Index: enc/unicode/case-folding.rb =================================================================== --- enc/unicode/case-folding.rb (revision 0) +++ enc/unicode/case-folding.rb (revision 46266) @@ -0,0 +1,107 @@ https://github.com/ruby/ruby/blob/trunk/enc/unicode/case-folding.rb#L1 +#!/usr/bin/ruby + +# Usage: +# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt +# $ ruby CaseFolding.rb CaseFolding.txt > ../enc/unicode/casefold.h + + +def hex_seq(v) + v.map {|i| "0x%04x" % i}.join(", ") +end + +def print_table(table, data) + print("static const #{table}[] = {\n") + for k, v in data.sort + if Array === k and k.length > 1 + sk = "{#{hex_seq(k)}}" + else + sk = "0x%04x" % k + end + print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n") + end + print("};\n\n") +end + +def print_case_folding_data(filename) + pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/ + + fold = {} + unfold = [{}, {}, {}] + turkic = [] + + IO.foreach(filename) do |line| + next unless res = pattern.match(line) + ch_from = res[1].to_i(16) + ch_to = [] + + if res[2] == 'T' + # Turkic case folding + 
turkic << ch_from + next + end + + # store folding data + (3..6).each do |i| + if res[i] + ch_to << res[i].to_i(16) + end + end + fold[ch_from] = ch_to + + # store unfolding data + i = ch_to.length - 1 + (unfold[i][ch_to] ||= []) << ch_from + end + + # move locale dependent data to (un)fold_locale + fold_locale = {} + unfold_locale = [{}, {}] + for ch_from in turkic + key = fold[ch_from] + i = key.length - 1 + unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key) + fold_locale[ch_from] = fold.delete(ch_from) + end + + # print the header + print("/* DO NOT EDIT THIS FILE. */\n") + print("/* Generated by tool/CaseFolding.py */\n\n") + + # print folding data + + # CaseFold + print_table("CaseFold_11_Type CaseFold", fold) + + # CaseFold_Locale + print_table("CaseFold_11_Type CaseFold_Locale", fold_locale) + + # print unfolding data + + # CaseUnfold_11 + print_table("CaseUnfold_11_Type CaseUnfold_11", unfold[0]) + + # CaseUnfold_11_Locale + print_table("CaseUnfold_11_Type CaseUnfold_11_Locale", unfold_locale[0]) + + # CaseUnfold_12 + print_table("CaseUnfold_12_Type CaseUnfold_12", unfold[1]) + + # CaseUnfold_12_Locale + print_table("CaseUnfold_12_Type CaseUnfold_12_Locale", unfold_locale[1]) + + # CaseUnfold_13 + print_table("CaseUnfold_13_Type CaseUnfold_13", unfold[2]) + + # table sizes + fold_table_size = fold.size + fold_locale.size + printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2)) + unfold1_table_size = unfold[0].size + unfold_locale[0].size + printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2)) + unfold2_table_size = unfold[1].size + unfold_locale[1].size + printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5)) + unfold3_table_size = unfold[2].size + printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7)) +end + +filename = ARGV[0] || 'CaseFolding.txt' +print_case_folding_data(filename) Property changes on: enc/unicode/case-folding.rb 
___________________________________________________________________ Added: svn:eol-style + LF Added: svn:executable + * -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/