ruby-changes:34191
From: nobu <ko1@a...>
Date: Sat, 31 May 2014 08:58:35 +0900 (JST)
Subject: [ruby-changes:34191] nobu:r46272 (trunk): case-folding.rb: perfect hash for case unfolding3
nobu 2014-05-31 08:58:24 +0900 (Sat, 31 May 2014) New Revision: 46272 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?revision=46272&view=revision Log: case-folding.rb: perfect hash for case unfolding3 * enc/unicode/case-folding.rb (lookup_hash): make perfect hash to lookup case unfolding table 3. Modified files: trunk/ChangeLog trunk/enc/unicode/case-folding.rb trunk/enc/unicode/casefold.h trunk/enc/unicode.c Index: ChangeLog =================================================================== --- ChangeLog (revision 46271) +++ ChangeLog (revision 46272) @@ -1,4 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 -Sat May 31 08:58:12 2014 Nobuyoshi Nakada <nobu@r...> +Sat May 31 08:58:22 2014 Nobuyoshi Nakada <nobu@r...> + + * enc/unicode/case-folding.rb (lookup_hash): make perfect hash to + lookup case unfolding table 3. * enc/unicode/case-folding.rb (lookup_hash): make perfect hash to lookup case unfolding table 2. Index: enc/unicode/casefold.h =================================================================== --- enc/unicode/casefold.h (revision 46271) +++ enc/unicode/casefold.h (revision 46272) @@ -5176,4 +5176,97 @@ static const CaseUnfold_13_Type CaseUnfo https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L5176 {{0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7}}}, }; -#define UNFOLD3_TABLE_SIZE 23 +/* C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -7 -k1,2,3,4,5,6,7,8,9 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_13_hash -N onigenc_unicode_CaseUnfold_13_lookup */ + +/* maximum key range = 20, duplicates = 0 */ + +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__cplusplus) || defined(__GNUC_STDC_INLINE__) +inline +#elif defined(__GNUC__) +__inline +#endif +/*ARGSUSED*/ +static unsigned int +onigenc_unicode_CaseUnfold_13_hash(const OnigCodePoint *codes) +{ + static const unsigned char asso_values[] = + { + 7, 4, 47, 47, 47, 47, 1, 1, 2, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 1, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 11, + 47, 47, 47, 47, 47, 10, 47, 2, 47, 47, + 47, 47, 47, 47, 47, 47, 1, 47, 47, 1, + 47, 47, 47, 9, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 1, 47, 47, 2, 47, 47, 1, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47 + }; + return asso_values[bits_at(codes, 8)] + asso_values[bits_at(codes, 7)] + asso_values[bits_at(codes, 6)] + asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +static const CodePointList2 * +onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes) +{ + enum + { + MIN_CODE_VALUE = 0x66, + MAX_CODE_VALUE = 0x3c9, + TOTAL_KEYWORDS = 14, + MIN_WORD_LENGTH = 9, + MAX_WORD_LENGTH = 9, + MIN_HASH_VALUE = 27, + MAX_HASH_VALUE = 46 + }; + + static const short wordlist[] = + { + -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, + -1, -1, -1, + /*0x03c5,0x0313,0x0342*/ 12, + /*0x03c5,0x0308,0x0342*/ 9, + /*0x03b9,0x0308,0x0342*/ 6, + /*0x03c5,0x0313,0x0301*/ 11, + /*0x03c5,0x0308,0x0301*/ 8, + /*0x03b9,0x0308,0x0301*/ 5, + /*0x03c5,0x0313,0x0300*/ 10, + /*0x03c5,0x0308,0x0300*/ 7, + /*0x03b9,0x0308,0x0300*/ 4, + /*0x03c9,0x0342,0x03b9*/ 13, + /*0x03b7,0x0342,0x03b9*/ 3, + /*0x03b1,0x0342,0x03b9*/ 2, + -1, -1, -1, -1, -1, -1, + /*0x0066,0x0066,0x006c*/ 1, + /*0x0066,0x0066,0x0069*/ 0 + }; + + if (codes[0] <= MAX_CODE_VALUE && codes[0] >= MIN_CODE_VALUE && + codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE && + codes[2] <= MAX_CODE_VALUE && codes[2] >= MIN_CODE_VALUE) + { + register int key = onigenc_unicode_CaseUnfold_13_hash(codes); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register short s = wordlist[key]; + + if (s >= 0 && code3_equal(codes, CaseUnfold_13_Table[s].from)) + return &CaseUnfold_13_Table[s].to; + } + } + return 0; +} + Index: enc/unicode/case-folding.rb =================================================================== --- enc/unicode/case-folding.rb (revision 46271) +++ enc/unicode/case-folding.rb (revision 46272) @@ -163,11 +163,8 @@ class CaseFolding https://github.com/ruby/ruby/blob/trunk/enc/unicode/case-folding.rb#L163 # CaseUnfold_13 name = "CaseUnfold_13" - print_table(dest, name, name=>unfold[2]) - - # table sizes - unfold3_table_size = unfold[2].size - dest.printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7)) + data = print_table(dest, name, name=>unfold[2]) + dest.print lookup_hash(name, "CodePointList2", data) end def self.load(*args) Index: enc/unicode.c =================================================================== --- enc/unicode.c (revision 46271) +++ enc/unicode.c (revision 46272) @@ -128,6 +128,15 @@ code2_equal(const OnigCodePoint *x, cons https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L128 return 1; } +static int +code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) +{ + if (x[0] != y[0]) return 0; + if (x[1] != y[1]) return 0; + if (x[2] != y[2]) return 0; + return 1; +} + #include "enc/unicode/casefold.h" #include "enc/unicode/name2ctype.h" @@ -211,45 +220,12 @@ onigenc_unicode_property_name_to_ctype(O https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L220 } -static int -code3_cmp(st_data_t x0, st_data_t y0) -{ - const OnigCodePoint *x = (const OnigCodePoint *)x0; - const OnigCodePoint *y = (const OnigCodePoint *)y0; - if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; - return 1; -} - -static st_index_t -code3_hash(st_data_t x0) -{ - const OnigCodePoint *x = (const OnigCodePoint *)x0; - return (st_index_t )(x[0] + x[1] + x[2]); -} - -static const struct st_hash_type type_code3_hash = { - code3_cmp, - code3_hash, -}; - - -static st_table* Unfold3Table; static int CaseFoldInited = 0; static int init_case_fold_table(void) { - int i; - THREAD_ATOMIC_START; - Unfold3Table = st_init_table_with_size(&type_code3_hash, UNFOLD3_TABLE_SIZE); - if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY; - - for (i = 0; i < numberof(CaseUnfold_13); i++) { - const CaseUnfold_13_Type *p3 = &CaseUnfold_13[i]; - st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to)); - } - CaseFoldInited = 1; THREAD_ATOMIC_END; return 0; @@ -258,17 +234,7 @@ static int init_case_fold_table(void) https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L234 #define onigenc_unicode_fold_lookup onigenc_unicode_CaseFold_11_lookup #define onigenc_unicode_unfold1_lookup onigenc_unicode_CaseUnfold_11_lookup #define onigenc_unicode_unfold2_lookup onigenc_unicode_CaseUnfold_12_lookup - - -static inline const CodePointList2 * -onigenc_unicode_unfold3_lookup(const OnigCodePoint *code) -{ - st_data_t to; - if (onig_st_lookup(Unfold3Table, (st_data_t )code, &to) != 0) { - return (const CodePointList2 *)to; - } - return 0; -} +#define onigenc_unicode_unfold3_lookup onigenc_unicode_CaseUnfold_13_lookup extern int onigenc_unicode_mbc_case_fold(OnigEncoding enc, -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/