[前][次][番号順一覧][スレッド一覧]

ruby-changes:34191

From: nobu <ko1@a...>
Date: Sat, 31 May 2014 08:58:35 +0900 (JST)
Subject: [ruby-changes:34191] nobu:r46272 (trunk): case-folding.rb: perfect hash for case unfolding3

nobu	2014-05-31 08:58:24 +0900 (Sat, 31 May 2014)

  New Revision: 46272

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?revision=46272&view=revision

  Log:
    case-folding.rb: perfect hash for case unfolding3
    
    * enc/unicode/case-folding.rb (lookup_hash): make perfect hash to
      lookup case unfolding table 3.

  Modified files:
    trunk/ChangeLog
    trunk/enc/unicode/case-folding.rb
    trunk/enc/unicode/casefold.h
    trunk/enc/unicode.c
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 46271)
+++ ChangeLog	(revision 46272)
@@ -1,4 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
-Sat May 31 08:58:12 2014  Nobuyoshi Nakada  <nobu@r...>
+Sat May 31 08:58:22 2014  Nobuyoshi Nakada  <nobu@r...>
+
+	* enc/unicode/case-folding.rb (lookup_hash): make perfect hash to
+	  lookup case unfolding table 3.
 
 	* enc/unicode/case-folding.rb (lookup_hash): make perfect hash to
 	  lookup case unfolding table 2.
Index: enc/unicode/casefold.h
===================================================================
--- enc/unicode/casefold.h	(revision 46271)
+++ enc/unicode/casefold.h	(revision 46272)
@@ -5176,4 +5176,97 @@ static const CaseUnfold_13_Type CaseUnfo https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L5176
   {{0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7}}},
 };
 
-#define UNFOLD3_TABLE_SIZE	23
+/* C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -7 -k1,2,3,4,5,6,7,8,9 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_13_hash -N onigenc_unicode_CaseUnfold_13_lookup  */
+
+/* maximum key range = 20, duplicates = 0 */
+
+#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__cplusplus) || defined(__GNUC_STDC_INLINE__)
+inline
+#elif defined(__GNUC__)
+__inline
+#endif
+/*ARGSUSED*/
+static unsigned int
+onigenc_unicode_CaseUnfold_13_hash(const OnigCodePoint *codes)
+{
+  static const unsigned char asso_values[] =
+    {
+       7,  4, 47, 47, 47, 47,  1,  1,  2, 47,
+      47, 47, 47, 47, 47, 47, 47, 47, 47,  1,
+      47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+      47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+      47, 47, 47, 47, 47, 47, 47, 47, 47, 11,
+      47, 47, 47, 47, 47, 10, 47,  2, 47, 47,
+      47, 47, 47, 47, 47, 47,  1, 47, 47,  1,
+      47, 47, 47,  9, 47, 47, 47, 47, 47, 47,
+      47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+      47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+      47, 47,  1, 47, 47,  2, 47, 47,  1, 47,
+      47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+      47, 47, 47, 47, 47, 47, 47, 47
+    };
+  return asso_values[bits_at(codes, 8)] + asso_values[bits_at(codes, 7)] + asso_values[bits_at(codes, 6)] + asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
+}
+
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+static const CodePointList2 *
+onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
+{
+  enum
+    {
+      MIN_CODE_VALUE = 0x66,
+      MAX_CODE_VALUE = 0x3c9,
+      TOTAL_KEYWORDS = 14,
+      MIN_WORD_LENGTH = 9,
+      MAX_WORD_LENGTH = 9,
+      MIN_HASH_VALUE = 27,
+      MAX_HASH_VALUE = 46
+    };
+
+  static const short wordlist[] =
+    {
+      -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1,
+      -1, -1, -1,
+      /*0x03c5,0x0313,0x0342*/ 12,
+      /*0x03c5,0x0308,0x0342*/ 9,
+      /*0x03b9,0x0308,0x0342*/ 6,
+      /*0x03c5,0x0313,0x0301*/ 11,
+      /*0x03c5,0x0308,0x0301*/ 8,
+      /*0x03b9,0x0308,0x0301*/ 5,
+      /*0x03c5,0x0313,0x0300*/ 10,
+      /*0x03c5,0x0308,0x0300*/ 7,
+      /*0x03b9,0x0308,0x0300*/ 4,
+      /*0x03c9,0x0342,0x03b9*/ 13,
+      /*0x03b7,0x0342,0x03b9*/ 3,
+      /*0x03b1,0x0342,0x03b9*/ 2,
+      -1, -1, -1, -1, -1, -1,
+      /*0x0066,0x0066,0x006c*/ 1,
+      /*0x0066,0x0066,0x0069*/ 0
+    };
+
+  if (codes[0] <= MAX_CODE_VALUE && codes[0] >= MIN_CODE_VALUE &&
+      codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE &&
+      codes[2] <= MAX_CODE_VALUE && codes[2] >= MIN_CODE_VALUE)
+    {
+      register int key = onigenc_unicode_CaseUnfold_13_hash(codes);
+
+      if (key <= MAX_HASH_VALUE && key >= 0)
+        {
+          register short s = wordlist[key];
+
+          if (s >= 0 && code3_equal(codes, CaseUnfold_13_Table[s].from))
+            return &CaseUnfold_13_Table[s].to;
+        }
+    }
+  return 0;
+}
+
Index: enc/unicode/case-folding.rb
===================================================================
--- enc/unicode/case-folding.rb	(revision 46271)
+++ enc/unicode/case-folding.rb	(revision 46272)
@@ -163,11 +163,8 @@ class CaseFolding https://github.com/ruby/ruby/blob/trunk/enc/unicode/case-folding.rb#L163
 
     # CaseUnfold_13
     name = "CaseUnfold_13"
-    print_table(dest, name, name=>unfold[2])
-
-    # table sizes
-    unfold3_table_size = unfold[2].size
-    dest.printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
+    data = print_table(dest, name, name=>unfold[2])
+    dest.print lookup_hash(name, "CodePointList2", data)
   end
 
   def self.load(*args)
Index: enc/unicode.c
===================================================================
--- enc/unicode.c	(revision 46271)
+++ enc/unicode.c	(revision 46272)
@@ -128,6 +128,15 @@ code2_equal(const OnigCodePoint *x, cons https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L128
   return 1;
 }
 
+static int
+code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
+{
+  if (x[0] != y[0]) return 0;
+  if (x[1] != y[1]) return 0;
+  if (x[2] != y[2]) return 0;
+  return 1;
+}
+
 #include "enc/unicode/casefold.h"
 
 #include "enc/unicode/name2ctype.h"
@@ -211,45 +220,12 @@ onigenc_unicode_property_name_to_ctype(O https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L220
 }
 
 
-static int
-code3_cmp(st_data_t x0, st_data_t y0)
-{
-  const OnigCodePoint *x = (const OnigCodePoint *)x0;
-  const OnigCodePoint *y = (const OnigCodePoint *)y0;
-  if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0;
-  return 1;
-}
-
-static st_index_t
-code3_hash(st_data_t x0)
-{
-  const OnigCodePoint *x = (const OnigCodePoint *)x0;
-  return (st_index_t )(x[0] + x[1] + x[2]);
-}
-
-static const struct st_hash_type type_code3_hash = {
-  code3_cmp,
-  code3_hash,
-};
-
-
-static st_table* Unfold3Table;
 static int CaseFoldInited = 0;
 
 static int init_case_fold_table(void)
 {
-  int i;
-
   THREAD_ATOMIC_START;
 
-  Unfold3Table = st_init_table_with_size(&type_code3_hash, UNFOLD3_TABLE_SIZE);
-  if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY;
-
-  for (i = 0; i < numberof(CaseUnfold_13); i++) {
-    const CaseUnfold_13_Type *p3 = &CaseUnfold_13[i];
-    st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
-  }
-
   CaseFoldInited = 1;
   THREAD_ATOMIC_END;
   return 0;
@@ -258,17 +234,7 @@ static int init_case_fold_table(void) https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L234
 #define onigenc_unicode_fold_lookup onigenc_unicode_CaseFold_11_lookup
 #define onigenc_unicode_unfold1_lookup onigenc_unicode_CaseUnfold_11_lookup
 #define onigenc_unicode_unfold2_lookup onigenc_unicode_CaseUnfold_12_lookup
-
-
-static inline const CodePointList2 *
-onigenc_unicode_unfold3_lookup(const OnigCodePoint *code)
-{
-  st_data_t to;
-  if (onig_st_lookup(Unfold3Table, (st_data_t )code, &to) != 0) {
-    return (const CodePointList2 *)to;
-  }
-  return 0;
-}
+#define onigenc_unicode_unfold3_lookup onigenc_unicode_CaseUnfold_13_lookup
 
 extern int
 onigenc_unicode_mbc_case_fold(OnigEncoding enc,

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]