[前][次][番号順一覧][スレッド一覧]

ruby-changes:12669

From: naruse <ko1@a...>
Date: Tue, 4 Aug 2009 09:52:18 +0900 (JST)
Subject: [ruby-changes:12669] Ruby:r24384 (trunk): More strict for Big5 series.

naruse	2009-08-04 09:51:22 +0900 (Tue, 04 Aug 2009)

  New Revision: 24384

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=24384

  Log:
    More strict for Big5 series.
    * enc/big5.c (EncLen_BIG5): revert to the original Big5 table.
      (EncLen_BIG5_HKSCS): new table for Big5-HKSCS.
      (trans): add the lead byte table for Big5-HKSCS.
      (big5_mbc_enc_len0): abstract function for the Big5 series.
      (big5_mbc_enc_len): for Big5.
      (big5_hkscs_mbc_enc_len): for Big5-HKSCS.
      (BIG5_HKSCS_P): added.
      (BIG5_ISMB_FIRST): add routine for Big5-HKSCS.
      (big5_hkscs): add for Big5-HKSCS.

  Modified files:
    trunk/ChangeLog
    trunk/enc/big5.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 24383)
+++ ChangeLog	(revision 24384)
@@ -1,3 +1,15 @@
+Tue Aug  4 09:41:11 2009  NARUSE, Yui  <naruse@r...>
+
+	* enc/big5.c (EncLen_Big5): back to original Big5 table.
+	  (EncLen_Big5_HKSCS): for Big5-HKSCS.
+	  (trans): add the lead byte table for Big5-HKSCS.
+	  (big5_mbc_enc_len): abstract function for Big5 series.
+	  (big5_mbc_enc_len): for Big5.
+	  (big5_hkscs_mbc_enc_len): for Big5-HKSCS.
+	  (BIG5_HKSCS_P): added.
+	  (BIG5_ISMB_FIRST): add routine for Big5-HKSCS.
+	  (big5_hkscs): add for Big5-HKSCS.
+
 Tue Aug  4 09:33:54 2009  NARUSE, Yui  <naruse@r...>
 
 	* encoding.c (rb_enc_set_base): Add for setting base encoding
Index: enc/big5.c
===================================================================
--- enc/big5.c	(revision 24383)
+++ enc/big5.c	(revision 24384)
@@ -30,6 +30,24 @@
 #include "regenc.h"
 
 static const int EncLen_BIG5[] = {
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+static const int EncLen_BIG5_HKSCS[] = {
  /* LEN  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
  /* 0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 1 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -88,26 +106,56 @@
     /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
     /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
     /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+  },
+  { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 8 */ F, F, F, F, F, F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F
   }
 };
 #undef A
 #undef F
 
 static int
-big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
+big5_mbc_enc_len0(const UChar* p, const UChar* e, int tridx, const int tbl[])
 {
   int firstbyte = *p++;
-  state_t s = trans[0][firstbyte];
+  state_t s = trans[tridx][firstbyte];
 #define RETURN(n) \
     return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
                          ONIGENC_CONSTRUCT_MBCLEN_INVALID()
   if (s < 0) RETURN(1);
-  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_BIG5[firstbyte]-1);
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(tbl[firstbyte]-1);
   s = trans[s][*p++];
   RETURN(2);
 #undef RETURN
 }
 
+static int
+big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
+{
+    return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5);
+}
+
+static int
+big5_hkscs_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
+{
+    return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS);
+}
+
 static OnigCodePoint
 big5_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
@@ -162,7 +210,11 @@
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
 };
 
-#define BIG5_ISMB_FIRST(byte)  (EncLen_BIG5[byte] > 1)
+#define BIG5_HKSCS_P(enc) ((enc)->precise_mbc_enc_len == big5_hkscs_mbc_enc_len)
+#define BIG5_ISMB_FIRST(byte)  ( \
+	BIG5_HKSCS_P(enc) ? EncLen_BIG5_HKSCS[byte] > 1 : \
+	EncLen_BIG5[byte] > 1 \
+	)
 #define BIG5_ISMB_TRAIL(byte)  BIG5_CAN_BE_TRAIL_TABLE[(byte)]
 
 static UChar*
@@ -229,5 +281,22 @@
  * Source:   See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS)
  * Alias: None
  */
-ENC_REPLICATE("Big5-HKSCS", "Big5")
+OnigEncodingDefine(big5_hkscs, BIG5_HKSCS) = {
+  big5_hkscs_mbc_enc_len,
+  "Big5-HKSCS",     /* name */
+  2,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  big5_mbc_to_code,
+  onigenc_mb2_code_to_mbclen,
+  big5_code_to_mbc,
+  big5_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  big5_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  big5_left_adjust_char_head,
+  big5_is_allowed_reverse_match
+};
 ENC_ALIAS("CP951", "Big5-HKSCS")

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]