[前][次][番号順一覧][スレッド一覧]

ruby-changes:3832

From: ko1@a...
Date: Tue, 29 Jan 2008 20:44:27 +0900 (JST)
Subject: [ruby-changes:3832] naruse - Ruby:r15321 (trunk): * enc/trans/make_transdb.rb: add for make transdb.h.

naruse	2008-01-29 20:44:08 +0900 (Tue, 29 Jan 2008)

  New Revision: 15321

  Modified files:
    trunk/ChangeLog
    trunk/enc/gbk.c

  Log:
    * enc/trans/make_transdb.rb: add to generate transdb.h.
    
    * dmytranscode.c: add for miniruby.
    
    * enc/gbk.c (gbk_left_adjust_char_head, gbk_is_allowed_reverse_match):
      fix odd regexp match. [ruby-dev:33502]


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/gbk.c?r1=15321&r2=15320&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15321&r2=15320&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15320)
+++ ChangeLog	(revision 15321)
@@ -1,3 +1,12 @@
+Tue Jan 29 20:37:36 2008  NARUSE, Yui  <naruse@r...>
+
+	* enc/trans/make_transdb.rb: add for make transdb.h.
+
+	* dmytranscode.c: add for miniruby.
+
+	* enc/gbk.c (gbk_left_adjust_char_head, gbk_is_allowed_reverse_match):
+	  fix odd regexp match. [ruby-dev:33502]
+
 Tue Jan 29 20:17:06 2008  NAKAMURA Usaku  <usa@r...>
 
 	* {bcc32,win32}/Makefile.sub (MINIOBJS): add dmytranscode.$(OBJEXT).
Index: enc/gbk.c
===================================================================
--- enc/gbk.c	(revision 15320)
+++ enc/gbk.c	(revision 15321)
@@ -29,7 +29,7 @@
 
 #include "regenc.h"
 
-static const int EncLen_gbk[] = {
+static const int EncLen_GBK[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -48,6 +48,28 @@
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
 };
 
+static const char GBK_CAN_BE_TRAIL_TABLE[256] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
+};
+
+#define GBK_ISMB_FIRST(byte)  (EncLen_GBK[byte] > 1)
+#define GBK_ISMB_TRAIL(byte)  GBK_CAN_BE_TRAIL_TABLE[(byte)]
+
 typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
 #define A ACCEPT
 #define F FAILURE
@@ -101,7 +123,7 @@
     return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
                          ONIGENC_CONSTRUCT_MBCLEN_INVALID()
   if (s < 0) RETURN(1);
-  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_gbk[firstbyte]-1);
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_GBK[firstbyte]-1);
   s = trans[s][*p++];
   RETURN(2);
 #undef RETURN
@@ -142,21 +164,23 @@
   return onigenc_mb2_is_code_ctype(enc, code, ctype);
 }
 
-#define gbk_islead(c)    ((c) < 0xa1 || (c) == 0xff)
-
 static UChar*
 gbk_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
 {
-  /* Assumed in this encoding,
-     mb-trail bytes don't mix with single bytes.
-  */
   const UChar *p;
   int len;
 
   if (s <= start) return (UChar* )s;
   p = s;
 
-  while (!gbk_islead(*p) && p > start) p--;
+  if (GBK_ISMB_TRAIL(*p)) {
+    while (p > start) {
+      if (! GBK_ISMB_FIRST(*--p)) {
+	p++;
+	break;
+      }
+    } 
+  }
   len = enclen(enc, p, s);
   if (p + len > s) return (UChar* )p;
   p += len;
@@ -167,13 +191,12 @@
 gbk_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
 {
   const UChar c = *s;
-  if (c <= 0x7e) return TRUE;
-  else           return FALSE;
+  return (GBK_ISMB_TRAIL(c) ? FALSE : TRUE);
 }
 
-OnigEncodingDefine(gbk, gbk) = {
+OnigEncodingDefine(gbk, GBK) = {
   gbk_mbc_enc_len,
-  "GBK",   /* name */
+  "GBK",      /* name */
   2,          /* max enc length */
   1,          /* min enc length */
   onigenc_is_mbc_newline_0x0a,

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]