ruby-changes:3832
From: ko1@a...
Date: Tue, 29 Jan 2008 20:44:27 +0900 (JST)
Subject: [ruby-changes:3832] naruse - Ruby:r15321 (trunk): * enc/trans/make_transdb.rb: add for make transdb.h.
naruse 2008-01-29 20:44:08 +0900 (Tue, 29 Jan 2008)
New Revision: 15321
Modified files:
trunk/ChangeLog
trunk/enc/gbk.c
Log:
* enc/trans/make_transdb.rb: add for make transdb.h.
* dmytranscode.c: add for miniruby.
* enc/gbk.c (gbk_left_adjust_char_head, gbk_is_allowed_reverse_match):
fix odd regexp match. [ruby-dev:33502]
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/gbk.c?r1=15321&r2=15320&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15321&r2=15320&diff_format=u
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15320)
+++ ChangeLog (revision 15321)
@@ -1,3 +1,12 @@
+Tue Jan 29 20:37:36 2008 NARUSE, Yui <naruse@r...>
+
+ * enc/trans/make_transdb.rb: add for make transdb.h.
+
+ * dmytranscode.c: add for miniruby.
+
+ * enc/gbk.c (gbk_left_adjust_char_head, gbk_is_allowed_reverse_match):
+ fix odd regexp match. [ruby-dev:33502]
+
Tue Jan 29 20:17:06 2008 NAKAMURA Usaku <usa@r...>
* {bcc32,win32}/Makefile.sub (MINIOBJS): add dmytranscode.$(OBJEXT).
Index: enc/gbk.c
===================================================================
--- enc/gbk.c (revision 15320)
+++ enc/gbk.c (revision 15321)
@@ -29,7 +29,7 @@
#include "regenc.h"
-static const int EncLen_gbk[] = {
+static const int EncLen_GBK[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -48,6 +48,28 @@
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
+static const char GBK_CAN_BE_TRAIL_TABLE[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
+};
+
+#define GBK_ISMB_FIRST(byte) (EncLen_GBK[byte] > 1)
+#define GBK_ISMB_TRAIL(byte) GBK_CAN_BE_TRAIL_TABLE[(byte)]
+
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
#define A ACCEPT
#define F FAILURE
@@ -101,7 +123,7 @@
return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
ONIGENC_CONSTRUCT_MBCLEN_INVALID()
if (s < 0) RETURN(1);
- if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_gbk[firstbyte]-1);
+ if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_GBK[firstbyte]-1);
s = trans[s][*p++];
RETURN(2);
#undef RETURN
@@ -142,21 +164,23 @@
return onigenc_mb2_is_code_ctype(enc, code, ctype);
}
-#define gbk_islead(c) ((c) < 0xa1 || (c) == 0xff)
-
static UChar*
gbk_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
{
- /* Assumed in this encoding,
- mb-trail bytes don't mix with single bytes.
- */
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
- while (!gbk_islead(*p) && p > start) p--;
+ if (GBK_ISMB_TRAIL(*p)) {
+ while (p > start) {
+ if (! GBK_ISMB_FIRST(*--p)) {
+ p++;
+ break;
+ }
+ }
+ }
len = enclen(enc, p, s);
if (p + len > s) return (UChar* )p;
p += len;
@@ -167,13 +191,12 @@
gbk_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
{
const UChar c = *s;
- if (c <= 0x7e) return TRUE;
- else return FALSE;
+ return (GBK_ISMB_TRAIL(c) ? FALSE : TRUE);
}
-OnigEncodingDefine(gbk, gbk) = {
+OnigEncodingDefine(gbk, GBK) = {
gbk_mbc_enc_len,
- "GBK", /* name */
+ "GBK", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/