ruby-changes:29735
From: naruse <ko1@a...>
Date: Fri, 5 Jul 2013 09:55:55 +0900 (JST)
Subject: [ruby-changes:29735] naruse:r41786 (trunk): broken utf-8
naruse 2013-07-05 09:54:11 +0900 (Fri, 05 Jul 2013)

  New Revision: 41786

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=41786

  Log:
    broken utf-8

  Modified files:
    trunk/enc/utf_8.c
    trunk/include/ruby/oniguruma.h
    trunk/regenc.c
Index: include/ruby/oniguruma.h
===================================================================
--- include/ruby/oniguruma.h (revision 41785)
+++ include/ruby/oniguruma.h (revision 41786)
@@ -246,11 +246,14 @@ ONIG_EXTERN OnigEncodingType OnigEncodin https://github.com/ruby/ruby/blob/trunk/include/ruby/oniguruma.h#L246
 #define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
 #define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
 
-#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
-#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
+#define ONIGENC_INVALID_NUM 0x10000
+#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1-ONIGENC_INVALID_NUM)
+#define ONIGENC_CONSTRUCT_MBCLEN_INVALID2(n) (-(n)-ONIGENC_INVALID_NUM)
+#define ONIGENC_MBCLEN_INVALID_P(r) ((r) < -ONIGENC_INVALID_NUM)
+#define ONIGENC_MBCLEN_INVALID_LEN(r) (-(r)-ONIGENC_INVALID_NUM)
 
 #define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
-#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
+#define ONIGENC_MBCLEN_NEEDMORE_P(r) (-ONIGENC_INVALID_NUM <= (r) && (r) < -1)
 #define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
 
 #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
Index: regenc.c
===================================================================
--- regenc.c (revision 41785)
+++ regenc.c (revision 41786)
@@ -55,10 +55,12 @@ extern int https://github.com/ruby/ruby/blob/trunk/regenc.c#L55
 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
 {
   int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
-  if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
-    return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
-  else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
+  if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
     return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
+  else if (ONIGENC_MBCLEN_INVALID_P(ret))
+    return (int)(e-p)+ONIGENC_MBCLEN_INVALID_LEN(ret);
+  else if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
+    return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
   return 1;
 }
 
Index: enc/utf_8.c
===================================================================
--- enc/utf_8.c (revision 41785)
+++ enc/utf_8.c (revision 41786)
@@ -233,12 +233,12 @@ mbc_enc_len(const UChar* p, const UChar* https://github.com/ruby/ruby/blob/trunk/enc/utf_8.c#L233
   if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-2);
   s = trans[s][*p++];
   if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
-                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID2(2);
 
   if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3);
   s = trans[s][*p++];
   return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
-                       ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+                       ONIGENC_CONSTRUCT_MBCLEN_INVALID2(3);
 }
 
 static int

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/