ruby-changes:3791
From: ko1@a...
Date: Sun, 27 Jan 2008 23:27:34 +0900 (JST)
Subject: [ruby-changes:3791] akr - Ruby:r15280 (trunk): * include/ruby/oniguruma.h: precise mbclen API redesigned to avoid inline functions.
akr	2008-01-27 23:27:07 +0900 (Sun, 27 Jan 2008)

  New Revision: 15280

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/include/ruby/oniguruma.h
    trunk/io.c
    trunk/parse.y
    trunk/re.c
    trunk/regenc.c
    trunk/string.c

  Log:
    * include/ruby/oniguruma.h: precise mbclen API redesigned to avoid
      inline functions.
      (onigenc_mbclen_charfound): removed.
      (onigenc_mbclen_needmore): removed.
      (onigenc_mbclen_recover): removed.
      (ONIGENC_MBCLEN_CHARFOUND): removed.
      (ONIGENC_MBCLEN_CHARFOUND_P): defined.
      (ONIGENC_MBCLEN_CHARFOUND_LEN): defined.
      (ONIGENC_MBCLEN_INVALID): removed.
      (ONIGENC_MBCLEN_INVALID_P): defined.
      (ONIGENC_MBCLEN_NEEDMORE): removed.
      (ONIGENC_MBCLEN_NEEDMORE_P): defined.
      (ONIGENC_MBCLEN_NEEDMORE_LEN): defined.
      (ONIGENC_MBC_ENC_LEN): use onigenc_mbclen_approximate.

    * regenc.c (onigenc_mbclen_approximate): defined.

    * include/ruby/encoding.h (MBCLEN_CHARFOUND): removed.
      (MBCLEN_INVALID): removed.
      (MBCLEN_NEEDMORE): removed.
      (MBCLEN_CHARFOUND_P): defined.
      (MBCLEN_INVALID_P): defined.
      (MBCLEN_NEEDMORE_P): defined.
      (MBCLEN_CHARFOUND_LEN): defined.
      (MBCLEN_NEEDMORE_LEN): defined.

    * encoding.c: use new API.

    * re.c: ditto.

    * string.c: ditto.

    * parse.y: ditto.

http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/oniguruma.h?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regenc.c?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/io.c?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=15280&r2=15279&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=15280&r2=15279&diff_format=u Index: encoding.c =================================================================== --- encoding.c (revision 15279) +++ encoding.c (revision 15280) @@ -749,9 +749,8 @@ rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) { int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); - n = MBCLEN_CHARFOUND(n); - if (0 < n && n <= e-p) - return n; + if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) + return MBCLEN_CHARFOUND_LEN(n); else return 1; } @@ -782,7 +781,7 @@ return c; } l = rb_enc_precise_mbclen(p, e, enc); - if (!MBCLEN_CHARFOUND(l)) + if (!MBCLEN_CHARFOUND_P(l)) return -1; c = rb_enc_codepoint(p, e, enc); if (!rb_enc_isascii(c, enc)) @@ -798,7 +797,7 @@ if (e <= p) rb_raise(rb_eArgError, "empty string"); r = rb_enc_precise_mbclen(p, e, enc); - if (MBCLEN_CHARFOUND(r)) + if (MBCLEN_CHARFOUND_P(r)) return rb_enc_mbc_to_codepoint(p, e, enc); else rb_raise(rb_eArgError, "invalid mbstring sequence"); Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 15279) +++ include/ruby/encoding.h (revision 15280) @@ -110,9 +110,11 @@ 
/* -> chlen, invalid or needmore */ int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); -#define MBCLEN_CHARFOUND(ret) ONIGENC_MBCLEN_CHARFOUND(ret) -#define MBCLEN_INVALID(ret) ONIGENC_MBCLEN_INVALID(ret) -#define MBCLEN_NEEDMORE(ret) ONIGENC_MBCLEN_NEEDMORE(ret) +#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) +#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) +#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) +#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) +#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) /* -> 0x00..0x7f, -1 */ int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); Index: include/ruby/oniguruma.h =================================================================== --- include/ruby/oniguruma.h (revision 15279) +++ include/ruby/oniguruma.h (revision 15280) @@ -229,32 +229,23 @@ #define ONIGENC_STEP_BACK(enc,start,s,n) \ onigenc_step_back((enc),(start),(s),(n)) - #define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n) +#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r)) +#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r) + #define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1) +#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1) + #define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n)) +#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1) +#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r)) -static inline int onigenc_mbclen_charfound(int r) { return 0 < r ? r : 0; } -static inline int onigenc_mbclen_needmore(int r) { return r < -1 ? 
-1 - r : 0; } -#define ONIGENC_MBCLEN_CHARFOUND(r) onigenc_mbclen_charfound(r) -#define ONIGENC_MBCLEN_INVALID(r) ((r) == -1) -#define ONIGENC_MBCLEN_NEEDMORE(r) onigenc_mbclen_needmore(r) - #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) -static inline int onigenc_mbclen_recover(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) -{ - int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); - int r; - if (ONIGENC_MBCLEN_INVALID(ret)) - return 1; - else if ((r = ONIGENC_MBCLEN_NEEDMORE(ret))) - return e-p+r; - else - return ONIGENC_MBCLEN_CHARFOUND(ret); -} +ONIG_EXTERN +int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)); -#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_recover(p,e,enc) +#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) Index: regenc.c =================================================================== --- regenc.c (revision 15279) +++ regenc.c (revision 15280) @@ -50,6 +50,17 @@ return 0; } +extern int +onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) +{ + int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); + if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) + return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); + else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) + return e-p+ONIGENC_MBCLEN_NEEDMORE_LEN(ret); + return 1; +} + extern UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) { Index: re.c =================================================================== --- re.c (revision 15279) +++ re.c (revision 15280) @@ -1673,7 +1673,7 @@ chbuf[chlen++] = byte; while (chlen < chmaxlen && - MBCLEN_NEEDMORE(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) { + MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, 
chbuf+chlen, enc))) { byte = read_escaped_byte(&p, end, err); if (byte == -1) { return -1; @@ -1682,7 +1682,7 @@ } l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc); - if (MBCLEN_INVALID(l)) { + if (MBCLEN_INVALID_P(l)) { strcpy(err, "invalid multibyte escape"); return -1; } @@ -1812,10 +1812,11 @@ while (p < end) { int chlen = rb_enc_precise_mbclen(p, end, enc); - if (!MBCLEN_CHARFOUND(chlen)) { + if (!MBCLEN_CHARFOUND_P(chlen)) { strcpy(err, "invalid multibyte character"); return -1; } + chlen = MBCLEN_CHARFOUND_LEN(chlen); if (1 < chlen || (*p & 0x80)) { rb_str_buf_cat(buf, p, chlen); p += chlen; Index: ChangeLog =================================================================== --- ChangeLog (revision 15279) +++ ChangeLog (revision 15280) @@ -1,3 +1,39 @@ +Sun Jan 27 23:20:54 2008 Tanaka Akira <akr@f...> + + * include/ruby/oniguruma.h: precise mbclen API redesigned to avoid + inline functions. + (onigenc_mbclen_charfound): removed. + (onigenc_mbclen_needmore): removed. + (onigenc_mbclen_recover): removed. + (ONIGENC_MBCLEN_CHARFOUND): removed. + (ONIGENC_MBCLEN_CHARFOUND_P): defined. + (ONIGENC_MBCLEN_CHARFOUND_LEN): defined. + (ONIGENC_MBCLEN_INVALID): removed. + (ONIGENC_MBCLEN_INVALID_P): defined. + (ONIGENC_MBCLEN_NEEDMORE): removed. + (ONIGENC_MBCLEN_NEEDMORE_P): defined. + (ONIGENC_MBCLEN_NEEDMORE_LEN): defined. + (ONIGENC_MBC_ENC_LEN): use onigenc_mbclen_approximate. + + * regenc.c (onigenc_mbclen_approximate): defined. + + * include/ruby/encoding.h (MBCLEN_CHARFOUND): removed. + (MBCLEN_INVALID): removed. + (MBCLEN_NEEDMORE): removed. + (MBCLEN_CHARFOUND_P): defined. + (MBCLEN_INVALID_P): defined. + (MBCLEN_NEEDMORE_P): defined. + (MBCLEN_CHARFOUND_LEN): defined. + (MBCLEN_NEEDMORE_LEN): defined. + + * encoding.c: use new API. + + * re.c: ditto. + + * string.c: ditto. + + * parse.y: ditto. + Sun Jan 27 22:55:27 2008 Nobuyoshi Nakada <nobu@r...> * parse.y (value_expr_gen): reverted r12880. 
[ruby-dev:33388] Index: string.c =================================================================== --- string.c (revision 15279) +++ string.c (revision 15280) @@ -170,11 +170,10 @@ } while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); - int len = MBCLEN_CHARFOUND(ret); - if (!len) { + if (!MBCLEN_CHARFOUND_P(ret)) { return ENC_CODERANGE_BROKEN; } - p += len; + p += MBCLEN_CHARFOUND_LEN(ret); if (p < e) { p = search_nonascii(p, e); if (!p) { @@ -190,12 +189,11 @@ while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); - int len = MBCLEN_CHARFOUND(ret); - if (!len) { + if (!MBCLEN_CHARFOUND_P(ret)) { return ENC_CODERANGE_BROKEN; } - p += len; + p += MBCLEN_CHARFOUND_LEN(ret); } if (e < p) { return ENC_CODERANGE_BROKEN; @@ -2017,7 +2015,8 @@ return NEIGHBOR_WRAPPED; ++((unsigned char*)p)[i]; l = rb_enc_precise_mbclen(p, p+len, enc); - if (MBCLEN_CHARFOUND(l)) { + if (MBCLEN_CHARFOUND_P(l)) { + l = MBCLEN_CHARFOUND_LEN(l); if (l == len) { return NEIGHBOR_FOUND; } @@ -2025,11 +2024,11 @@ memset(p+l, 0xff, len-l); } } - if (MBCLEN_INVALID(l) && i < len-1) { + if (MBCLEN_INVALID_P(l) && i < len-1) { int len2, l2; for (len2 = len-1; 0 < len2; len2--) { l2 = rb_enc_precise_mbclen(p, p+len2, enc); - if (!MBCLEN_INVALID(l2)) + if (!MBCLEN_INVALID_P(l2)) break; } memset(p+len2+1, 0xff, len-(len2+1)); @@ -2048,7 +2047,8 @@ return NEIGHBOR_WRAPPED; --((unsigned char*)p)[i]; l = rb_enc_precise_mbclen(p, p+len, enc); - if (MBCLEN_CHARFOUND(l)) { + if (MBCLEN_CHARFOUND_P(l)) { + l = MBCLEN_CHARFOUND_LEN(l); if (l == len) { return NEIGHBOR_FOUND; } @@ -2056,11 +2056,11 @@ memset(p+l, 0, len-l); } } - if (MBCLEN_INVALID(l) && i < len-1) { + if (MBCLEN_INVALID_P(l) && i < len-1) { int len2, l2; for (len2 = len-1; 0 < len2; len2--) { l2 = rb_enc_precise_mbclen(p, p+len2, enc); - if (!MBCLEN_INVALID(l2)) + if (!MBCLEN_INVALID_P(l2)) break; } memset(p+len2+1, 0, len-(len2+1)); @@ -3300,11 +3300,12 @@ int cc; n = rb_enc_precise_mbclen(p, pend, enc); - if 
(!MBCLEN_CHARFOUND(n)) { + if (!MBCLEN_CHARFOUND_P(n)) { p++; n = 1; goto escape_codepoint; } + n = MBCLEN_CHARFOUND_LEN(n); c = rb_enc_codepoint(p, pend, enc); n = rb_enc_codelen(c, enc); @@ -3313,7 +3314,7 @@ if (c == '"'|| c == '\\' || (c == '#' && p < pend && - MBCLEN_CHARFOUND(rb_enc_precise_mbclen(p,pend,enc)) && + MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) && (cc = rb_enc_codepoint(p,pend,enc), (cc == '$' || cc == '@' || cc == '{')))) { prefix_escape(result, c, enc); Index: io.c =================================================================== --- io.c (revision 15279) +++ io.c (revision 15280) @@ -2289,12 +2289,13 @@ return Qnil; } r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc); - if ((n = MBCLEN_CHARFOUND(r)) != 0 && n <= fptr->rbuf_len) { + if (MBCLEN_CHARFOUND_P(r) && + (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) { str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n); fptr->rbuf_off += n; fptr->rbuf_len -= n; } - else if (MBCLEN_NEEDMORE(r)) { + else if (MBCLEN_NEEDMORE_P(r)) { str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len); fptr->rbuf_len = 0; getc_needmore: @@ -2303,7 +2304,7 @@ fptr->rbuf_off++; fptr->rbuf_len--; r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc); - if (MBCLEN_NEEDMORE(r)) { + if (MBCLEN_NEEDMORE_P(r)) { goto getc_needmore; } } Index: parse.y =================================================================== --- parse.y (revision 15279) +++ parse.y (revision 15280) @@ -5299,7 +5299,7 @@ parser_tokadd_mbchar(struct parser_params *parser, int c) { int len = parser_precise_mbclen(); - if (!MBCLEN_CHARFOUND(len)) { + if (!MBCLEN_CHARFOUND_P(len)) { compile_error(PARSER_ARG "invalid multibyte char"); return -1; } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/