ruby-changes:3199
From: ko1@a...
Date: 25 Dec 2007 19:01:22 +0900
Subject: [ruby-changes:3199] nobu - Ruby:r14692 (trunk): * encoding.h (rb_enc_mbc_to_codepoint): wrapper for ONIGENC_MBC_TO_CODE().
nobu 2007-12-25 19:01:06 +0900 (Tue, 25 Dec 2007) New Revision: 14692 Modified files: trunk/ChangeLog trunk/encoding.c trunk/include/ruby/encoding.h trunk/string.c Log: * encoding.h (rb_enc_mbc_to_codepoint): wrapper for ONIGENC_MBC_TO_CODE(). * string.c (rb_str_succ): deal with invalid sequence as binary. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14692&r2=14691 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14692&r2=14691 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=14692&r2=14691 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=14692&r2=14691 Index: encoding.c =================================================================== --- encoding.c (revision 14691) +++ encoding.c (revision 14692) @@ -660,14 +660,15 @@ return c; } -int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +int +rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) { int r; if (e <= p) rb_raise(rb_eArgError, "empty string"); r = rb_enc_precise_mbclen(p, e, enc); if (MBCLEN_CHARFOUND(r)) - return ONIGENC_MBC_TO_CODE(enc,(UChar*)p,(UChar*)e); + return rb_enc_mbc_to_codepoint(p, e, enc); else rb_raise(rb_eArgError, "invalid mbstring sequence"); } Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 14691) +++ include/ruby/encoding.h (revision 14692) @@ -85,18 +85,19 @@ /* -> code or raise exception */ int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); +#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e)) /* -> codelen or raise exception */ int rb_enc_codelen(int code, rb_encoding *enc); /* code,ptr,encoding -> write buf */ -#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)buf) +#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf)) /* ptr, ptr, encoding -> prev_char */ -#define rb_enc_prev_char(s,p,enc) (char 
*)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p) +#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p)) /* ptr, ptr, encoding -> next_char */ -#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)s,(UChar*)p) -#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)s,(UChar*)p) +#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p)) +#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p)) #define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t) #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) @@ -110,9 +111,11 @@ #define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1) +int rb_enc_casefold(char *to, const char *p, const char *e, rb_encoding *enc); int rb_enc_toupper(int c, rb_encoding *enc); int rb_enc_tolower(int c, rb_encoding *enc); ID rb_intern3(const char*, long, rb_encoding*); +ID rb_interned_id_p(const char *, long, rb_encoding *); int rb_enc_symname_p(const char*, rb_encoding*); int rb_enc_str_coderange(VALUE); int rb_enc_str_asciionly_p(VALUE); Index: ChangeLog =================================================================== --- ChangeLog (revision 14691) +++ ChangeLog (revision 14692) @@ -1,3 +1,10 @@ +Tue Dec 25 19:01:04 2007 Nobuyoshi Nakada <nobu@r...> + + * encoding.h (rb_enc_mbc_to_codepoint): wrapper for + ONIGENC_MBC_TO_CODE(). + + * string.c (rb_str_succ): deal with invalid sequence as binary. + Tue Dec 25 18:40:46 2007 Koichi Sasada <ko1@a...> * iseq.c: all methods need $SAFE < 1. 
Index: string.c =================================================================== --- string.c (revision 14691) +++ string.c (revision 14692) @@ -1876,7 +1876,8 @@ s = e = sbeg + RSTRING_LEN(str); while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) { - cc = rb_enc_codepoint(s, e, enc); + if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue; + cc = rb_enc_mbc_to_codepoint(s, e, enc); if (rb_enc_isalnum(cc, enc)) { if (rb_enc_isascii(cc, enc)) { if ((c = succ_char(s)) == 0) break; @@ -1892,7 +1893,8 @@ s = e; while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) { int limit = 256; - cc = rb_enc_codepoint(s, e, enc); + if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue; + cc = rb_enc_mbc_to_codepoint(s, e, enc); while ((l = rb_enc_mbcput(++cc, carry, enc)) < 0 && --limit); if (l > 0) { if (l == (o = e - s)) goto overlay; -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml