ruby-changes:11838
From: matz <ko1@a...>
Date: Wed, 20 May 2009 02:00:08 +0900 (JST)
Subject: [ruby-changes:11838] Ruby:r23493 (trunk): * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint() and rb_enc_codelen() in one function to reduce calls.
matz 2009-05-20 01:59:22 +0900 (Wed, 20 May 2009) New Revision: 23493 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=23493 Log: * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint() and rb_enc_codelen() in one function to reduce calls. * encoding.c (rb_enc_codepoint): compatibility function. * sprintf.c (rb_str_format): use rb_enc_codepoint_len(). * string.c (rb_str_inspect, rb_str_upcase_bang, rb_str_downcase_bang, rb_str_capitalize_bang, rb_str_swapcase_bang, trnext, tr_trans, rb_str_delete_bang, rb_str_squeeze_bang, rb_str_count, rb_str_split_m, rb_str_each_line, rb_str_each_codepoint, rb_str_lstrip_bang, sym_printable): ditto. * transcode.c (make_econv_exception): use rb_enc_mbc_to_codepoint() Modified files: trunk/ChangeLog trunk/encoding.c trunk/include/ruby/encoding.h trunk/sprintf.c trunk/string.c trunk/transcode.c Index: encoding.c =================================================================== --- encoding.c (revision 23492) +++ encoding.c (revision 23493) @@ -774,18 +774,27 @@ } unsigned int -rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc) { int r; if (e <= p) rb_raise(rb_eArgError, "empty string"); r = rb_enc_precise_mbclen(p, e, enc); - if (MBCLEN_CHARFOUND_P(r)) + if (MBCLEN_CHARFOUND_P(r)) { + if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r); return rb_enc_mbc_to_codepoint(p, e, enc); + } else rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc)); } +#undef rb_enc_codepoint +unsigned int +rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + return rb_enc_codepoint_len(p, e, 0, enc); +} + int rb_enc_codelen(int c, rb_encoding *enc) { Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 23492) +++ include/ruby/encoding.h (revision 23493) @@ -123,8 +123,14 @@ /* -> 0x00..0x7f, -1 */ int rb_enc_ascget(const char *p, 
const char *e, int *len, rb_encoding *enc); -/* -> code or raise exception */ + +/* -> code (and len) or raise exception */ +unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc); + +/* prototype for obsolete function */ unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); +/* overriding macro */ +#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc)) #define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e)) /* -> codelen>0 or raise exception */ Index: ChangeLog =================================================================== --- ChangeLog (revision 23492) +++ ChangeLog (revision 23493) @@ -1,3 +1,21 @@ +Wed May 20 00:13:38 2009 Yukihiro Matsumoto <matz@r...> + + * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint() + and rb_enc_codelen() in one function to reduce calls. + + * encoding.c (rb_enc_codepoint): compatibility function. + + * sprintf.c (rb_str_format): use rb_enc_codepoint_len(). + + * string.c (rb_str_inspect, rb_str_upcase_bang, + rb_str_downcase_bang, rb_str_capitalize_bang, + rb_str_swapcase_bang, trnext, tr_trans, rb_str_delete_bang, + rb_str_squeeze_bang, rb_str_count, rb_str_split_m, + rb_str_each_line, rb_str_each_codepoint, rb_str_lstrip_bang, + sym_printable): ditto. + + * transcode.c (make_econv_exception): use rb_enc_mbc_to_codepoint() + Wed May 20 00:05:52 2009 Yukihiro Matsumoto <matz@r...> * vm_method.c (rb_attr): should preserve encoding info. 
Index: string.c =================================================================== --- string.c (revision 23492) +++ string.c (revision 23493) @@ -4168,9 +4168,7 @@ } n = MBCLEN_CHARFOUND_LEN(n); - c = rb_enc_codepoint(p, pend, enc); - n = rb_enc_codelen(c, enc); - + c = rb_enc_codepoint_len(p, pend, &n, enc); p += n; if (c == '"'|| c == '\\' || (c == '#' && @@ -4273,7 +4271,7 @@ char buf[32]; int n = rb_enc_precise_mbclen(p-1, pend, enc); if (MBCLEN_CHARFOUND_P(n)) { - int cc = rb_enc_codepoint(p-1, pend, enc); + int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc); sprintf(buf, "%x", cc); len += strlen(buf)+4; p += MBCLEN_CHARFOUND_LEN(n)-1; @@ -4346,7 +4344,7 @@ if (u8) { int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1; if (MBCLEN_CHARFOUND_P(n)) { - int cc = rb_enc_codepoint(p-1, pend, enc); + int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc); p += n; snprintf(q, qend-q, "u{%x}", cc); q += strlen(q); @@ -4395,6 +4393,7 @@ rb_encoding *enc; char *s, *send; int modify = 0; + int n; str_modify_keep_cr(str); enc = STR_ENC_GET(str); @@ -4425,13 +4424,13 @@ s++; } else { - c = rb_enc_codepoint(s, send, enc); + c = rb_enc_codepoint_len(s, send, &n, enc); if (rb_enc_islower(c, enc)) { /* assuming toupper returns codepoint with same size */ rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); modify = 1; } - s += rb_enc_codelen(c, enc); + s += n; } } } @@ -4498,6 +4497,7 @@ while (s < send) { unsigned int c; + int n; if (ascompat && (c = *(unsigned char*)s) < 0x80) { if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { @@ -4507,13 +4507,13 @@ s++; } else { - c = rb_enc_codepoint(s, send, enc); + c = rb_enc_codepoint_len(s, send, &n, enc); if (rb_enc_isupper(c, enc)) { /* assuming toupper returns codepoint with same size */ rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc); modify = 1; } - s += rb_enc_codelen(c, enc); + s += n; } } } @@ -4565,6 +4565,7 @@ char *s, *send; int modify = 0; unsigned int c; + int n; str_modify_keep_cr(str); enc = STR_ENC_GET(str); @@ -4572,19 +4573,19 
@@ if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; s = RSTRING_PTR(str); send = RSTRING_END(str); - c = rb_enc_codepoint(s, send, enc); + c = rb_enc_codepoint_len(s, send, &n, enc); if (rb_enc_islower(c, enc)) { rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); modify = 1; } - s += rb_enc_codelen(c, enc); + s += n; while (s < send) { - c = rb_enc_codepoint(s, send, enc); + c = rb_enc_codepoint_len(s, send, &n, enc); if (rb_enc_isupper(c, enc)) { rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc); modify = 1; } - s += rb_enc_codelen(c, enc); + s += n; } if (modify) return str; @@ -4629,13 +4630,14 @@ rb_encoding *enc; char *s, *send; int modify = 0; + int n; str_modify_keep_cr(str); enc = STR_ENC_GET(str); rb_str_check_dummy_enc(enc); s = RSTRING_PTR(str); send = RSTRING_END(str); while (s < send) { - unsigned int c = rb_enc_codepoint(s, send, enc); + unsigned int c = rb_enc_codepoint_len(s, send, &n, enc); if (rb_enc_isupper(c, enc)) { /* assuming toupper returns codepoint with same size */ @@ -4647,7 +4649,7 @@ rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); modify = 1; } - s += rb_enc_mbclen(s, send, enc); + s += n; } if (modify) return str; @@ -4686,19 +4688,21 @@ static unsigned int trnext(struct tr *t, rb_encoding *enc) { + int n; + for (;;) { if (!t->gen) { if (t->p == t->pend) return -1; if (t->p < t->pend - 1 && *t->p == '\\') { t->p++; } - t->now = rb_enc_codepoint(t->p, t->pend, enc); - t->p += rb_enc_codelen(t->now, enc); + t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc); + t->p += n; if (t->p < t->pend - 1 && *t->p == '-') { t->p++; if (t->p < t->pend) { - unsigned int c = rb_enc_codepoint(t->p, t->pend, enc); - t->p += rb_enc_codelen(c, enc); + unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc); + t->p += n; if (t->now > c) continue; t->gen = 1; t->max = c; @@ -4819,8 +4823,8 @@ while (s < send) { int may_modify = 0; - c0 = c = rb_enc_codepoint(s, send, e1); - clen = rb_enc_codelen(c, e1); + + c0 = c = rb_enc_codepoint_len(s, send, 
&clen, e1); tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); s += clen; @@ -4897,8 +4901,7 @@ while (s < send) { int may_modify = 0; - c0 = c = rb_enc_codepoint(s, send, e1); - clen = rb_enc_codelen(c, e1); + c0 = c = rb_enc_codepoint_len(s, send, &clen, e1); tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); if (c < 256) { @@ -5125,8 +5128,7 @@ s++; } else { - c = rb_enc_codepoint(s, send, enc); - clen = rb_enc_codelen(c, enc); + c = rb_enc_codepoint_len(s, send, &clen, enc); if (tr_find(c, squeez, del, nodel)) { modify = 1; @@ -5231,8 +5233,7 @@ s++; } else { - c = rb_enc_codepoint(s, send, enc); - clen = rb_enc_codelen(c, enc); + c = rb_enc_codepoint_len(s, send, &clen, enc); if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) { if (t != s) rb_enc_mbcput(c, t, enc); @@ -5371,8 +5372,7 @@ s++; } else { - c = rb_enc_codepoint(s, send, enc); - clen = rb_enc_codelen(c, enc); + c = rb_enc_codepoint_len(s, send, &clen, enc); if (tr_find(c, table, del, nodel)) { i++; } @@ -5542,8 +5542,10 @@ } else { while (ptr < eptr) { - c = rb_enc_codepoint(ptr, eptr, enc); - ptr += rb_enc_mbclen(ptr, eptr, enc); + int n; + + c = rb_enc_codepoint_len(ptr, eptr, &n, enc); + ptr += n; if (skip) { if (rb_enc_isspace(c, enc)) { beg = ptr - bptr; @@ -5773,13 +5775,12 @@ } while (p < pend) { - unsigned int c = rb_enc_codepoint(p, pend, enc); + unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc); again: - n = rb_enc_codelen(c, enc); if (rslen == 0 && c == newline) { p += n; - if (p < pend && (c = rb_enc_codepoint(p, pend, enc)) != newline) { + if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) { goto again; } while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) { @@ -5940,8 +5941,7 @@ end = RSTRING_END(str); enc = STR_ENC_GET(str); while (ptr < end) { - c = rb_enc_codepoint(ptr, end, enc); - n = rb_enc_codelen(c, enc); + c = rb_enc_codepoint_len(ptr, end, &n, enc); rb_yield(UINT2NUM(c)); ptr += n; } @@ -6180,10 +6180,11 @@ e = t = 
RSTRING_END(str); /* remove spaces at head */ while (s < e) { - unsigned int cc = rb_enc_codepoint(s, e, enc); + int n; + unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc); if (!rb_enc_isspace(cc, enc)) break; - s += rb_enc_codelen(cc, enc); + s += n; } if (s > RSTRING_PTR(str)) { @@ -7057,8 +7058,9 @@ sym_printable(const char *s, const char *send, rb_encoding *enc) { while (s < send) { - int c = rb_enc_codepoint(s, send, enc); - int n = rb_enc_codelen(c, enc); + int n; + int c = rb_enc_codepoint_len(s, send, &n, enc); + if (!rb_enc_isprint(c, enc)) return Qfalse; s += n; } Index: sprintf.c =================================================================== --- sprintf.c (revision 23492) +++ sprintf.c (revision 23493) @@ -625,12 +625,12 @@ if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { rb_raise(rb_eArgError, "%%c requires a character"); } - c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc); + c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); } else { c = NUM2INT(val); + n = rb_enc_codelen(c, enc); } - n = rb_enc_codelen(c, enc); if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } Index: transcode.c =================================================================== --- transcode.c (revision 23492) +++ transcode.c (revision 23493) @@ -2027,7 +2027,7 @@ n = rb_enc_precise_mbclen(start, end, utf8); if (MBCLEN_CHARFOUND_P(n) && (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) { - unsigned int cc = rb_enc_codepoint(start, end, utf8); + unsigned int cc = rb_enc_mbc_to_codepoint(start, end, utf8); dumped = rb_sprintf("U+%04X", cc); } } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/