ruby-changes:11838

matz	2009-05-20 01:59:22 +0900 (Wed, 20 May 2009)

  New Revision: 23493

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=23493

  Log:
    * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
      and rb_enc_codelen() into one function to reduce calls.
    
    * encoding.c (rb_enc_codepoint): compatibility function.
    
    * sprintf.c (rb_str_format): use rb_enc_codepoint_len().
    
    * string.c (rb_str_inspect, rb_str_upcase_bang,
      rb_str_downcase_bang, rb_str_capitalize_bang,
      rb_str_swapcase_bang, trnext, tr_trans, rb_str_delete_bang,
      rb_str_squeeze_bang, rb_str_count, rb_str_split_m,
      rb_str_each_line, rb_str_each_codepoint, rb_str_lstrip_bang,
      sym_printable): ditto.
    
    * transcode.c (make_econv_exception): use rb_enc_mbc_to_codepoint()
      instead of rb_enc_codepoint().

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/sprintf.c
    trunk/string.c
    trunk/transcode.c

Index: encoding.c
===================================================================
--- encoding.c	(revision 23492)
+++ encoding.c	(revision 23493)
@@ -774,18 +774,27 @@
 }
 
 unsigned int
-rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
+rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
 {
     int r;
     if (e <= p)
         rb_raise(rb_eArgError, "empty string");
     r = rb_enc_precise_mbclen(p, e, enc);
-    if (MBCLEN_CHARFOUND_P(r))
+    if (MBCLEN_CHARFOUND_P(r)) {
+	if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
         return rb_enc_mbc_to_codepoint(p, e, enc);
+    }
     else
 	rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
 }
 
+#undef rb_enc_codepoint
+unsigned int
+rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
+{
+    return rb_enc_codepoint_len(p, e, 0, enc);
+}
+
 int
 rb_enc_codelen(int c, rb_encoding *enc)
 {
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 23492)
+++ include/ruby/encoding.h	(revision 23493)
@@ -123,8 +123,14 @@
 /* -> 0x00..0x7f, -1 */
 int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
 
-/* -> code or raise exception */
+
+/* -> code (and len) or raise exception */
+unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc);
+
+/* prototype for obsolete function */
 unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
+/* overriding macro */
+#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc))
 #define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e))
 
 /* -> codelen>0 or raise exception */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 23492)
+++ ChangeLog	(revision 23493)
@@ -1,3 +1,21 @@
+Wed May 20 00:13:38 2009  Yukihiro Matsumoto  <matz@r...>
+
+	* encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
+	  and rb_enc_codelen() in one function to reduce calls.
+
+	* encoding.c (rb_enc_codepoint): compatibility function.
+
+	* sprintf.c (rb_str_format): use rb_enc_codepoint_len().
+
+	* string.c (rb_str_inspect, rb_str_upcase_bang,
+	  rb_str_downcase_bang, rb_str_capitalize_bang,
+	  rb_str_swapcase_bang, trnext, tr_trans, rb_str_delete_bang,
+	  rb_str_squeeze_bang, rb_str_count, rb_str_split_m,
+	  rb_str_each_line, rb_str_each_codepoint, rb_str_lstrip_bang,
+	  sym_printable): ditto.
+
+	* transcode.c (make_econv_exception): use rb_enc_mbc_to_codepoint()
+
 Wed May 20 00:05:52 2009  Yukihiro Matsumoto  <matz@r...>
 
 	* vm_method.c (rb_attr): should preserve encoding info.
Index: string.c
===================================================================
--- string.c	(revision 23492)
+++ string.c	(revision 23493)
@@ -4168,9 +4168,7 @@
         }
         n = MBCLEN_CHARFOUND_LEN(n);
 
-	c = rb_enc_codepoint(p, pend, enc);
-	n = rb_enc_codelen(c, enc);
-
+	c = rb_enc_codepoint_len(p, pend, &n, enc);
 	p += n;
 	if (c == '"'|| c == '\\' ||
 	    (c == '#' &&
@@ -4273,7 +4271,7 @@
 		    char buf[32];
 		    int n = rb_enc_precise_mbclen(p-1, pend, enc);
 		    if (MBCLEN_CHARFOUND_P(n)) {
-			int cc = rb_enc_codepoint(p-1, pend, enc);
+			int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
 			sprintf(buf, "%x", cc);
 			len += strlen(buf)+4;
 			p += MBCLEN_CHARFOUND_LEN(n)-1;
@@ -4346,7 +4344,7 @@
 	    if (u8) {
 		int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
 		if (MBCLEN_CHARFOUND_P(n)) {
-		    int cc = rb_enc_codepoint(p-1, pend, enc);
+		    int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
 		    p += n;
 		    snprintf(q, qend-q, "u{%x}", cc);
 		    q += strlen(q);
@@ -4395,6 +4393,7 @@
     rb_encoding *enc;
     char *s, *send;
     int modify = 0;
+    int n;
 
     str_modify_keep_cr(str);
     enc = STR_ENC_GET(str);
@@ -4425,13 +4424,13 @@
 		s++;
 	    }
 	    else {
-		c = rb_enc_codepoint(s, send, enc);
+		c = rb_enc_codepoint_len(s, send, &n, enc);
 		if (rb_enc_islower(c, enc)) {
 		    /* assuming toupper returns codepoint with same size */
 		    rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
 		    modify = 1;
 		}
-		s += rb_enc_codelen(c, enc);
+		s += n;
 	    }
 	}
     }
@@ -4498,6 +4497,7 @@
 
 	while (s < send) {
 	    unsigned int c;
+	    int n;
 
 	    if (ascompat && (c = *(unsigned char*)s) < 0x80) {
 		if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
@@ -4507,13 +4507,13 @@
 		s++;
 	    }
 	    else {
-		c = rb_enc_codepoint(s, send, enc);
+		c = rb_enc_codepoint_len(s, send, &n, enc);
 		if (rb_enc_isupper(c, enc)) {
 		    /* assuming toupper returns codepoint with same size */
 		    rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
 		    modify = 1;
 		}
-		s += rb_enc_codelen(c, enc);
+		s += n;
 	    }
 	}
     }
@@ -4565,6 +4565,7 @@
     char *s, *send;
     int modify = 0;
     unsigned int c;
+    int n;
 
     str_modify_keep_cr(str);
     enc = STR_ENC_GET(str);
@@ -4572,19 +4573,19 @@
     if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
     s = RSTRING_PTR(str); send = RSTRING_END(str);
 
-    c = rb_enc_codepoint(s, send, enc);
+    c = rb_enc_codepoint_len(s, send, &n, enc);
     if (rb_enc_islower(c, enc)) {
 	rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
 	modify = 1;
     }
-    s += rb_enc_codelen(c, enc);
+    s += n;
     while (s < send) {
-	c = rb_enc_codepoint(s, send, enc);
+	c = rb_enc_codepoint_len(s, send, &n, enc);
 	if (rb_enc_isupper(c, enc)) {
 	    rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
 	    modify = 1;
 	}
-	s += rb_enc_codelen(c, enc);
+	s += n;
     }
 
     if (modify) return str;
@@ -4629,13 +4630,14 @@
     rb_encoding *enc;
     char *s, *send;
     int modify = 0;
+    int n;
 
     str_modify_keep_cr(str);
     enc = STR_ENC_GET(str);
     rb_str_check_dummy_enc(enc);
     s = RSTRING_PTR(str); send = RSTRING_END(str);
     while (s < send) {
-	unsigned int c = rb_enc_codepoint(s, send, enc);
+	unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
 
 	if (rb_enc_isupper(c, enc)) {
 	    /* assuming toupper returns codepoint with same size */
@@ -4647,7 +4649,7 @@
 	    rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
 	    modify = 1;
 	}
-	s += rb_enc_mbclen(s, send, enc);
+	s += n;
     }
 
     if (modify) return str;
@@ -4686,19 +4688,21 @@
 static unsigned int
 trnext(struct tr *t, rb_encoding *enc)
 {
+    int n;
+
     for (;;) {
 	if (!t->gen) {
 	    if (t->p == t->pend) return -1;
 	    if (t->p < t->pend - 1 && *t->p == '\\') {
 		t->p++;
 	    }
-	    t->now = rb_enc_codepoint(t->p, t->pend, enc);
-	    t->p += rb_enc_codelen(t->now, enc);
+	    t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
+	    t->p += n;
 	    if (t->p < t->pend - 1 && *t->p == '-') {
 		t->p++;
 		if (t->p < t->pend) {
-		    unsigned int c = rb_enc_codepoint(t->p, t->pend, enc);
-		    t->p += rb_enc_codelen(c, enc);
+		    unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
+		    t->p += n;
 		    if (t->now > c) continue;
 		    t->gen = 1;
 		    t->max = c;
@@ -4819,8 +4823,8 @@
 
 	while (s < send) {
 	    int may_modify = 0;
-	    c0 = c = rb_enc_codepoint(s, send, e1);
-	    clen = rb_enc_codelen(c, e1);
+
+	    c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
 	    tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
 	    s += clen;
@@ -4897,8 +4901,7 @@
 
 	while (s < send) {
 	    int may_modify = 0;
-	    c0 = c = rb_enc_codepoint(s, send, e1);
-	    clen = rb_enc_codelen(c, e1);
+	    c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
 	    tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
 	    if (c < 256) {
@@ -5125,8 +5128,7 @@
 	    s++;
 	}
 	else {
-	    c = rb_enc_codepoint(s, send, enc);
-	    clen = rb_enc_codelen(c, enc);
+	    c = rb_enc_codepoint_len(s, send, &clen, enc);
 
 	    if (tr_find(c, squeez, del, nodel)) {
 		modify = 1;
@@ -5231,8 +5233,7 @@
 		s++;
 	    }
 	    else {
-		c = rb_enc_codepoint(s, send, enc);
-		clen = rb_enc_codelen(c, enc);
+		c = rb_enc_codepoint_len(s, send, &clen, enc);
 
 		if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
 		    if (t != s) rb_enc_mbcput(c, t, enc);
@@ -5371,8 +5372,7 @@
 	    s++;
 	}
 	else {
-	    c = rb_enc_codepoint(s, send, enc);
-	    clen = rb_enc_codelen(c, enc);
+	    c = rb_enc_codepoint_len(s, send, &clen, enc);
 	    if (tr_find(c, table, del, nodel)) {
 		i++;
 	    }
@@ -5542,8 +5542,10 @@
 	}
 	else {
 	    while (ptr < eptr) {
-		c = rb_enc_codepoint(ptr, eptr, enc);
-		ptr += rb_enc_mbclen(ptr, eptr, enc);
+		int n;
+
+		c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
+		ptr += n;
 		if (skip) {
 		    if (rb_enc_isspace(c, enc)) {
 			beg = ptr - bptr;
@@ -5773,13 +5775,12 @@
     }
 
     while (p < pend) {
-	unsigned int c = rb_enc_codepoint(p, pend, enc);
+	unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
 
       again:
-	n = rb_enc_codelen(c, enc);
 	if (rslen == 0 && c == newline) {
 	    p += n;
-	    if (p < pend && (c = rb_enc_codepoint(p, pend, enc)) != newline) {
+	    if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
 		goto again;
 	    }
 	    while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
@@ -5940,8 +5941,7 @@
     end = RSTRING_END(str);
     enc = STR_ENC_GET(str);
     while (ptr < end) {
-	c = rb_enc_codepoint(ptr, end, enc);
-	n = rb_enc_codelen(c, enc);
+	c = rb_enc_codepoint_len(ptr, end, &n, enc);
 	rb_yield(UINT2NUM(c));
 	ptr += n;
     }
@@ -6180,10 +6180,11 @@
     e = t = RSTRING_END(str);
     /* remove spaces at head */
     while (s < e) {
-	unsigned int cc = rb_enc_codepoint(s, e, enc);
+	int n;
+	unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
 
 	if (!rb_enc_isspace(cc, enc)) break;
-	s += rb_enc_codelen(cc, enc);
+	s += n;
     }
 
     if (s > RSTRING_PTR(str)) {
@@ -7057,8 +7058,9 @@
 sym_printable(const char *s, const char *send, rb_encoding *enc)
 {
     while (s < send) {
-	int c = rb_enc_codepoint(s, send, enc);
-	int n = rb_enc_codelen(c, enc);
+	int n;
+	int c = rb_enc_codepoint_len(s, send, &n, enc);
+
 	if (!rb_enc_isprint(c, enc)) return Qfalse;
 	s += n;
     }
Index: sprintf.c
===================================================================
--- sprintf.c	(revision 23492)
+++ sprintf.c	(revision 23493)
@@ -625,12 +625,12 @@
 		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
 			rb_raise(rb_eArgError, "%%c requires a character");
 		    }
-		    c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc);
+		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
 		}
 		else {
 		    c = NUM2INT(val);
+		    n = rb_enc_codelen(c, enc);
 		}
-		n = rb_enc_codelen(c, enc);
 		if (n <= 0) {
 		    rb_raise(rb_eArgError, "invalid character");
 		}
Index: transcode.c
===================================================================
--- transcode.c	(revision 23492)
+++ transcode.c	(revision 23493)
@@ -2027,7 +2027,7 @@
             n = rb_enc_precise_mbclen(start, end, utf8);
             if (MBCLEN_CHARFOUND_P(n) &&
                 (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) {
-                unsigned int cc = rb_enc_codepoint(start, end, utf8);
+                unsigned int cc = rb_enc_mbc_to_codepoint(start, end, utf8);
                 dumped = rb_sprintf("U+%04X", cc);
             }
         }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/