ruby-changes:1857

matz	2007-09-06 21:33:45 +0900 (Thu, 06 Sep 2007)

  New Revision: 13348

  Modified files:
    trunk/ChangeLog
    trunk/array.c
    trunk/encoding.c
    trunk/euc_jp.c
    trunk/ext/strscan/strscan.c
    trunk/include/ruby/encoding.h
    trunk/include/ruby/oniguruma.h
    trunk/include/ruby/regex.h
    trunk/parse.y
    trunk/re.c
    trunk/regcomp.c
    trunk/regenc.c
    trunk/regerror.c
    trunk/regexec.c
    trunk/regparse.c
    trunk/sjis.c
    trunk/string.c
    trunk/unicode.c
    trunk/utf8.c

  Log:
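    * array.c (rb_ary_cycle): typo in rdoc.  a patch from Yugui
      <yugui@y...>.  [ruby-dev:31748]

    [Note: not mentioned in the log above, the diff below also adds an
    end-of-buffer pointer argument to mbc_enc_len, enc_len,
    ONIGENC_MBC_ENC_LEN, ONIGENC_IS_MBC_HEAD, mbclen/ismbchar and
    rb_enc_mbclen, and updates their callers in the other modified files.]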

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regcomp.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/oniguruma.h?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/utf8.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/array.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regexec.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/euc_jp.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regenc.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/unicode.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/sjis.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/regex.h?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ext/strscan/strscan.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regparse.c?r1=13348&r2=13347
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regerror.c?r1=13348&r2=13347

Index: euc_jp.c
===================================================================
--- euc_jp.c	(revision 13347)
+++ euc_jp.c	(revision 13348)
@@ -51,7 +51,7 @@
 };
 
 static int
-mbc_enc_len(const UChar* p)
+mbc_enc_len(const UChar* p, const UChar* e)
 {
   return EncLen_EUCJP[*p];
 }
@@ -62,7 +62,7 @@
   int c, i, len;
   OnigCodePoint n;
 
-  len = enc_len(ONIG_ENCODING_EUC_JP, p);
+  len = enc_len(ONIG_ENCODING_EUC_JP, p, end);
   n = (OnigCodePoint )*p++;
   if (len == 1) return n;
 
@@ -113,7 +113,7 @@
   *p++ = (UChar )(code & 0xff);
 
 #if 1
-  if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
+  if (enc_len(ONIG_ENCODING_EUC_JP, buf, p) != (p - buf))
     return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
 #endif  
   return p - buf;
@@ -134,7 +134,7 @@
   else {
     int i;
 
-    len = enc_len(ONIG_ENCODING_EUC_JP, p);
+    len = enc_len(ONIG_ENCODING_EUC_JP, p, end);
     for (i = 0; i < len; i++) {
       *lower++ = *p++;
     }
@@ -156,7 +156,7 @@
   p = s;
 
   while (!eucjp_islead(*p) && p > start) p--;
-  len = enc_len(ONIG_ENCODING_EUC_JP, p);
+  len = enc_len(ONIG_ENCODING_EUC_JP, p, s);
   if (p + len > s) return (UChar* )p;
   p += len;
   return (UChar* )(p + ((s - p) & ~1));
Index: sjis.c
===================================================================
--- sjis.c	(revision 13347)
+++ sjis.c	(revision 13348)
@@ -71,7 +71,7 @@
 #define SJIS_ISMB_TRAIL(byte)  SJIS_CAN_BE_TRAIL_TABLE[(byte)]
 
 static int
-mbc_enc_len(const UChar* p)
+mbc_enc_len(const UChar* p, const UChar* e)
 {
   return EncLen_SJIS[*p];
 }
@@ -98,7 +98,7 @@
   int c, i, len;
   OnigCodePoint n;
 
-  len = enc_len(ONIG_ENCODING_SJIS, p);
+  len = enc_len(ONIG_ENCODING_SJIS, p, end);
   c = *p++;
   n = c;
   if (len == 1) return n;
@@ -139,7 +139,7 @@
   }
   else {
     int i;
-    int len = enc_len(ONIG_ENCODING_SJIS, p);
+    int len = enc_len(ONIG_ENCODING_SJIS, p, end);
 
     for (i = 0; i < len; i++) {
       *lower++ = *p++;
@@ -192,7 +192,7 @@
       }
     } 
   }
-  len = enc_len(ONIG_ENCODING_SJIS, p);
+  len = enc_len(ONIG_ENCODING_SJIS, p, s);
   if (p + len > s) return (UChar* )p;
   p += len;
   return (UChar* )(p + ((s - p) & ~1));
Index: regparse.c
===================================================================
--- regparse.c	(revision 13347)
+++ regparse.c	(revision 13348)
@@ -246,12 +246,12 @@
 #define PUNFETCH     p = pfetch_prev
 #define PINC       do { \
   pfetch_prev = p; \
-  p += ONIGENC_MBC_ENC_LEN(enc, p); \
+  p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
 } while (0)
 #define PFETCH(c)  do { \
   c = ONIGENC_MBC_TO_CODE(enc, p, end); \
   pfetch_prev = p; \
-  p += ONIGENC_MBC_ENC_LEN(enc, p); \
+  p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
 } while (0)
 
 #define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
@@ -1539,7 +1539,7 @@
 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
 {
   if (sn->end > sn->s) {
-    return ((enc_len(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
+    return ((enc_len(enc, sn->s, sn->end) < sn->end - sn->s)  ?  1 : 0);
   }
   return 0;
 }
@@ -2733,12 +2733,12 @@
   
   while (p < to) {
     x = ONIGENC_MBC_TO_CODE(enc, p, to);
-    q = p + enc_len(enc, p);
+    q = p + enc_len(enc, p, to);
     if (x == s[0]) {
       for (i = 1; i < n && q < to; i++) {
 	x = ONIGENC_MBC_TO_CODE(enc, q, to);
 	if (x != s[i]) break;
-	q += enc_len(enc, q);
+	q += enc_len(enc, q, to);
       }
       if (i >= n) {
 	if (IS_NOT_NULL(next))
@@ -2764,19 +2764,19 @@
   while (p < to) {
     if (in_esc) {
       in_esc = 0;
-      p += enc_len(enc, p);
+      p += enc_len(enc, p, to);
     }
     else {
       x = ONIGENC_MBC_TO_CODE(enc, p, to);
-      q = p + enc_len(enc, p);
+      q = p + enc_len(enc, p, to);
       if (x == s[0]) {
 	for (i = 1; i < n && q < to; i++) {
 	  x = ONIGENC_MBC_TO_CODE(enc, q, to);
 	  if (x != s[i]) break;
-	  q += enc_len(enc, q);
+	  q += enc_len(enc, q, to);
 	}
 	if (i >= n) return 1;
-	p += enc_len(enc, p);
+	p += enc_len(enc, p, to);
       }
       else {
 	x = ONIGENC_MBC_TO_CODE(enc, p, to);
@@ -2904,7 +2904,7 @@
             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
         }
 
-	if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) {
+	if (p > prev + enc_len(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
 	  PINC;
 	  tok->type   = TK_CODE_POINT;
 	  tok->base   = 16;
@@ -3244,7 +3244,7 @@
             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
         }
 
-	if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) {
+	if ((p > prev + enc_len(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
 	  PINC;
 	  tok->type   = TK_CODE_POINT;
 	  tok->u.code = (OnigCodePoint )num;
@@ -3443,7 +3443,7 @@
 	tok->u.code = (OnigCodePoint )num;
       }
       else { /* string */
-	p = tok->backp + enc_len(enc, tok->backp);
+	p = tok->backp + enc_len(enc, tok->backp, end);
       }
       break;
     }
@@ -4120,7 +4120,7 @@
 	  goto err;
 	}
 
-	len = enc_len(env->enc, buf);
+	len = enc_len(env->enc, buf, buf+i);
 	if (i < len) {
 	  r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
 	  goto err;
@@ -4927,7 +4927,7 @@
       len = 1;
       while (1) {
 	if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
-	  if (len == enc_len(env->enc, NSTR(*np)->s)) {
+	  if (len == enc_len(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
 	    r = fetch_token(tok, src, end, env);
 	    NSTRING_CLEAR_RAW(*np);
 	    goto string_end;
Index: array.c
===================================================================
--- array.c	(revision 13347)
+++ array.c	(revision 13348)
@@ -2929,7 +2929,7 @@
  *  Calls <i>block</i> repeatedly forever.
  *     
  *     a = ["a", "b", "c"]
- *     a.each {|x| puts x }  # print, a, b, c, a, b, c,.. forever.
+ *     a.cycle {|x| puts x }  # print, a, b, c, a, b, c,.. forever.
  *     
  */
 
Index: encoding.c
===================================================================
--- encoding.c	(revision 13347)
+++ encoding.c	(revision 13348)
@@ -189,7 +189,7 @@
     }
     else {
 	for (c=0; p<e && nth--; c++) {
-	    int n = rb_enc_mbclen(p, enc);
+	    int n = rb_enc_mbclen(p, e, enc);
 
 	    if (n == 0) return 0;
 	    p += n;
@@ -208,7 +208,7 @@
     }
 
     for (c=0; p<e; c++) {
-	int n = rb_enc_mbclen(p, enc);
+	int n = rb_enc_mbclen(p, e, enc);
 
 	if (n == 0) return -1;
 	p += n;
@@ -217,9 +217,9 @@
 }
 
 int
-rb_enc_mbclen(const char *p, rb_encoding *enc)
+rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
 {
-    int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p);
+    int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
     if (n == 0) {
 	rb_raise(rb_eArgError, "invalid mbstring sequence");
     }
Index: regcomp.c
===================================================================
--- regcomp.c	(revision 13347)
+++ regcomp.c	(revision 13348)
@@ -469,13 +469,13 @@
   ambig = NSTRING_IS_AMBIG(node);
 
   p = prev = sn->s;
-  prev_len = enc_len(enc, p);
+  prev_len = enc_len(enc, p, sn->end);
   p += prev_len;
   slen = 1;
   rlen = 0;
 
   for (; p < sn->end; ) {
-    len = enc_len(enc, p);
+    len = enc_len(enc, p, sn->end);
     if (len == prev_len) {
       slen++;
     }
@@ -518,12 +518,12 @@
   ambig = NSTRING_IS_AMBIG(node);
 
   p = prev = sn->s;
-  prev_len = enc_len(enc, p);
+  prev_len = enc_len(enc, p, end);
   p += prev_len;
   slen = 1;
 
   for (; p < end; ) {
-    len = enc_len(enc, p);
+    len = enc_len(enc, p, end);
     if (len == prev_len) {
       slen++;
     }
@@ -2312,7 +2312,7 @@
       StrNode* sn = NSTR(node);
       UChar *s = sn->s;
       while (s < sn->end) {
-	s += enc_len(reg->enc, s);
+	s += enc_len(reg->enc, s, sn->end);
 	(*len)++;
       }
     }
@@ -3389,7 +3389,7 @@
       goto err;
     }
 
-    len = enc_len(reg->enc, p);
+    len = enc_len(reg->enc, p, end);
 
     if (n == 0) {
       if (IS_NULL(snode)) {
@@ -4212,7 +4212,7 @@
   p = add->s;
   end = p + add->len;
   for (i = to->len; p < end; ) {
-    len = enc_len(enc, p);
+    len = enc_len(enc, p, end);
     if (i + len > OPT_EXACT_MAXLEN) break;
     for (j = 0; j < len && p < end; j++)
       to->s[i++] = *p++;
@@ -4234,7 +4234,7 @@
   UChar *p;
 
   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
-    len = enc_len(enc, p);
+    len = enc_len(enc, p, end);
     if (i + len > OPT_EXACT_MAXLEN) break;
     for (j = 0; j < len && p < end; j++)
       to->s[i++] = *p++;
@@ -4260,7 +4260,7 @@
 
   for (i = 0; i < to->len && i < add->len; ) {
     if (to->s[i] != add->s[i]) break;
-    len = enc_len(env->enc, to->s + i);
+    len = enc_len(env->enc, to->s + i, to->s + to->len);
 
     for (j = 1; j < len; j++) {
       if (to->s[i+j] != add->s[i+j]) break;
Index: include/ruby/regex.h
===================================================================
--- include/ruby/regex.h	(revision 13347)
+++ include/ruby/regex.h	(revision 13348)
@@ -29,8 +29,8 @@
 
 ONIG_EXTERN OnigEncoding    OnigEncDefaultCharEncoding;
 
-#define ismbchar(p, enc) (mbclen((p),(enc)) != 1)
-#define mbclen(p,enc)  rb_enc_mbclen((p), (enc))
+#define ismbchar(p, e, enc) (mbclen((p),(e),(enc)) != 1)
+#define mbclen(p,e,enc)  rb_enc_mbclen((p),(e),(enc))
 
 #endif /* ifndef ONIG_RUBY_M17N */
 
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 13347)
+++ include/ruby/encoding.h	(revision 13348)
@@ -50,7 +50,7 @@
 #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
 
 /* ptr,encoding -> mbclen */
-int rb_enc_mbclen(const char*, rb_encoding*);
+int rb_enc_mbclen(const char*, const char *, rb_encoding*);
 
 /* code,encoding -> codelen */
 int rb_enc_codelen(int, rb_encoding*);
Index: include/ruby/oniguruma.h
===================================================================
--- include/ruby/oniguruma.h	(revision 13347)
+++ include/ruby/oniguruma.h	(revision 13348)
@@ -144,7 +144,7 @@
 typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
 
 typedef struct OnigEncodingTypeST {
-  int    (*mbc_enc_len)(const OnigUChar* p);
+  int    (*mbc_enc_len)(const OnigUChar* p,const OnigUChar* e);
   const char*   name;
   int           max_enc_len;
   int           min_enc_len;
@@ -255,11 +255,11 @@
 #define ONIGENC_MAX_STD_CTYPE  ONIGENC_CTYPE_ASCII
 
 
-#define enc_len(enc,p)                ONIGENC_MBC_ENC_LEN(enc, p)
+#define enc_len(enc,p,e)                ONIGENC_MBC_ENC_LEN(enc, p, e)
 
 #define ONIGENC_IS_UNDEF(enc)          ((enc) == ONIG_ENCODING_UNDEF)
 #define ONIGENC_IS_SINGLEBYTE(enc)     (ONIGENC_MBC_MAXLEN(enc) == 1)
-#define ONIGENC_IS_MBC_HEAD(enc,p)     (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
+#define ONIGENC_IS_MBC_HEAD(enc,p,e)   (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
 #define ONIGENC_IS_MBC_ASCII(p)           (*(p)   < 128)
 #define ONIGENC_IS_CODE_ASCII(code)       ((code) < 128)
 #define ONIGENC_IS_MBC_WORD(enc,s,end) \
@@ -281,7 +281,7 @@
 #define ONIGENC_STEP_BACK(enc,start,s,n) \
         onigenc_step_back((enc),(start),(s),(n))
 
-#define ONIGENC_MBC_ENC_LEN(enc,p)             (enc)->mbc_enc_len(p)
+#define ONIGENC_MBC_ENC_LEN(enc,p,e)           (enc)->mbc_enc_len(p,e)
 #define ONIGENC_MBC_MAXLEN(enc)               ((enc)->max_enc_len)
 #define ONIGENC_MBC_MAXLEN_DIST(enc)           ONIGENC_MBC_MAXLEN(enc)
 #define ONIGENC_MBC_MINLEN(enc)               ((enc)->min_enc_len)
Index: regenc.c
===================================================================
--- regenc.c	(revision 13347)
+++ regenc.c	(revision 13348)
@@ -55,7 +55,7 @@
 {
   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
   if (p < s) {
-    p += enc_len(enc, p);
+      p += enc_len(enc, p, s);
   }
   return p;
 }
@@ -68,7 +68,7 @@
 
   if (p < s) {
     if (prev) *prev = (const UChar* )p;
-    p += enc_len(enc, p);
+    p += enc_len(enc, p, s);
   }
   else {
     if (prev) *prev = (const UChar* )NULL; /* Sorry */
@@ -102,7 +102,7 @@
 {
   UChar* q = (UChar* )p;
   while (n-- > 0) {
-    q += ONIGENC_MBC_ENC_LEN(enc, q);
+    q += ONIGENC_MBC_ENC_LEN(enc, q, end);
   }
   return (q <= end ? q : NULL);
 }
@@ -114,7 +114,7 @@
   UChar* q = (UChar* )p;
   
   while (q < end) {
-    q += ONIGENC_MBC_ENC_LEN(enc, q);
+    q += ONIGENC_MBC_ENC_LEN(enc, q, end);
     n++;
   }
   return n;
@@ -125,6 +125,7 @@
 {
   int n = 0;
   UChar* p = (UChar* )s;
+  UChar* e = p + strlen(s);
   
   while (1) {
     if (*p == '\0') {
@@ -140,7 +141,7 @@
       }
       if (len == 1) return n;
     }
-    p += ONIGENC_MBC_ENC_LEN(enc, p);
+    p += ONIGENC_MBC_ENC_LEN(enc, p, e);
     n++;
   }
 }
@@ -150,6 +151,7 @@
 {
   UChar* start = (UChar* )s;
   UChar* p = (UChar* )s;
+  UChar* e = p + strlen(s);
 
   while (1) {
     if (*p == '\0') {
@@ -165,7 +167,7 @@
       }
       if (len == 1) return (int )(p - start);
     }
-    p += ONIGENC_MBC_ENC_LEN(enc, p);
+    p += ONIGENC_MBC_ENC_LEN(enc, p, e);
   }
 }
 
@@ -638,7 +640,7 @@
   int c, i, len;
   OnigCodePoint n;
 
-  len = enc_len(enc, p);
+  len = enc_len(enc, p, end);
   n = (OnigCodePoint )(*p++);
   if (len == 1) return n;
 
@@ -665,7 +667,7 @@
   else {
     int i;
 
-    len = enc_len(enc, p);
+    len = enc_len(enc, p, end);
     for (i = 0; i < len; i++) {
       *lower++ = *p++;
     }
@@ -718,7 +720,7 @@
   *p++ = (UChar )(code & 0xff);
 
 #if 1
-  if (enc_len(enc, buf) != (p - buf))
+  if (enc_len(enc, buf, p) != (p - buf))
     return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
 #endif
   return p - buf;
@@ -741,7 +743,7 @@
   *p++ = (UChar )(code & 0xff);
 
 #if 1
-  if (enc_len(enc, buf) != (p - buf))
+  if (enc_len(enc, buf, p) != (p - buf))
     return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
 #endif
   return p - buf;
@@ -825,7 +827,7 @@
     if (x) return x;
 
     sascii++;
-    p += enc_len(enc, p);
+    p += enc_len(enc, p, end);
   }
   return 0;
 }
Index: re.c
===================================================================
--- re.c	(revision 13347)
+++ re.c	(revision 13348)
@@ -393,11 +393,11 @@
 
     p = s; pend = p + len;
     while (p<pend) {
-	if (*p == '/' || (!rb_enc_isprint(*p, enc) && !ismbchar(p, enc))) {
+	if (*p == '/' || (!rb_enc_isprint(*p, enc) && !ismbchar(p, pend, enc))) {
 	    need_escape = 1;
 	    break;
 	}
-	p += mbclen(p, enc);
+	p += mbclen(p, pend, enc);
     }
     if (!need_escape) {
 	rb_str_buf_cat(str, s, len);
@@ -406,7 +406,7 @@
 	p = s;
 	while (p<pend) {
 	    if (*p == '\\') {
-		int n = mbclen(p+1, enc) + 1;
+		int n = mbclen(p+1, pend, enc) + 1;
 		rb_str_buf_cat(str, p, n);
 		p += n;
 		continue;
@@ -416,9 +416,9 @@
 		rb_str_buf_cat(str, &c, 1);
 		rb_str_buf_cat(str, p, 1);
 	    }
-	    else if (ismbchar(p, enc)) {
-	    	rb_str_buf_cat(str, p, mbclen(p, enc));
-		p += mbclen(p, enc);
+	    else if (ismbchar(p, pend, enc)) {
+	    	rb_str_buf_cat(str, p, mbclen(p, pend, enc));
+		p += mbclen(p, pend, enc);
 		continue;
 	    }
 	    else if (rb_enc_isprint(*p, enc)) {
@@ -1906,8 +1906,8 @@
     send = s + RSTRING_LEN(str);
     for (; s < send; s++) {
 	c = *s;
-	if (ismbchar(s, enc)) {
-	    int n = mbclen(s, enc);
+	if (ismbchar(s, send, enc)) {
+	    int n = mbclen(s, send, enc);
 
 	    while (n-- && s < send)
 		s++;
@@ -1935,8 +1935,8 @@
 
     for (; s < send; s++) {
 	c = *s;
-	if (ismbchar(s, enc)) {
-	    int n = mbclen(s, enc);
+	if (ismbchar(s, send, enc)) {
+	    int n = mbclen(s, send, enc);
 
 	    while (n-- && s < send)
 		*t++ = *s++;
@@ -2180,8 +2180,8 @@
     while (s < e) {
 	char *ss = s++;
 
-	if (ismbchar(ss, enc)) {
-	    s += mbclen(ss, enc) - 1;
+	if (ismbchar(ss, e, enc)) {
+	    s += mbclen(ss, e, enc) - 1;
 	    continue;
 	}
 	if (*ss != '\\' || s == e) continue;
@@ -2214,7 +2214,7 @@
               name_end = name = s + 1;
               while (name_end < e) {
                 if (*name_end == '>') break;
-                name_end += mbclen(name_end, enc);
+                name_end += mbclen(name_end, e, enc);
               }
               if (name_end < e) {
                 no = name_to_backref_number(regs, regexp, name, name_end);
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 13347)
+++ ChangeLog	(revision 13348)
@@ -1,3 +1,8 @@
+Thu Sep  6 21:31:49 2007  Yukihiro Matsumoto  <matz@r...>
+
+	* array.c (rb_ary_cycle): typo in rdoc.  a patch from Yugui
+	  <yugui@y...>.  [ruby-dev:31748]
+
 Thu Sep  6 12:42:10 2007  Nobuyoshi Nakada  <nobu@r...>
 
 	* string.c (rb_str_succ, rb_str_chop_bang, rb_str_chop): m17n support.
Index: utf8.c
===================================================================
--- utf8.c	(revision 13347)
+++ utf8.c	(revision 13348)
@@ -60,7 +60,7 @@
 };
 
 static int
-utf8_mbc_enc_len(const UChar* p)
+utf8_mbc_enc_len(const UChar* p, const UChar* e)
 {
   return EncLen_UTF8[*p];
 }
@@ -96,7 +96,7 @@
   int c, len;
   OnigCodePoint n;
 
-  len = enc_len(ONIG_ENCODING_UTF8, p);
+  len = enc_len(ONIG_ENCODING_UTF8, p, end);
   c = *p++;
   if (len > 1) {
     len--;
Index: regerror.c
===================================================================
--- regerror.c	(revision 13347)
+++ regerror.c	(revision 13348)
@@ -209,7 +209,7 @@
 	buf[len++] = (UChar )code;
       }
 
-      p += enc_len(enc, p);
+      p += enc_len(enc, p, end);
       if (len >= buf_size) break;
     }
 
@@ -330,15 +330,15 @@
     while (p < pat_end) {
       if (*p == '\\') {
 	*s++ = *p++;
-	len = enc_len(enc, p);
+	len = enc_len(enc, p, pat_end);
 	while (len-- > 0) *s++ = *p++;
       }
       else if (*p == '/') {
 	*s++ = (unsigned char )'\\';
 	*s++ = *p++;
       }
-      else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
-        len = enc_len(enc, p);
+      else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
+        len = enc_len(enc, p, pat_end);
         if (ONIGENC_MBC_MINLEN(enc) == 1) {
           while (len-- > 0) *s++ = *p++;
         }
Index: string.c
===================================================================
--- string.c	(revision 13347)
+++ string.c	(revision 13348)
@@ -707,7 +707,7 @@
 
 	i = 0;
 	while (p < e) {
-	    p += rb_enc_mbclen(p, enc);
+	    p += rb_enc_mbclen(p, e, enc);
 	    i++;
 	}
 	return i;
@@ -2375,7 +2375,7 @@
 	     * in order to prevent infinite loops.
 	     */
 	    if (RSTRING_LEN(str) <= END(0)) break;
-	    len = rb_enc_mbclen(RSTRING_PTR(str)+END(0), enc);
+	    len = rb_enc_mbclen(RSTRING_PTR(str)+END(0), RSTRING_END(str), enc);
 	    memcpy(bp, RSTRING_PTR(str)+END(0), len);
 	    bp += len;
 	    offset = END(0) + len;
@@ -2595,7 +2595,7 @@
 	}
 	else {
 	    while (s < e) {
-		int clen = rb_enc_mbclen(s, enc);
+		int clen = rb_enc_mbclen(s, e, enc);
 
 		if (clen == 0) {
 		    rb_raise(rb_eArgError, "invalid mbstring sequence");
@@ -3861,11 +3861,13 @@
 		}
 		else if (last_null == 1) {
 		    rb_ary_push(result, rb_str_subseq(str, beg,
-						      rb_enc_mbclen(RSTRING_PTR(str)+beg,enc)));
+						      rb_enc_mbclen(RSTRING_PTR(str)+beg,
+								    RSTRING_END(str),
+								    enc)));
 		    beg = start;
 		}
 		else {
-		    start += rb_enc_mbclen(RSTRING_PTR(str)+start,enc);
+		    start += rb_enc_mbclen(RSTRING_PTR(str)+start,RSTRING_END(str),enc);
 		    last_null = 1;
 		    continue;
 		}
@@ -4455,7 +4457,8 @@
 	     * Always consume at least one character of the input string
 	     */
 	    if (RSTRING_LEN(str) > END(0))
-		*start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0),enc);
+		*start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0),
+					      RSTRING_END(str), enc);
 	    else
 		*start = END(0)+1;
 	}
Index: unicode.c
===================================================================
--- unicode.c	(revision 13347)
+++ unicode.c	(revision 13348)
@@ -10832,7 +10832,7 @@
     if (len >= PROPERTY_NAME_MAX_SIZE)
       return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
 
-    p += enc_len(enc, p);
+    p += enc_len(enc, p, end);
   }
 
   buf[len] = 0;
@@ -10963,7 +10963,7 @@
   if (CaseFoldInited == 0) init_case_fold_table();
 
   code = ONIGENC_MBC_TO_CODE(enc, p, end);
-  len = enc_len(enc, p);
+  len = enc_len(enc, p, end);
   *pp += len;
 
 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
@@ -11155,7 +11155,7 @@
   n = 0;
 
   code = ONIGENC_MBC_TO_CODE(enc, p, end);
-  len = enc_len(enc, p);
+  len = enc_len(enc, p, end);
 
 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
   if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
@@ -11304,7 +11304,7 @@
       else
 	codes[1] = code;
 
-      clen = enc_len(enc, p);
+      clen = enc_len(enc, p, end);
       len += clen;
       if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
 	for (i = 0; i < z2->n; i++) {
@@ -11325,7 +11325,7 @@
 	else
 	  codes[2] = code;
 
-	clen = enc_len(enc, p);
+	clen = enc_len(enc, p, end);
 	len += clen;
 	if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
 			   (void* )&z2) != 0) {
Index: parse.y
===================================================================
--- parse.y	(revision 13347)
+++ parse.y	(revision 13348)
@@ -4558,10 +4558,10 @@
 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
 #endif
 
-#define parser_mbclen()  mbclen((lex_p-1),parser->enc)
-#define is_identchar(p, enc) (rb_enc_isalnum(*p, enc) || (*p) == '_' || ismbchar(p, enc))
-#define parser_ismbchar() ismbchar((lex_p-1), parser->enc)
-#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),parser->enc))
+#define parser_mbclen()  mbclen((lex_p-1),lex_pend,parser->enc)
+#define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || ismbchar(p,e,enc))
+#define parser_ismbchar() ismbchar((lex_p-1), lex_pend, parser->enc)
+#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc))
 
 static int
 parser_yyerror(struct parser_params *parser, const char *msg)
@@ -5995,7 +5995,7 @@
 	    }
 	}
 	else if ((rb_enc_isalnum(c, parser->enc) || c == '_') &&
-		 lex_p < lex_pend && is_identchar(lex_p, parser->enc)) {
+		 lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser->enc)) {
 	    goto ternary;
 	}
 	else if (c == '\\') {
@@ -8328,7 +8328,7 @@
 }
 
 static int
-is_special_global_name(const char *m, rb_encoding *enc)
+is_special_global_name(const char *m, const char *e, rb_encoding *enc)
 {
     switch (*m) {
       case '~': case '*': case '$': case '?': case '!': case '@':
@@ -8340,7 +8340,7 @@
 	break;
       case '-':
 	++m;
-	if (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc);
+	if (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
 	break;
       default:
 	if (!rb_enc_isdigit(*m, enc)) return 0;
@@ -8353,6 +8353,7 @@
 rb_symname_p(const char *name)
 {
     const char *m = name;
+    const char *e = m + strlen(m);
     int localid = Qfalse;
     rb_encoding *enc = rb_enc_from_index(0);
 
@@ -8362,7 +8363,7 @@
 	return Qfalse;
 
       case '$':
-	if (is_special_global_name(++m, enc)) return Qtrue;
+	if (is_special_global_name(++m, e, enc)) return Qtrue;
 	goto id;
 
       case '@':
@@ -8411,8 +8412,9 @@
       default:
 	localid = !rb_enc_isupper(*m, enc);
       id:
-	if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, enc)) return Qfalse;
-	while (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc);
+	if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, e, enc))
+		  return Qfalse;
+	while (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
 	if (localid) {
 	    switch (*m) {
 	      case '!': case '?': case '=': ++m;
@@ -8427,6 +8429,7 @@
 rb_intern3(const char *name, long len, rb_encoding *enc)
 {
     const char *m = name;
+    const char *e = m + len;
     VALUE str;
     ID id;
     int last;
@@ -8445,7 +8448,7 @@
     switch (*m) {
       case '$':
 	id |= ID_GLOBAL;
-	if (is_special_global_name(++m, enc)) goto new_id;
+	if (is_special_global_name(++m, e, enc)) goto new_id;
 	break;
       case '@':
 	if (m[1] == '@') {
@@ -8490,8 +8493,8 @@
 	break;
     }
     if (!rb_enc_isdigit(*m, enc)) {
-	while (m <= name + last && is_identchar(m, enc)) {
-	    m += rb_enc_mbclen(m, enc);
+	while (m <= name + last && is_identchar(m, e, enc)) {
+	    m += rb_enc_mbclen(m, e, enc);
 	}
     }
     if (m - name < len) id = ID_JUNK;
Index: ext/strscan/strscan.c
===================================================================
--- ext/strscan/strscan.c	(revision 13347)
+++ ext/strscan/strscan.c	(revision 13348)
@@ -661,7 +661,7 @@
     if (EOS_P(p))
         return Qnil;
 
-    len = rb_enc_mbclen(CURPTR(p), enc);
+    len = rb_enc_mbclen(CURPTR(p), S_PEND(p), enc);
     if (p->curr + len > S_LEN(p)) {
         len = S_LEN(p) - p->curr;
     }
Index: regexec.c
===================================================================
--- regexec.c	(revision 13347)
+++ regexec.c	(revision 13348)
@@ -1642,12 +1642,12 @@
       DATA_ENSURE(1);
       if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
       p += SIZE_BITSET;
-      s += enc_len(encode, s);   /* OP_CCLASS can match mb-code. \D, \S */
+      s += enc_len(encode, s, end);   /* OP_CCLASS can match mb-code. \D, \S */
       MOP_OUT;
       break;
 
     case OP_CCLASS_MB:  MOP_IN(OP_CCLASS_MB);
-      if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
+      if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
 
     cclass_mb:
       GET_LENGTH_INC(tlen, p);
@@ -1657,7 +1657,7 @@
 	int mb_len;
 
 	DATA_ENSURE(1);
-	mb_len = enc_len(encode, s);
+	mb_len = enc_len(encode, s, end);
 	DATA_ENSURE(mb_len);
 	ss = s;
 	s += mb_len;
@@ -1677,7 +1677,7 @@
 
     case OP_CCLASS_MIX:  MOP_IN(OP_CCLASS_MIX);
       DATA_ENSURE(1);
-      if (ONIGENC_IS_MBC_HEAD(encode, s)) {
+      if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
 	p += SIZE_BITSET;
 	goto cclass_mb;
       }
@@ -1697,13 +1697,13 @@
       DATA_ENSURE(1);
       if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
       p += SIZE_BITSET;
-      s += enc_len(encode, s);
+      s += enc_len(encode, s, end);
       MOP_OUT;
       break;
 
     case OP_CCLASS_MB_NOT:  MOP_IN(OP_CCLASS_MB_NOT);
       DATA_ENSURE(1);
-      if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
+      if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
 	s++;
 	GET_LENGTH_INC(tlen, p);
 	p += tlen;
@@ -1715,7 +1715,7 @@
       {
 	OnigCodePoint code;
 	UChar *ss;
-	int mb_len = enc_len(encode, s);
+	int mb_len = enc_len(encode, s, end);
 
 	if (! DATA_ENSURE_CHECK(mb_len)) {
           DATA_ENSURE(1);
@@ -1744,7 +1744,7 @@
 
     case OP_CCLASS_MIX_NOT:  MOP_IN(OP_CCLASS_MIX_NOT);
       DATA_ENSURE(1);
-      if (ONIGENC_IS_MBC_HEAD(encode, s)) {
+      if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
 	p += SIZE_BITSET;
 	goto cclass_mb_not;
       }
@@ -1769,7 +1769,7 @@
 
         DATA_ENSURE(1);
         GET_POINTER_INC(node, p);
-	mb_len = enc_len(encode, s);
+	mb_len = enc_len(encode, s, end);
 	ss = s;
 	s += mb_len;
 	DATA_ENSURE(0);
@@ -1781,7 +1781,7 @@
 
     case OP_ANYCHAR:  MOP_IN(OP_ANYCHAR);
       DATA_ENSURE(1);
-      n = enc_len(encode, s);
+      n = enc_len(encode, s, end);
       DATA_ENSURE(n);
       if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
       s += n;
@@ -1790,7 +1790,7 @@
 
     case OP_ANYCHAR_ML:  MOP_IN(OP_ANYCHAR_ML);
       DATA_ENSURE(1);
-      n = enc_len(encode, s);
+      n = enc_len(encode, s, end);
       DATA_ENSURE(n);
       s += n;
       MOP_OUT;
@@ -1799,7 +1799,7 @@
     case OP_ANYCHAR_STAR:  MOP_IN(OP_ANYCHAR_STAR);
       while (DATA_ENSURE_CHECK1) {
 	STACK_PUSH_ALT(p, s, sprev);
-	n = enc_len(encode, s);
+	n = enc_len(encode, s, end);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
         sprev = s;
@@ -1811,7 +1811,7 @@
     case OP_ANYCHAR_ML_STAR:  MOP_IN(OP_ANYCHAR_ML_STAR);
       while (DATA_ENSURE_CHECK1) {
 	STACK_PUSH_ALT(p, s, sprev);
-	n = enc_len(encode, s);
+	n = enc_len(encode, s, end);
 	if (n > 1) {
 	  DATA_ENSURE(n);
 	  sprev = s;
@@ -1830,7 +1830,7 @@
 	if (*p == *s) {
 	  STACK_PUSH_ALT(p + 1, s, sprev);
 	}
-	n = enc_len(encode, s);
+	n = enc_len(encode, s, end);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
         sprev = s;
@@ -1845,7 +1845,7 @@
 	if (*p == *s) {
 	  STACK_PUSH_ALT(p + 1, s, sprev);
 	}
-	n = enc_len(encode, s);
+	n = enc_len(encode, s, end);
 	if (n > 1) {
 	  DATA_ENSURE(n);
 	  sprev = s;
@@ -1906,7 +1906,7 @@
       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
 	goto fail;
 
-      s += enc_len(encode, s);
+      s += enc_len(encode, s, end);
       MOP_OUT;
       break;
 
@@ -1915,7 +1915,7 @@
       if (ONIGENC_IS_MBC_WORD(encode, s, end))
 	goto fail;
 
-      s += enc_len(encode, s);
+      s += enc_len(encode, s, end);
       MOP_OUT;
       break;
 
@@ -2043,7 +2043,7 @@
 #endif
       }
       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
-	       ON_STR_END(s + enc_len(encode, s))) {
+	       ON_STR_END(s + enc_len(encode, s, end))) {
 	MOP_OUT;
 	continue;
       }
@@ -2157,7 +2157,7 @@
 	DATA_ENSURE(n);
 	sprev = s;
 	STRING_CMP(pstart, s, n);
-	while (sprev + (len = enc_len(encode, sprev)) < s)
+	while (sprev + (len = enc_len(encode, sprev, end)) < s)
 	  sprev += len;
 
 	MOP_OUT;
@@ -2189,7 +2189,7 @@
 	DATA_ENSURE(n);
 	sprev = s;
 	STRING_CMP_IC(case_fold_flag, pstart, &s, n);
-	while (sprev + (len = enc_len(encode, sprev)) < s)
+	while (sprev + (len = enc_len(encode, sprev, end)) < s)
 	  sprev += len;
 
 	MOP_OUT;
@@ -2224,7 +2224,7 @@
 	  STRING_CMP_VALUE(pstart, swork, n, is_fail);
 	  if (is_fail) continue;
 	  s = swork;
-	  while (sprev + (len = enc_len(encode, sprev)) < s)
+	  while (sprev + (len = enc_len(encode, sprev, end)) < s)
 	    sprev += len;
 
 	  p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -2263,7 +2263,7 @@
 	  STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
 	  if (is_fail) continue;
 	  s = swork;
-	  while (sprev + (len = enc_len(encode, sprev)) < s)
+	  while (sprev + (len = enc_len(encode, sprev, end)) < s)
 	    sprev += len;
 
 	  p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -2289,7 +2289,7 @@
 	sprev = s;
 	if (backref_match_at_nested_level(reg, stk, stk_base, ic
 		  , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
-	  while (sprev + (len = enc_len(encode, sprev)) < s)
+	  while (sprev + (len = enc_len(encode, sprev, end)) < s)
 	    sprev += len;
 
 	  p += (SIZE_MEMNUM * tlen);
@@ -2760,7 +2760,7 @@
       if (t == target_end)
 	return s;
     }
-    s += enc_len(enc, s);
+    s += enc_len(enc, s, end);
   }
 
   return (UChar* )NULL;
@@ -2805,7 +2805,7 @@
 			     s, text_end))
       return s;
 
-    s += enc_len(enc, s);
+    s += enc_len(enc, s, text_end);
   }
 
   return (UChar* )NULL;
@@ -2903,7 +2903,7 @@
       skip = reg->map[*se];
       t = s;
       do {
-        s += enc_len(reg->enc, s);
+        s += enc_len(reg->enc, s, end);
       } while ((s - t) < skip && s < end);
     }
   }
@@ -2919,7 +2919,7 @@
       skip = reg->int_map[*se];
       t = s;
       do {
-        s += enc_len(reg->enc, s);
+        s += enc_len(reg->enc, s, end);
       } while ((s - t) < skip && s < end);
     }
   }
@@ -3024,7 +3024,7 @@
   while (s < text_range) {
     if (map[*s]) return (UChar* )s;
 
-    s += enc_len(enc, s);
+    s += enc_len(enc, s, text_range);
   }
   return (UChar* )NULL;
 }
@@ -3127,7 +3127,7 @@
     }
     else {
       UChar *q = p + reg->dmin;
-      while (p < q) p += enc_len(reg->enc, p);
+      while (p < q) p += enc_len(reg->enc, p, end);
     }
   }
 
@@ -3158,7 +3158,7 @@
     if (p - reg->dmin < s) {
     retry_gate:
       pprev = p;
-      p += enc_len(reg->enc, p);
+      p += enc_len(reg->enc, p, end);
       goto retry;
     }
 
@@ -3604,7 +3604,7 @@
 	  while (s <= high) {
 	    MATCH_AND_RETURN_CHECK(orig_range);
 	    prev = s;
-	    s += enc_len(reg->enc, s);
+	    s += enc_len(reg->enc, s, end);
 	  }
 	} while (s < range);
 	goto mismatch;
@@ -3617,11 +3617,11 @@
           do {
             MATCH_AND_RETURN_CHECK(orig_range);
             prev = s;
-            s += enc_len(reg->enc, s);
+            s += enc_len(reg->enc, s, end);
 
             while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
               prev = s;
-              s += enc_len(reg->enc, s);
+              s += enc_len(reg->enc, s, end);
             }
           } while (s < range);
           goto mismatch;
@@ -3632,7 +3632,7 @@
     do {
       MATCH_AND_RETURN_CHECK(orig_range);
       prev = s;
-      s += enc_len(reg->enc, s);
+      s += enc_len(reg->enc, s, end);
     } while (s < range);
 
     if (s == range) { /* because empty match with /$/. */

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml