[前][次][番号順一覧][スレッド一覧]

ruby-changes:3791

From: ko1@a...
Date: Sun, 27 Jan 2008 23:27:34 +0900 (JST)
Subject: [ruby-changes:3791] akr - Ruby:r15280 (trunk): * include/ruby/oniguruma.h: precise mbclen API redesigned to avoid

akr	2008-01-27 23:27:07 +0900 (Sun, 27 Jan 2008)

  New Revision: 15280

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/include/ruby/oniguruma.h
    trunk/io.c
    trunk/parse.y
    trunk/re.c
    trunk/regenc.c
    trunk/string.c

  Log:
    * include/ruby/oniguruma.h: precise mbclen API redesigned to avoid
      inline functions.
      (onigenc_mbclen_charfound): removed.
      (onigenc_mbclen_needmore): removed.
      (onigenc_mbclen_recover): removed.
      (ONIGENC_MBCLEN_CHARFOUND): removed.
      (ONIGENC_MBCLEN_CHARFOUND_P): defined.
      (ONIGENC_MBCLEN_CHARFOUND_LEN): defined.
      (ONIGENC_MBCLEN_INVALID): removed.
      (ONIGENC_MBCLEN_INVALID_P): defined.
      (ONIGENC_MBCLEN_NEEDMORE): removed.
      (ONIGENC_MBCLEN_NEEDMORE_P): defined.
      (ONIGENC_MBCLEN_NEEDMORE_LEN): defined.
      (ONIGENC_MBC_ENC_LEN): use onigenc_mbclen_approximate.
    
    * regenc.c (onigenc_mbclen_approximate): defined.
    
    * include/ruby/encoding.h (MBCLEN_CHARFOUND): removed.
      (MBCLEN_INVALID): removed.
      (MBCLEN_NEEDMORE): removed.
      (MBCLEN_CHARFOUND_P): defined.
      (MBCLEN_INVALID_P): defined.
      (MBCLEN_NEEDMORE_P): defined.
      (MBCLEN_CHARFOUND_LEN): defined.
      (MBCLEN_NEEDMORE_LEN): defined.
    
    * encoding.c: use new API.
    
    * re.c: ditto.
    
    * string.c: ditto.
    
    * parse.y: ditto.
    


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/oniguruma.h?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regenc.c?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/io.c?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=15280&r2=15279&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=15280&r2=15279&diff_format=u

Index: encoding.c
===================================================================
--- encoding.c	(revision 15279)
+++ encoding.c	(revision 15280)
@@ -749,9 +749,8 @@
 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
 {
     int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
-    n = MBCLEN_CHARFOUND(n);
-    if (0 < n && n <= e-p)
-        return n;
+    if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
+        return MBCLEN_CHARFOUND_LEN(n);
     else
         return 1;
 }
@@ -782,7 +781,7 @@
         return c;
     }
     l = rb_enc_precise_mbclen(p, e, enc);
-    if (!MBCLEN_CHARFOUND(l))
+    if (!MBCLEN_CHARFOUND_P(l))
         return -1;
     c = rb_enc_codepoint(p, e, enc);
     if (!rb_enc_isascii(c, enc))
@@ -798,7 +797,7 @@
     if (e <= p)
         rb_raise(rb_eArgError, "empty string");
     r = rb_enc_precise_mbclen(p, e, enc);
-    if (MBCLEN_CHARFOUND(r))
+    if (MBCLEN_CHARFOUND_P(r))
         return rb_enc_mbc_to_codepoint(p, e, enc);
     else
 	rb_raise(rb_eArgError, "invalid mbstring sequence");
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 15279)
+++ include/ruby/encoding.h	(revision 15280)
@@ -110,9 +110,11 @@
 
 /* -> chlen, invalid or needmore */
 int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc);
-#define MBCLEN_CHARFOUND(ret)     ONIGENC_MBCLEN_CHARFOUND(ret)
-#define MBCLEN_INVALID(ret)       ONIGENC_MBCLEN_INVALID(ret)
-#define MBCLEN_NEEDMORE(ret)      ONIGENC_MBCLEN_NEEDMORE(ret)
+#define MBCLEN_CHARFOUND_P(ret)     ONIGENC_MBCLEN_CHARFOUND_P(ret)
+#define MBCLEN_CHARFOUND_LEN(ret)     ONIGENC_MBCLEN_CHARFOUND_LEN(ret)
+#define MBCLEN_INVALID_P(ret)       ONIGENC_MBCLEN_INVALID_P(ret)
+#define MBCLEN_NEEDMORE_P(ret)      ONIGENC_MBCLEN_NEEDMORE_P(ret)
+#define MBCLEN_NEEDMORE_LEN(ret)      ONIGENC_MBCLEN_NEEDMORE_LEN(ret)
 
 /* -> 0x00..0x7f, -1 */
 int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
Index: include/ruby/oniguruma.h
===================================================================
--- include/ruby/oniguruma.h	(revision 15279)
+++ include/ruby/oniguruma.h	(revision 15280)
@@ -229,32 +229,23 @@
 #define ONIGENC_STEP_BACK(enc,start,s,n) \
         onigenc_step_back((enc),(start),(s),(n))
 
-
 #define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n)   (n)
+#define ONIGENC_MBCLEN_CHARFOUND_P(r)           (0 < (r))
+#define ONIGENC_MBCLEN_CHARFOUND_LEN(r)         (r)
+
 #define ONIGENC_CONSTRUCT_MBCLEN_INVALID()      (-1)
+#define ONIGENC_MBCLEN_INVALID_P(r)             ((r) == -1)
+
 #define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)    (-1-(n))
+#define ONIGENC_MBCLEN_NEEDMORE_P(r)            ((r) < -1)
+#define ONIGENC_MBCLEN_NEEDMORE_LEN(r)          (-1-(r))
 
-static inline int onigenc_mbclen_charfound(int r) { return 0 < r ? r : 0; }
-static inline int onigenc_mbclen_needmore(int r) { return r < -1 ? -1 - r : 0; }
-#define ONIGENC_MBCLEN_CHARFOUND(r)     onigenc_mbclen_charfound(r)
-#define ONIGENC_MBCLEN_INVALID(r)       ((r) == -1)
-#define ONIGENC_MBCLEN_NEEDMORE(r)      onigenc_mbclen_needmore(r)
-
 #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e)   (enc)->precise_mbc_enc_len(p,e,enc)
 
-static inline int onigenc_mbclen_recover(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
-{
-    int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
-    int r;
-    if (ONIGENC_MBCLEN_INVALID(ret))
-        return 1;
-    else if ((r = ONIGENC_MBCLEN_NEEDMORE(ret)))
-        return e-p+r;
-    else
-        return ONIGENC_MBCLEN_CHARFOUND(ret);
-}
+ONIG_EXTERN
+int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc));
 
-#define ONIGENC_MBC_ENC_LEN(enc,p,e)           onigenc_mbclen_recover(p,e,enc)
+#define ONIGENC_MBC_ENC_LEN(enc,p,e)           onigenc_mbclen_approximate(p,e,enc)
 #define ONIGENC_MBC_MAXLEN(enc)               ((enc)->max_enc_len)
 #define ONIGENC_MBC_MAXLEN_DIST(enc)           ONIGENC_MBC_MAXLEN(enc)
 #define ONIGENC_MBC_MINLEN(enc)               ((enc)->min_enc_len)
Index: regenc.c
===================================================================
--- regenc.c	(revision 15279)
+++ regenc.c	(revision 15280)
@@ -50,6 +50,17 @@
   return 0;
 }
 
+extern int
+onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
+{
+  int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
+  if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
+    return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
+  else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
+    return e-p+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
+  return 1;
+}
+
 extern UChar*
 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
 {
Index: re.c
===================================================================
--- re.c	(revision 15279)
+++ re.c	(revision 15280)
@@ -1673,7 +1673,7 @@
 
     chbuf[chlen++] = byte;
     while (chlen < chmaxlen &&
-           MBCLEN_NEEDMORE(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
+           MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
         byte = read_escaped_byte(&p, end, err);
         if (byte == -1) {
             return -1;
@@ -1682,7 +1682,7 @@
     }
 
     l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
-    if (MBCLEN_INVALID(l)) {
+    if (MBCLEN_INVALID_P(l)) {
         strcpy(err, "invalid multibyte escape");
         return -1;
     }
@@ -1812,10 +1812,11 @@
 
     while (p < end) {
         int chlen = rb_enc_precise_mbclen(p, end, enc);
-        if (!MBCLEN_CHARFOUND(chlen)) {
+        if (!MBCLEN_CHARFOUND_P(chlen)) {
             strcpy(err, "invalid multibyte character");
             return -1;
         }
+        chlen = MBCLEN_CHARFOUND_LEN(chlen);
         if (1 < chlen || (*p & 0x80)) {
             rb_str_buf_cat(buf, p, chlen);
             p += chlen;
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15279)
+++ ChangeLog	(revision 15280)
@@ -1,3 +1,39 @@
+Sun Jan 27 23:20:54 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/oniguruma.h: precise mbclen API redesigned to avoid
+	  inline functions.
+	  (onigenc_mbclen_charfound): removed.
+	  (onigenc_mbclen_needmore): removed.
+	  (onigenc_mbclen_recover): removed.
+	  (ONIGENC_MBCLEN_CHARFOUND): removed.
+	  (ONIGENC_MBCLEN_CHARFOUND_P): defined.
+	  (ONIGENC_MBCLEN_CHARFOUND_LEN): defined.
+	  (ONIGENC_MBCLEN_INVALID): removed.
+	  (ONIGENC_MBCLEN_INVALID_P): defined.
+	  (ONIGENC_MBCLEN_NEEDMORE): removed.
+	  (ONIGENC_MBCLEN_NEEDMORE_P): defined.
+	  (ONIGENC_MBCLEN_NEEDMORE_LEN): defined.
+	  (ONIGENC_MBC_ENC_LEN): use onigenc_mbclen_approximate.
+
+	* regenc.c (onigenc_mbclen_approximate): defined.
+
+	* include/ruby/encoding.h (MBCLEN_CHARFOUND): removed.
+	  (MBCLEN_INVALID): removed.
+	  (MBCLEN_NEEDMORE): removed.
+	  (MBCLEN_CHARFOUND_P): defined.
+	  (MBCLEN_INVALID_P): defined.
+	  (MBCLEN_NEEDMORE_P): defined.
+	  (MBCLEN_CHARFOUND_LEN): defined.
+	  (MBCLEN_NEEDMORE_LEN): defined.
+
+	* encoding.c: use new API.
+
+	* re.c: ditto.
+
+	* string.c: ditto.
+
+	* parse.y: ditto.
+
 Sun Jan 27 22:55:27 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* parse.y (value_expr_gen): reverted r12880.  [ruby-dev:33388]
Index: string.c
===================================================================
--- string.c	(revision 15279)
+++ string.c	(revision 15280)
@@ -170,11 +170,10 @@
         }
         while (p < e) {
             int ret = rb_enc_precise_mbclen(p, e, enc);
-            int len = MBCLEN_CHARFOUND(ret);
-            if (!len) {
+            if (!MBCLEN_CHARFOUND_P(ret)) {
                 return ENC_CODERANGE_BROKEN;
             }
-            p += len;
+            p += MBCLEN_CHARFOUND_LEN(ret);
             if (p < e) {
                 p = search_nonascii(p, e);
                 if (!p) {
@@ -190,12 +189,11 @@
 
     while (p < e) {
         int ret = rb_enc_precise_mbclen(p, e, enc);
-        int len = MBCLEN_CHARFOUND(ret);
 
-        if (!len) {
+        if (!MBCLEN_CHARFOUND_P(ret)) {
             return ENC_CODERANGE_BROKEN;
         }
-        p += len;
+        p += MBCLEN_CHARFOUND_LEN(ret);
     }
     if (e < p) {
         return ENC_CODERANGE_BROKEN;
@@ -2017,7 +2015,8 @@
             return NEIGHBOR_WRAPPED;
         ++((unsigned char*)p)[i];
         l = rb_enc_precise_mbclen(p, p+len, enc);
-        if (MBCLEN_CHARFOUND(l)) {
+        if (MBCLEN_CHARFOUND_P(l)) {
+            l = MBCLEN_CHARFOUND_LEN(l);
             if (l == len) {
                 return NEIGHBOR_FOUND;
             }
@@ -2025,11 +2024,11 @@
                 memset(p+l, 0xff, len-l);
             }
         }
-        if (MBCLEN_INVALID(l) && i < len-1) {
+        if (MBCLEN_INVALID_P(l) && i < len-1) {
             int len2, l2;
             for (len2 = len-1; 0 < len2; len2--) {
                 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
-                if (!MBCLEN_INVALID(l2))
+                if (!MBCLEN_INVALID_P(l2))
                     break;
             }
             memset(p+len2+1, 0xff, len-(len2+1));
@@ -2048,7 +2047,8 @@
             return NEIGHBOR_WRAPPED;
         --((unsigned char*)p)[i];
         l = rb_enc_precise_mbclen(p, p+len, enc);
-        if (MBCLEN_CHARFOUND(l)) {
+        if (MBCLEN_CHARFOUND_P(l)) {
+            l = MBCLEN_CHARFOUND_LEN(l);
             if (l == len) {
                 return NEIGHBOR_FOUND;
             }
@@ -2056,11 +2056,11 @@
                 memset(p+l, 0, len-l);
             }
         }
-        if (MBCLEN_INVALID(l) && i < len-1) {
+        if (MBCLEN_INVALID_P(l) && i < len-1) {
             int len2, l2;
             for (len2 = len-1; 0 < len2; len2--) {
                 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
-                if (!MBCLEN_INVALID(l2))
+                if (!MBCLEN_INVALID_P(l2))
                     break;
             }
             memset(p+len2+1, 0, len-(len2+1));
@@ -3300,11 +3300,12 @@
 	int cc;
 
         n = rb_enc_precise_mbclen(p, pend, enc);
-        if (!MBCLEN_CHARFOUND(n)) {
+        if (!MBCLEN_CHARFOUND_P(n)) {
             p++;
             n = 1;
             goto escape_codepoint;
         }
+        n = MBCLEN_CHARFOUND_LEN(n);
 
 	c = rb_enc_codepoint(p, pend, enc);
 	n = rb_enc_codelen(c, enc);
@@ -3313,7 +3314,7 @@
 	if (c == '"'|| c == '\\' ||
 	    (c == '#' &&
              p < pend &&
-             MBCLEN_CHARFOUND(rb_enc_precise_mbclen(p,pend,enc)) &&
+             MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
              (cc = rb_enc_codepoint(p,pend,enc),
               (cc == '$' || cc == '@' || cc == '{')))) {
 	    prefix_escape(result, c, enc);
Index: io.c
===================================================================
--- io.c	(revision 15279)
+++ io.c	(revision 15280)
@@ -2289,12 +2289,13 @@
 	return Qnil;
     }
     r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
-    if ((n = MBCLEN_CHARFOUND(r)) != 0 && n <= fptr->rbuf_len) {
+    if (MBCLEN_CHARFOUND_P(r) &&
+        (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
 	str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
 	fptr->rbuf_off += n;
 	fptr->rbuf_len -= n;
     }
-    else if (MBCLEN_NEEDMORE(r)) {
+    else if (MBCLEN_NEEDMORE_P(r)) {
 	str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len);
         fptr->rbuf_len = 0;
 getc_needmore:
@@ -2303,7 +2304,7 @@
             fptr->rbuf_off++;
             fptr->rbuf_len--;
             r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc);
-            if (MBCLEN_NEEDMORE(r)) {
+            if (MBCLEN_NEEDMORE_P(r)) {
                 goto getc_needmore;
             }
         }
Index: parse.y
===================================================================
--- parse.y	(revision 15279)
+++ parse.y	(revision 15280)
@@ -5299,7 +5299,7 @@
 parser_tokadd_mbchar(struct parser_params *parser, int c)
 {
     int len = parser_precise_mbclen();
-    if (!MBCLEN_CHARFOUND(len)) {
+    if (!MBCLEN_CHARFOUND_P(len)) {
 	compile_error(PARSER_ARG "invalid multibyte char");
 	return -1;
     }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]