ruby-changes:38311

nobu	2015-04-26 07:36:52 +0900 (Sun, 26 Apr 2015)

  New Revision: 50392

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=50392

  Log:
    enc/utf_8.c: limit UTF-8
    
    * enc/utf_8.c (code_to_mbclen, code_to_mbc): reject values larger
      than UTF-8 max codepoints.  [Feature #11094]

  Modified files:
    trunk/ChangeLog
    trunk/enc/utf_8.c
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 50391)
+++ ChangeLog	(revision 50392)
@@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun Apr 26 07:36:48 2015  Nobuyoshi Nakada  <nobu@r...>
+
+	* enc/utf_8.c (code_to_mbclen, code_to_mbc): reject values larger
+	  than UTF-8 max codepoints.  [Feature #11094]
+
 Sat Apr 25 14:26:19 2015  Nobuyoshi Nakada  <nobu@r...>
 
 	* string.c (str_buf_cat): expand later so that the buffer can be
Index: enc/utf_8.c
===================================================================
--- enc/utf_8.c	(revision 50391)
+++ enc/utf_8.c	(revision 50392)
@@ -35,8 +35,8 @@ https://github.com/ruby/ruby/blob/trunk/enc/utf_8.c#L35
 /* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
 #define INVALID_CODE_FE   0xfffffffe
 #define INVALID_CODE_FF   0xffffffff
-#define VALID_CODE_LIMIT  0x7fffffff
 #endif
+#define VALID_CODE_LIMIT  0x0010ffff
 
 #define utf8_islead(c)     ((UChar )((c) & 0xc0) != 0x80)
 
@@ -297,9 +297,7 @@ code_to_mbclen(OnigCodePoint code, OnigE https://github.com/ruby/ruby/blob/trunk/enc/utf_8.c#L297
   if      ((code & 0xffffff80) == 0) return 1;
   else if ((code & 0xfffff800) == 0) return 2;
   else if ((code & 0xffff0000) == 0) return 3;
-  else if ((code & 0xffe00000) == 0) return 4;
-  else if ((code & 0xfc000000) == 0) return 5;
-  else if ((code & 0x80000000) == 0) return 6;
+  else if (code <= VALID_CODE_LIMIT) return 4;
 #ifdef USE_INVALID_CODE_SCHEME
   else if (code == INVALID_CODE_FE) return 1;
   else if (code == INVALID_CODE_FF) return 1;
@@ -328,24 +326,11 @@ code_to_mbc(OnigCodePoint code, UChar *b https://github.com/ruby/ruby/blob/trunk/enc/utf_8.c#L326
       *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
       *p++ = UTF8_TRAILS(code, 6);
     }
-    else if ((code & 0xffe00000) == 0) {
+    else if (code <= VALID_CODE_LIMIT) {
       *p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
       *p++ = UTF8_TRAILS(code, 12);
       *p++ = UTF8_TRAILS(code,  6);
     }
-    else if ((code & 0xfc000000) == 0) {
-      *p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
-      *p++ = UTF8_TRAILS(code, 18);
-      *p++ = UTF8_TRAILS(code, 12);
-      *p++ = UTF8_TRAILS(code,  6);
-    }
-    else if ((code & 0x80000000) == 0) {
-      *p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
-      *p++ = UTF8_TRAILS(code, 24);
-      *p++ = UTF8_TRAILS(code, 18);
-      *p++ = UTF8_TRAILS(code, 12);
-      *p++ = UTF8_TRAILS(code,  6);
-    }
 #ifdef USE_INVALID_CODE_SCHEME
     else if (code == INVALID_CODE_FE) {
       *p = 0xfe;

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/