[前][次][番号順一覧][スレッド一覧]

ruby-changes:43473

From: ngoto <ko1@a...>
Date: Thu, 30 Jun 2016 19:20:28 +0900 (JST)
Subject: [ruby-changes:43473] ngoto:r55547 (trunk): * string.c: Fix memory corruptions when using UTF-16/32 strings.

ngoto	2016-06-30 19:20:23 +0900 (Thu, 30 Jun 2016)

  New Revision: 55547

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=55547

  Log:
    * string.c: Fix memory corruptions when using UTF-16/32 strings.
      [Bug #12536] [ruby-dev:49699]
    
    * string.c (TERM_LEN_MAX): Macro for the longest TERM_FILL length,
      the same as the largest value of rb_enc_mbminlen(enc) among encodings.
    
    * string.c (str_new, rb_str_buf_new, str_shared_replace): Allocate
      +TERM_LEN_MAX bytes instead of +1. This change may increase memory
      usage.
    
    * string.c (rb_str_new_with_class): Use TERM_LEN of the "obj".
    
    * string.c (rb_str_plus, rb_str_justify): Use str_new0 which is aware
      of termlen.
    
    * string.c (str_shared_replace): Copy +termlen bytes instead of +1.
    
    * string.c (rb_str_times): termlen should not be included in capa.
    
    * string.c (RESIZE_CAPA_TERM): When comparing against
      RSTRING_EMBED_LEN_MAX, take termlen into account, because embedded
      strings are also terminated by TERM_FILL.
    
    * string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto.
    
    * string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto.

  Modified files:
    trunk/ChangeLog
    trunk/string.c
Index: string.c
===================================================================
--- string.c	(revision 55546)
+++ string.c	(revision 55547)
@@ -120,6 +120,7 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L120
     if (UNLIKELY(term_fill_len > 1))\
 	memset(term_fill_ptr, 0, term_fill_len);\
 } while (0)
+#define TERM_LEN_MAX 4 /* UTF-32LE, UTF-32BE */
 
 #define RESIZE_CAPA(str,capacity) do {\
     const int termlen = TERM_LEN(str);\
@@ -127,7 +128,7 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L128
 } while (0)
 #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
     if (STR_EMBED_P(str)) {\
-	if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
+	if ((capacity) > RSTRING_EMBED_LEN_MAX + 1 - (termlen)) {\
 	    char *const tmp = ALLOC_N(char, (capacity)+termlen);\
 	    const long tlen = RSTRING_LEN(str);\
 	    memcpy(tmp, RSTRING_PTR(str), tlen);\
@@ -650,7 +651,7 @@ size_t https://github.com/ruby/ruby/blob/trunk/string.c#L651
 rb_str_capacity(VALUE str)
 {
     if (STR_EMBED_P(str)) {
-	return RSTRING_EMBED_LEN_MAX;
+	return RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str);
     }
     else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
 	return RSTRING(str)->as.heap.len;
@@ -694,7 +695,7 @@ str_new0(VALUE klass, const char *ptr, l https://github.com/ruby/ruby/blob/trunk/string.c#L695
     RUBY_DTRACE_CREATE_HOOK(STRING, len);
 
     str = str_alloc(klass);
-    if (len > RSTRING_EMBED_LEN_MAX) {
+    if (len > RSTRING_EMBED_LEN_MAX + 1 - termlen) {
 	RSTRING(str)->as.heap.aux.capa = len;
 	RSTRING(str)->as.heap.ptr = ALLOC_N(char, len + termlen);
 	STR_SET_NOEMBED(str);
@@ -713,7 +714,7 @@ str_new0(VALUE klass, const char *ptr, l https://github.com/ruby/ruby/blob/trunk/string.c#L714
 static VALUE
 str_new(VALUE klass, const char *ptr, long len)
 {
-    return str_new0(klass, ptr, len, 1);
+    return str_new0(klass, ptr, len, TERM_LEN_MAX);
 }
 
 VALUE
@@ -1160,7 +1161,7 @@ str_new_frozen(VALUE klass, VALUE orig) https://github.com/ruby/ruby/blob/trunk/string.c#L1161
 VALUE
 rb_str_new_with_class(VALUE obj, const char *ptr, long len)
 {
-    return str_new(rb_obj_class(obj), ptr, len);
+    return str_new0(rb_obj_class(obj), ptr, len, TERM_LEN(obj));
 }
 
 static VALUE
@@ -1184,7 +1185,7 @@ rb_str_buf_new(long capa) https://github.com/ruby/ruby/blob/trunk/string.c#L1185
     }
     FL_SET(str, STR_NOEMBED);
     RSTRING(str)->as.heap.aux.capa = capa;
-    RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
+    RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa + TERM_LEN_MAX);
     RSTRING(str)->as.heap.ptr[0] = '\0';
 
     return str;
@@ -1252,16 +1253,18 @@ str_shared_replace(VALUE str, VALUE str2 https://github.com/ruby/ruby/blob/trunk/string.c#L1253
 {
     rb_encoding *enc;
     int cr;
+    int termlen;
 
     ASSUME(str2 != str);
     enc = STR_ENC_GET(str2);
     cr = ENC_CODERANGE(str2);
     str_discard(str);
     OBJ_INFECT(str, str2);
+    termlen = rb_enc_mbminlen(enc);
 
-    if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
+    if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX + 1 - termlen) {
 	STR_SET_EMBED(str);
-	memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
+	memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+termlen);
 	STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
         rb_enc_associate(str, enc);
         ENC_CODERANGE_SET(str, cr);
@@ -1730,16 +1733,18 @@ rb_str_plus(VALUE str1, VALUE str2) https://github.com/ruby/ruby/blob/trunk/string.c#L1733
     rb_encoding *enc;
     char *ptr1, *ptr2, *ptr3;
     long len1, len2;
+    int termlen;
 
     StringValue(str2);
     enc = rb_enc_check_str(str1, str2);
     RSTRING_GETMEM(str1, ptr1, len1);
     RSTRING_GETMEM(str2, ptr2, len2);
-    str3 = rb_str_new(0, len1+len2);
+    termlen = rb_enc_mbminlen(enc);
+    str3 = str_new0(rb_cString, 0, len1+len2, termlen);
     ptr3 = RSTRING_PTR(str3);
     memcpy(ptr3, ptr1, len1);
     memcpy(ptr3+len1, ptr2, len2);
-    TERM_FILL(&ptr3[len1+len2], rb_enc_mbminlen(enc));
+    TERM_FILL(&ptr3[len1+len2], termlen);
 
     FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2));
     ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
@@ -1787,7 +1792,7 @@ rb_str_times(VALUE str, VALUE times) https://github.com/ruby/ruby/blob/trunk/string.c#L1792
 
     len *= RSTRING_LEN(str);
     termlen = TERM_LEN(str);
-    str2 = rb_str_new_with_class(str, 0, (len + termlen - 1));
+    str2 = str_new0(rb_obj_class(str), 0, len, termlen);
     ptr2 = RSTRING_PTR(str2);
     if (len) {
         n = RSTRING_LEN(str);
@@ -2518,7 +2523,7 @@ str_buf_cat(VALUE str, const char *ptr, https://github.com/ruby/ruby/blob/trunk/string.c#L2523
     rb_str_modify(str);
     if (len == 0) return 0;
     if (STR_EMBED_P(str)) {
-	capa = RSTRING_EMBED_LEN_MAX;
+	capa = RSTRING_EMBED_LEN_MAX + termlen - 1;
 	sptr = RSTRING(str)->as.ary;
 	olen = RSTRING_EMBED_LEN(str);
     }
@@ -4141,7 +4146,7 @@ rb_str_drop_bytes(VALUE str, long len) https://github.com/ruby/ruby/blob/trunk/string.c#L4146
     str_modifiable(str);
     if (len > olen) len = olen;
     nlen = olen - len;
-    if (nlen <= RSTRING_EMBED_LEN_MAX) {
+    if (nlen <= RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str)) {
 	char *oldptr = ptr;
 	int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
 	STR_SET_EMBED(str);
@@ -4992,7 +4997,7 @@ rb_str_setbyte(VALUE str, VALUE index, V https://github.com/ruby/ruby/blob/trunk/string.c#L4997
     enc = STR_ENC_GET(str);
     head = RSTRING_PTR(str);
     ptr = &head[pos];
-    if (len > RSTRING_EMBED_LEN_MAX) {
+    if (len > RSTRING_EMBED_LEN_MAX + 1 - rb_enc_mbminlen(enc)) {
 	cr = ENC_CODERANGE(str);
 	switch (cr) {
 	  case ENC_CODERANGE_7BIT:
@@ -5046,7 +5051,7 @@ str_byte_substr(VALUE str, long beg, lon https://github.com/ruby/ruby/blob/trunk/string.c#L5051
     else
 	p = s + beg;
 
-    if (len > RSTRING_EMBED_LEN_MAX && SHARABLE_SUBSTRING_P(beg, len, n)) {
+    if (len > RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str) && SHARABLE_SUBSTRING_P(beg, len, n)) {
 	str2 = rb_str_new_frozen(str);
 	str2 = str_new_shared(rb_obj_class(str2), str2);
 	RSTRING(str2)->as.heap.ptr += beg;
@@ -8480,9 +8485,11 @@ rb_str_justify(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L8485
     long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
     VALUE pad;
     int singlebyte = 1, cr;
+    int termlen;
 
     rb_scan_args(argc, argv, "11", &w, &pad);
     enc = STR_ENC_GET(str);
+    termlen = rb_enc_mbminlen(enc);
     width = NUM2LONG(w);
     if (argc == 2) {
 	StringValue(pad);
@@ -8512,7 +8519,7 @@ rb_str_justify(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L8519
        rb_raise(rb_eArgError, "argument too big");
     }
     len += size;
-    res = rb_str_new_with_class(str, 0, len);
+    res = str_new0(rb_obj_class(str), 0, len, termlen);
     p = RSTRING_PTR(res);
     if (flen <= 1) {
        memset(p, *f, llen);
@@ -8546,7 +8553,7 @@ rb_str_justify(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L8553
            p += rlen2;
 	}
     }
-    TERM_FILL(p, rb_enc_mbminlen(enc));
+    TERM_FILL(p, termlen);
     STR_SET_LEN(res, p-RSTRING_PTR(res));
     OBJ_INFECT_RAW(res, str);
     if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad);
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 55546)
+++ ChangeLog	(revision 55547)
@@ -1,3 +1,32 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Thu Jun 30 19:15:13 2016  Naohisa Goto  <ngotogenome@g...>
+
+	* string.c: Fix memory corruptions when using UTF-16/32 strings.
+	  [Bug #12536] [ruby-dev:49699]
+
+	* string.c (TERM_LEN_MAX): Macro for the longest TERM_FILL length,
+	  the same as largest value of rb_enc_mbminlen(enc) among encodings.
+
+	* string.c (str_new, rb_str_buf_new, str_shared_replace): Allocate
+	  +TERM_LEN_MAX bytes instead of +1. This change may increase memory
+	  usage.
+
+	* string.c (rb_str_new_with_class): Use TERM_LEN of the "obj".
+
+	* string.c (rb_str_plus, rb_str_justify): Use str_new0 which is aware
+	  of termlen.
+
+	* string.c (str_shared_replace): Copy +termlen bytes instead of +1.
+
+	* string.c (rb_str_times): termlen should not be included in capa.
+
+	* string.c (RESIZE_CAPA_TERM): When using RSTRING_EMBED_LEN_MAX,
+	  termlen should be counted with it because embedded strings are
+	  also processed by TERM_FILL.
+
+	* string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto.
+
+	* string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto.
+
 Wed Jun 29 22:24:37 2016  SHIBATA Hiroshi  <hsbt@r...>
 
 	* ext/psych/lib/psych_jars.rb: removed needless file required to JRuby.

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]