ruby-changes:43473
From: ngoto <ko1@a...>
Date: Thu, 30 Jun 2016 19:20:28 +0900 (JST)
Subject: [ruby-changes:43473] ngoto:r55547 (trunk): * string.c: Fix memory corruptions when using UTF-16/32 strings.
ngoto 2016-06-30 19:20:23 +0900 (Thu, 30 Jun 2016) New Revision: 55547 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=55547 Log: * string.c: Fix memory corruptions when using UTF-16/32 strings. [Bug #12536] [ruby-dev:49699] * string.c (TERM_LEN_MAX): Macro for the longest TERM_FILL length, the same as the largest value of rb_enc_mbminlen(enc) among encodings. * string.c (str_new, rb_str_buf_new, str_shared_replace): Allocate +TERM_LEN_MAX bytes instead of +1. This change may increase memory usage. * string.c (rb_str_new_with_class): Use TERM_LEN of the "obj". * string.c (rb_str_plus, rb_str_justify): Use str_new0, which is aware of termlen. * string.c (str_shared_replace): Copy +termlen bytes instead of +1. * string.c (rb_str_times): termlen should not be included in capa. * string.c (RESIZE_CAPA_TERM): When using RSTRING_EMBED_LEN_MAX, termlen should be counted with it because embedded strings are also processed by TERM_FILL. * string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto. * string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto. 
Modified files: trunk/ChangeLog trunk/string.c Index: string.c =================================================================== --- string.c (revision 55546) +++ string.c (revision 55547) @@ -120,6 +120,7 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L120 if (UNLIKELY(term_fill_len > 1))\ memset(term_fill_ptr, 0, term_fill_len);\ } while (0) +#define TERM_LEN_MAX 4 /* UTF-32LE, UTF-32BE */ #define RESIZE_CAPA(str,capacity) do {\ const int termlen = TERM_LEN(str);\ @@ -127,7 +128,7 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L128 } while (0) #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\ if (STR_EMBED_P(str)) {\ - if ((capacity) > RSTRING_EMBED_LEN_MAX) {\ + if ((capacity) > RSTRING_EMBED_LEN_MAX + 1 - (termlen)) {\ char *const tmp = ALLOC_N(char, (capacity)+termlen);\ const long tlen = RSTRING_LEN(str);\ memcpy(tmp, RSTRING_PTR(str), tlen);\ @@ -650,7 +651,7 @@ size_t https://github.com/ruby/ruby/blob/trunk/string.c#L651 rb_str_capacity(VALUE str) { if (STR_EMBED_P(str)) { - return RSTRING_EMBED_LEN_MAX; + return RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str); } else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { return RSTRING(str)->as.heap.len; @@ -694,7 +695,7 @@ str_new0(VALUE klass, const char *ptr, l https://github.com/ruby/ruby/blob/trunk/string.c#L695 RUBY_DTRACE_CREATE_HOOK(STRING, len); str = str_alloc(klass); - if (len > RSTRING_EMBED_LEN_MAX) { + if (len > RSTRING_EMBED_LEN_MAX + 1 - termlen) { RSTRING(str)->as.heap.aux.capa = len; RSTRING(str)->as.heap.ptr = ALLOC_N(char, len + termlen); STR_SET_NOEMBED(str); @@ -713,7 +714,7 @@ str_new0(VALUE klass, const char *ptr, l https://github.com/ruby/ruby/blob/trunk/string.c#L714 static VALUE str_new(VALUE klass, const char *ptr, long len) { - return str_new0(klass, ptr, len, 1); + return str_new0(klass, ptr, len, TERM_LEN_MAX); } VALUE @@ -1160,7 +1161,7 @@ str_new_frozen(VALUE klass, VALUE orig) https://github.com/ruby/ruby/blob/trunk/string.c#L1161 VALUE 
rb_str_new_with_class(VALUE obj, const char *ptr, long len) { - return str_new(rb_obj_class(obj), ptr, len); + return str_new0(rb_obj_class(obj), ptr, len, TERM_LEN(obj)); } static VALUE @@ -1184,7 +1185,7 @@ rb_str_buf_new(long capa) https://github.com/ruby/ruby/blob/trunk/string.c#L1185 } FL_SET(str, STR_NOEMBED); RSTRING(str)->as.heap.aux.capa = capa; - RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1); + RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa + TERM_LEN_MAX); RSTRING(str)->as.heap.ptr[0] = '\0'; return str; @@ -1252,16 +1253,18 @@ str_shared_replace(VALUE str, VALUE str2 https://github.com/ruby/ruby/blob/trunk/string.c#L1253 { rb_encoding *enc; int cr; + int termlen; ASSUME(str2 != str); enc = STR_ENC_GET(str2); cr = ENC_CODERANGE(str2); str_discard(str); OBJ_INFECT(str, str2); + termlen = rb_enc_mbminlen(enc); - if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) { + if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX + 1 - termlen) { STR_SET_EMBED(str); - memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1); + memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+termlen); STR_SET_EMBED_LEN(str, RSTRING_LEN(str2)); rb_enc_associate(str, enc); ENC_CODERANGE_SET(str, cr); @@ -1730,16 +1733,18 @@ rb_str_plus(VALUE str1, VALUE str2) https://github.com/ruby/ruby/blob/trunk/string.c#L1733 rb_encoding *enc; char *ptr1, *ptr2, *ptr3; long len1, len2; + int termlen; StringValue(str2); enc = rb_enc_check_str(str1, str2); RSTRING_GETMEM(str1, ptr1, len1); RSTRING_GETMEM(str2, ptr2, len2); - str3 = rb_str_new(0, len1+len2); + termlen = rb_enc_mbminlen(enc); + str3 = str_new0(rb_cString, 0, len1+len2, termlen); ptr3 = RSTRING_PTR(str3); memcpy(ptr3, ptr1, len1); memcpy(ptr3+len1, ptr2, len2); - TERM_FILL(&ptr3[len1+len2], rb_enc_mbminlen(enc)); + TERM_FILL(&ptr3[len1+len2], termlen); FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2)); ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc), @@ -1787,7 +1792,7 @@ rb_str_times(VALUE str, VALUE 
times) https://github.com/ruby/ruby/blob/trunk/string.c#L1792 len *= RSTRING_LEN(str); termlen = TERM_LEN(str); - str2 = rb_str_new_with_class(str, 0, (len + termlen - 1)); + str2 = str_new0(rb_obj_class(str), 0, len, termlen); ptr2 = RSTRING_PTR(str2); if (len) { n = RSTRING_LEN(str); @@ -2518,7 +2523,7 @@ str_buf_cat(VALUE str, const char *ptr, https://github.com/ruby/ruby/blob/trunk/string.c#L2523 rb_str_modify(str); if (len == 0) return 0; if (STR_EMBED_P(str)) { - capa = RSTRING_EMBED_LEN_MAX; + capa = RSTRING_EMBED_LEN_MAX + termlen - 1; sptr = RSTRING(str)->as.ary; olen = RSTRING_EMBED_LEN(str); } @@ -4141,7 +4146,7 @@ rb_str_drop_bytes(VALUE str, long len) https://github.com/ruby/ruby/blob/trunk/string.c#L4146 str_modifiable(str); if (len > olen) len = olen; nlen = olen - len; - if (nlen <= RSTRING_EMBED_LEN_MAX) { + if (nlen <= RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str)) { char *oldptr = ptr; int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE)); STR_SET_EMBED(str); @@ -4992,7 +4997,7 @@ rb_str_setbyte(VALUE str, VALUE index, V https://github.com/ruby/ruby/blob/trunk/string.c#L4997 enc = STR_ENC_GET(str); head = RSTRING_PTR(str); ptr = &head[pos]; - if (len > RSTRING_EMBED_LEN_MAX) { + if (len > RSTRING_EMBED_LEN_MAX + 1 - rb_enc_mbminlen(enc)) { cr = ENC_CODERANGE(str); switch (cr) { case ENC_CODERANGE_7BIT: @@ -5046,7 +5051,7 @@ str_byte_substr(VALUE str, long beg, lon https://github.com/ruby/ruby/blob/trunk/string.c#L5051 else p = s + beg; - if (len > RSTRING_EMBED_LEN_MAX && SHARABLE_SUBSTRING_P(beg, len, n)) { + if (len > RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str) && SHARABLE_SUBSTRING_P(beg, len, n)) { str2 = rb_str_new_frozen(str); str2 = str_new_shared(rb_obj_class(str2), str2); RSTRING(str2)->as.heap.ptr += beg; @@ -8480,9 +8485,11 @@ rb_str_justify(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L8485 long n, size, llen, rlen, llen2 = 0, rlen2 = 0; VALUE pad; int singlebyte = 1, cr; + int termlen; 
rb_scan_args(argc, argv, "11", &w, &pad); enc = STR_ENC_GET(str); + termlen = rb_enc_mbminlen(enc); width = NUM2LONG(w); if (argc == 2) { StringValue(pad); @@ -8512,7 +8519,7 @@ rb_str_justify(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L8519 rb_raise(rb_eArgError, "argument too big"); } len += size; - res = rb_str_new_with_class(str, 0, len); + res = str_new0(rb_obj_class(str), 0, len, termlen); p = RSTRING_PTR(res); if (flen <= 1) { memset(p, *f, llen); @@ -8546,7 +8553,7 @@ rb_str_justify(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L8553 p += rlen2; } } - TERM_FILL(p, rb_enc_mbminlen(enc)); + TERM_FILL(p, termlen); STR_SET_LEN(res, p-RSTRING_PTR(res)); OBJ_INFECT_RAW(res, str); if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad); Index: ChangeLog =================================================================== --- ChangeLog (revision 55546) +++ ChangeLog (revision 55547) @@ -1,3 +1,32 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Thu Jun 30 19:15:13 2016 Naohisa Goto <ngotogenome@g...> + + * string.c: Fix memory corruptions when using UTF-16/32 strings. + [Bug #12536] [ruby-dev:49699] + + * string.c (TERM_LEN_MAX): Macro for the longest TERM_FILL length, + the same as largest value of rb_enc_mbminlen(enc) among encodings. + + * string.c (str_new, rb_str_buf_new, str_shared_replace): Allocate + +TERM_LEN_MAX bytes instead of +1. This change may increase memory + usage. + + * string.c (rb_str_new_with_class): Use TERM_LEN of the "obj". + + * string.c (rb_str_plus, rb_str_justify): Use str_new0 which is aware + of termlen. + + * string.c (str_shared_replace): Copy +termlen bytes instead of +1. + + * string.c (rb_str_times): termlen should not be included in capa. + + * string.c (RESIZE_CAPA_TERM): When using RSTRING_EMBED_LEN_MAX, + termlen should be counted with it because embedded strings are + also processed by TERM_FILL. + + * string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto. 
+ + * string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto. + Wed Jun 29 22:24:37 2016 SHIBATA Hiroshi <hsbt@r...> * ext/psych/lib/psych_jars.rb: removed needless file required to JRuby. -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/