[前][次][番号順一覧][スレッド一覧]

ruby-changes:29868

From: nobu <ko1@a...>
Date: Thu, 11 Jul 2013 20:18:17 +0900 (JST)
Subject: [ruby-changes:29868] nobu:r41920 (trunk): string.c: multi-byte terminator

nobu	2013-07-11 20:18:05 +0900 (Thu, 11 Jul 2013)

  New Revision: 41920

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=41920

  Log:
    string.c: multi-byte terminator
    
    * string.c (rb_enc_str_new, rb_str_set_len, rb_str_resize): fill
      minimum length of the encoding as the terminator.
    * string.c (str_buf_cat, rb_str_buf_append, rb_str_splice_0): ditto.
    * string.c (str_make_independent_expand, rb_str_modify_expand): make
      the capacity enough for multi-byte terminator.

  Modified files:
    trunk/ChangeLog
    trunk/string.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 41919)
+++ ChangeLog	(revision 41920)
@@ -1,4 +1,12 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
-Thu Jul 11 20:17:57 2013  Nobuyoshi Nakada  <nobu@r...>
+Thu Jul 11 20:18:03 2013  Nobuyoshi Nakada  <nobu@r...>
+
+	* string.c (rb_enc_str_new, rb_str_set_len, rb_str_resize): fill
+	  minimum length of the encoding as the terminator.
+
+	* string.c (str_buf_cat, rb_str_buf_append, rb_str_splice_0): ditto.
+
+	* string.c (str_make_independent_expand, rb_str_modify_expand): make
+	  the capacity enough for multi-byte terminator.
 
 	* string.c (rb_string_value_cstr): fill minimum length of the encoding
 	  as the terminator.
Index: string.c
===================================================================
--- string.c	(revision 41919)
+++ string.c	(revision 41920)
@@ -101,18 +101,20 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L101
 } while (0)
 
 #define RESIZE_CAPA(str,capacity) do {\
+    const int termlen = TERM_LEN(str);\
     if (STR_EMBED_P(str)) {\
 	if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
-	    char *tmp = ALLOC_N(char, (capacity)+1);\
-	    memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
+	    char *const tmp = ALLOC_N(char, (capacity)+termlen);\
+	    const long tlen = RSTRING_LEN(str);\
+	    memcpy(tmp, RSTRING_PTR(str), tlen);\
 	    RSTRING(str)->as.heap.ptr = tmp;\
-	    RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
+	    RSTRING(str)->as.heap.len = tlen;\
             STR_SET_NOEMBED(str);\
 	    RSTRING(str)->as.heap.aux.capa = (capacity);\
 	}\
     }\
     else {\
-	REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
+	REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+termlen);\
 	if (!STR_NOCAPA_P(str))\
 	    RSTRING(str)->as.heap.aux.capa = (capacity);\
     }\
@@ -398,7 +400,7 @@ empty_str_alloc(VALUE klass) https://github.com/ruby/ruby/blob/trunk/string.c#L400
 }
 
 static VALUE
-str_new(VALUE klass, const char *ptr, long len)
+str_new0(VALUE klass, const char *ptr, long len, int termlen)
 {
     VALUE str;
 
@@ -413,7 +415,7 @@ str_new(VALUE klass, const char *ptr, lo https://github.com/ruby/ruby/blob/trunk/string.c#L415
     str = str_alloc(klass);
     if (len > RSTRING_EMBED_LEN_MAX) {
 	RSTRING(str)->as.heap.aux.capa = len;
-	RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1);
+	RSTRING(str)->as.heap.ptr = ALLOC_N(char, len + termlen);
 	STR_SET_NOEMBED(str);
     }
     else if (len == 0) {
@@ -423,10 +425,16 @@ str_new(VALUE klass, const char *ptr, lo https://github.com/ruby/ruby/blob/trunk/string.c#L425
 	memcpy(RSTRING_PTR(str), ptr, len);
     }
     STR_SET_LEN(str, len);
-    RSTRING_PTR(str)[len] = '\0';
+    TERM_FILL(RSTRING_PTR(str) + len, termlen);
     return str;
 }
 
+static VALUE
+str_new(VALUE klass, const char *ptr, long len)
+{
+    return str_new0(klass, ptr, len, 1);
+}
+
 VALUE
 rb_str_new(const char *ptr, long len)
 {
@@ -444,7 +452,11 @@ rb_usascii_str_new(const char *ptr, long https://github.com/ruby/ruby/blob/trunk/string.c#L452
 VALUE
 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
 {
-    VALUE str = rb_str_new(ptr, len);
+    VALUE str;
+
+    if (!enc) return rb_str_new(ptr, len);
+
+    str = str_new0(rb_cString, ptr, len, rb_enc_mbminlen(enc));
     rb_enc_associate(str, enc);
     return str;
 }
@@ -1330,16 +1342,17 @@ str_make_independent_expand(VALUE str, l https://github.com/ruby/ruby/blob/trunk/string.c#L1342
 {
     char *ptr;
     long len = RSTRING_LEN(str);
+    const int termlen = TERM_LEN(str);
     long capa = len + expand;
 
     if (len > capa) len = capa;
-    ptr = ALLOC_N(char, capa + 1);
+    ptr = ALLOC_N(char, capa + termlen);
     if (RSTRING_PTR(str)) {
 	memcpy(ptr, RSTRING_PTR(str), len);
     }
     STR_SET_NOEMBED(str);
     STR_UNSET_NOCAPA(str);
-    ptr[len] = 0;
+    TERM_FILL(ptr + len, termlen);
     RSTRING(str)->as.heap.ptr = ptr;
     RSTRING(str)->as.heap.len = len;
     RSTRING(str)->as.heap.aux.capa = capa;
@@ -1367,11 +1380,12 @@ rb_str_modify_expand(VALUE str, long exp https://github.com/ruby/ruby/blob/trunk/string.c#L1380
     else if (expand > 0) {
 	long len = RSTRING_LEN(str);
 	long capa = len + expand;
+	int termlen = TERM_LEN(str);
 	if (!STR_EMBED_P(str)) {
-	    REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
+	    REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa + termlen);
 	    RSTRING(str)->as.heap.aux.capa = capa;
 	}
-	else if (capa > RSTRING_EMBED_LEN_MAX) {
+	else if (capa + termlen > RSTRING_EMBED_LEN_MAX + 1) {
 	    str_make_independent_expand(str, expand);
 	}
     }
@@ -1851,16 +1865,17 @@ void https://github.com/ruby/ruby/blob/trunk/string.c#L1865
 rb_str_set_len(VALUE str, long len)
 {
     long capa;
+    const int termlen = TERM_LEN(str);
 
     str_modifiable(str);
     if (STR_SHARED_P(str)) {
 	rb_raise(rb_eRuntimeError, "can't set length of shared string");
     }
-    if (len > (capa = (long)rb_str_capacity(str))) {
+    if (len + termlen - 1 > (capa = (long)rb_str_capacity(str))) {
 	rb_bug("probable buffer overflow: %ld for %ld", len, capa);
     }
     STR_SET_LEN(str, len);
-    RSTRING_PTR(str)[len] = '\0';
+    TERM_FILL(&RSTRING_PTR(str)[len], termlen);
 }
 
 VALUE
@@ -1877,21 +1892,22 @@ rb_str_resize(VALUE str, long len) https://github.com/ruby/ruby/blob/trunk/string.c#L1892
     ENC_CODERANGE_CLEAR(str);
     slen = RSTRING_LEN(str);
     if (len != slen) {
+	const int termlen = TERM_LEN(str);
 	if (STR_EMBED_P(str)) {
-	    if (len <= RSTRING_EMBED_LEN_MAX) {
+	    if (len + termlen <= RSTRING_EMBED_LEN_MAX + 1) {
 		STR_SET_EMBED_LEN(str, len);
-		RSTRING(str)->as.ary[len] = '\0';
+		TERM_FILL(RSTRING(str)->as.ary + len, termlen);
 		return str;
 	    }
 	    str_make_independent_expand(str, len - slen);
 	    STR_SET_NOEMBED(str);
 	}
-	else if (len <= RSTRING_EMBED_LEN_MAX) {
+	else if (len + termlen <= RSTRING_EMBED_LEN_MAX + 1) {
 	    char *ptr = RSTRING(str)->as.heap.ptr;
 	    STR_SET_EMBED(str);
 	    if (slen > len) slen = len;
 	    if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
-	    RSTRING(str)->as.ary[len] = '\0';
+	    TERM_FILL(RSTRING(str)->as.ary + len, termlen);
 	    STR_SET_EMBED_LEN(str, len);
 	    if (independent) xfree(ptr);
 	    return str;
@@ -1900,13 +1916,13 @@ rb_str_resize(VALUE str, long len) https://github.com/ruby/ruby/blob/trunk/string.c#L1916
 	    str_make_independent_expand(str, len - slen);
 	}
 	else if (slen < len || slen - len > 1024) {
-	    REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1);
+	    REALLOC_N(RSTRING(str)->as.heap.ptr, char, len + termlen);
 	}
 	if (!STR_NOCAPA_P(str)) {
 	    RSTRING(str)->as.heap.aux.capa = len;
 	}
 	RSTRING(str)->as.heap.len = len;
-	RSTRING(str)->as.heap.ptr[len] = '\0';	/* sentinel */
+	TERM_FILL(RSTRING(str)->as.heap.ptr + len, termlen); /* sentinel */
     }
     return str;
 }
@@ -1915,6 +1931,7 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L1931
 str_buf_cat(VALUE str, const char *ptr, long len)
 {
     long capa, total, off = -1;
+    const int termlen = TERM_LEN(str);
 
     if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
         off = ptr - RSTRING_PTR(str);
@@ -1937,11 +1954,11 @@ str_buf_cat(VALUE str, const char *ptr, https://github.com/ruby/ruby/blob/trunk/string.c#L1954
     total = RSTRING_LEN(str)+len;
     if (capa <= total) {
 	while (total > capa) {
-	    if (capa + 1 >= LONG_MAX / 2) {
+	    if (capa + termlen >= LONG_MAX / 2) {
 		capa = (total + 4095) / 4096;
 		break;
 	    }
-	    capa = (capa + 1) * 2;
+	    capa = (capa + termlen) * 2;
 	}
 	RESIZE_CAPA(str, capa);
     }
@@ -1985,7 +2002,7 @@ rb_str_cat(VALUE str, const char *ptr, l https://github.com/ruby/ruby/blob/trunk/string.c#L2002
 	p = RSTRING(str)->as.heap.ptr;
 	memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
 	len = RSTRING(str)->as.heap.len += len;
-	p[len] = '\0'; /* sentinel */
+	TERM_FILL(p + len, TERM_LEN(str)); /* sentinel: placed after the appended bytes, not at the buffer start */
 	return str;
     }
 
@@ -2140,14 +2157,14 @@ rb_str_append(VALUE str, VALUE str2) https://github.com/ruby/ruby/blob/trunk/string.c#L2157
 
     StringValue(str2);
     if ((len2 = RSTRING_LEN(str2)) > 0 && STR_ASSOC_P(str)) {
-        long len = RSTRING_LEN(str) + len2;
+        long len1 = RSTRING(str)->as.heap.len, len = len1 + len2;
         enc = rb_enc_check(str, str2);
         cr = ENC_CODERANGE(str);
         if ((cr2 = ENC_CODERANGE(str2)) > cr || RSTRING_LEN(str) == 0)
 	    cr = cr2;
         rb_str_modify_expand(str, len2);
-        memcpy(RSTRING(str)->as.heap.ptr + RSTRING(str)->as.heap.len,
-               RSTRING_PTR(str2), len2+1);
+        memcpy(RSTRING(str)->as.heap.ptr + len1, RSTRING_PTR(str2), len2);
+        TERM_FILL(RSTRING(str)->as.heap.ptr + len, rb_enc_mbminlen(enc));
         RSTRING(str)->as.heap.len = len;
         rb_enc_associate(str, enc);
         ENC_CODERANGE_SET(str, cr);
@@ -3404,7 +3421,7 @@ rb_str_splice_0(VALUE str, long beg, lon https://github.com/ruby/ruby/blob/trunk/string.c#L3421
     rb_str_modify(str);
     if (len < RSTRING_LEN(val)) {
 	/* expand string */
-	RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1);
+	RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + TERM_LEN(str));
     }
 
     if (RSTRING_LEN(val) != len) {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]