[前][次][番号順一覧][スレッド一覧]

ruby-changes:53684

From: shyouhei <ko1@a...>
Date: Wed, 21 Nov 2018 17:51:43 +0900 (JST)
Subject: [ruby-changes:53684] shyouhei:r65900 (trunk): char is not unsigned

shyouhei	2018-11-21 17:51:39 +0900 (Wed, 21 Nov 2018)

  New Revision: 65900

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=65900

  Log:
    char is not unsigned
    
    It seems that decades ago, ruby was written under the assumption
    that char is unsigned, which is of course a false assumption: the
    signedness of plain char is implementation-defined.  We need to
    explicitly store a numeric value into an unsigned char variable
    to indicate that we expect a value in the range 0..255.

  Modified files:
    trunk/re.c
    trunk/string.c
Index: re.c
===================================================================
--- re.c	(revision 65899)
+++ re.c	(revision 65900)
@@ -2539,7 +2539,7 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2539
         VALUE buf, rb_encoding **encp, int *has_property,
         onig_errmsg_buffer err)
 {
-    char c;
+    unsigned char c;
     char smallbuf[2];
 
     while (p < end) {
@@ -2602,8 +2602,9 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2602
                 p = p-2;
 		if (enc == rb_usascii_encoding()) {
 		    const char *pbeg = p;
-		    c = read_escaped_byte(&p, end, err);
-		    if (c == (char)-1) return -1;
+                    int byte = read_escaped_byte(&p, end, err);
+                    if (byte == -1) return -1;
+                    c = byte;
 		    rb_str_buf_cat(buf, pbeg, p-pbeg);
 		}
 		else {
@@ -2652,7 +2653,7 @@ escape_asis: https://github.com/ruby/ruby/blob/trunk/re.c#L2653
             break;
 
           default:
-            rb_str_buf_cat(buf, &c, 1);
+            rb_str_buf_cat(buf, (char *)&c, 1);
             break;
         }
     }
Index: string.c
===================================================================
--- string.c	(revision 65899)
+++ string.c	(revision 65900)
@@ -6190,7 +6190,7 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6190
     unsigned int c;
     int codelen;
     size_t hexlen;
-    char buf[6];
+    unsigned char buf[6];
     static rb_encoding *enc_utf8 = NULL;
 
     switch (*s) {
@@ -6208,8 +6208,8 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6208
       case 'b':
       case 'a':
       case 'e':
-	*buf = (char)unescape_ascii(*s);
-	rb_str_cat(undumped, buf, 1);
+        *buf = unescape_ascii(*s);
+        rb_str_cat(undumped, (char *)buf, 1);
 	s++;
 	break;
       case 'u':
@@ -6249,8 +6249,8 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6249
 		if (0xd800 <= c && c <= 0xdfff) {
 		    rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
 		}
-		codelen = rb_enc_mbcput(c, buf, *penc);
-		rb_str_cat(undumped, buf, codelen);
+                codelen = rb_enc_mbcput(c, (char *)buf, *penc);
+                rb_str_cat(undumped, (char *)buf, codelen);
 		s += hexlen;
 	    }
 	}
@@ -6262,8 +6262,8 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6262
 	    if (0xd800 <= c && c <= 0xdfff) {
 		rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
 	    }
-	    codelen = rb_enc_mbcput(c, buf, *penc);
-	    rb_str_cat(undumped, buf, codelen);
+            codelen = rb_enc_mbcput(c, (char *)buf, *penc);
+            rb_str_cat(undumped, (char *)buf, codelen);
 	    s += hexlen;
 	}
 	break;
@@ -6279,7 +6279,7 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6279
 	if (hexlen != 2) {
 	    rb_raise(rb_eRuntimeError, "invalid hex escape");
 	}
-	rb_str_cat(undumped, buf, 1);
+        rb_str_cat(undumped, (char *)buf, 1);
 	s += hexlen;
 	break;
       default:
@@ -6915,7 +6915,7 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L6915
     int cflag = 0;
     unsigned int c, c0, last = 0;
     int modify = 0, i, l;
-    char *s, *send;
+    unsigned char *s, *send;
     VALUE hash = 0;
     int singlebyte = single_byte_optimizable(str);
     int termlen;
@@ -6999,18 +6999,18 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L6999
     if (cr == ENC_CODERANGE_VALID && rb_enc_asciicompat(e1))
 	cr = ENC_CODERANGE_7BIT;
     str_modify_keep_cr(str);
-    s = RSTRING_PTR(str); send = RSTRING_END(str);
+    s = (unsigned char *)RSTRING_PTR(str); send = (unsigned char *)RSTRING_END(str);
     termlen = rb_enc_mbminlen(enc);
     if (sflag) {
 	int clen, tlen;
 	long offset, max = RSTRING_LEN(str);
 	unsigned int save = -1;
-	char *buf = ALLOC_N(char, max + termlen), *t = buf;
+        unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf;
 
 	while (s < send) {
 	    int may_modify = 0;
 
-	    c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
+            c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1);
 	    tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
 	    s += clen;
@@ -7046,7 +7046,7 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7046
 	    if ((offset = t - buf) + tlen > max) {
 		size_t MAYBE_UNUSED(old) = max + termlen;
 		max = offset + tlen + (send - s);
-		SIZED_REALLOC_N(buf, char, max + termlen, old);
+                SIZED_REALLOC_N(buf, unsigned char, max + termlen, old);
 		t = buf + offset;
 	    }
 	    rb_enc_mbcput(c, t, enc);
@@ -7059,8 +7059,8 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7059
 	if (!STR_EMBED_P(str)) {
 	    ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
 	}
-	TERM_FILL(t, termlen);
-	RSTRING(str)->as.heap.ptr = buf;
+        TERM_FILL((char *)t, termlen);
+        RSTRING(str)->as.heap.ptr = (char *)buf;
 	RSTRING(str)->as.heap.len = t - buf;
 	STR_SET_NOEMBED(str);
 	RSTRING(str)->as.heap.aux.capa = max;
@@ -7086,11 +7086,11 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7086
     else {
 	int clen, tlen;
 	long offset, max = (long)((send - s) * 1.2);
-	char *buf = ALLOC_N(char, max + termlen), *t = buf;
+        unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf;
 
 	while (s < send) {
 	    int may_modify = 0;
-	    c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
+            c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1);
 	    tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
 	    if (c < 256) {
@@ -7119,7 +7119,7 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7119
 	    if ((offset = t - buf) + tlen > max) {
 		size_t MAYBE_UNUSED(old) = max + termlen;
 		max = offset + tlen + (long)((send - s) * 1.2);
-		SIZED_REALLOC_N(buf, char, max + termlen, old);
+                SIZED_REALLOC_N(buf, unsigned char, max + termlen, old);
 		t = buf + offset;
 	    }
 	    if (s != t) {
@@ -7135,8 +7135,8 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7135
 	if (!STR_EMBED_P(str)) {
 	    ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
 	}
-	TERM_FILL(t, termlen);
-	RSTRING(str)->as.heap.ptr = buf;
+        TERM_FILL((char *)t, termlen);
+        RSTRING(str)->as.heap.ptr = (char *)buf;
 	RSTRING(str)->as.heap.len = t - buf;
 	STR_SET_NOEMBED(str);
 	RSTRING(str)->as.heap.aux.capa = max;
@@ -7405,7 +7405,7 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7405
     char squeez[TR_TABLE_SIZE];
     rb_encoding *enc = 0;
     VALUE del = 0, nodel = 0;
-    char *s, *send, *t;
+    unsigned char *s, *send, *t;
     int i, modify = 0;
     int ascompat, singlebyte = single_byte_optimizable(str);
     unsigned int save;
@@ -7426,15 +7426,15 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7426
     }
 
     str_modify_keep_cr(str);
-    s = t = RSTRING_PTR(str);
+    s = t = (unsigned char *)RSTRING_PTR(str);
     if (!s || RSTRING_LEN(str) == 0) return Qnil;
-    send = RSTRING_END(str);
+    send = (unsigned char *)RSTRING_END(str);
     save = -1;
     ascompat = rb_enc_asciicompat(enc);
 
     if (singlebyte) {
         while (s < send) {
-	    unsigned int c = *(unsigned char*)s++;
+            unsigned int c = *s++;
 	    if (c != save || (argc > 0 && !squeez[c])) {
 	        *t++ = save = c;
 	    }
@@ -7445,14 +7445,14 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7445
 	    unsigned int c;
 	    int clen;
 
-	    if (ascompat && (c = *(unsigned char*)s) < 0x80) {
+            if (ascompat && (c = *s) < 0x80) {
 		if (c != save || (argc > 0 && !squeez[c])) {
 		    *t++ = save = c;
 		}
 		s++;
 	    }
 	    else {
-		c = rb_enc_codepoint_len(s, send, &clen, enc);
+                c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);
 
 		if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
 		    if (t != s) rb_enc_mbcput(c, t, enc);
@@ -7464,9 +7464,9 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7464
 	}
     }
 
-    TERM_FILL(t, TERM_LEN(str));
-    if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
-	STR_SET_LEN(str, t - RSTRING_PTR(str));
+    TERM_FILL((char *)t, TERM_LEN(str));
+    if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
+        STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
 	modify = 1;
     }
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]