ruby-changes:53684
From: shyouhei <ko1@a...>
Date: Wed, 21 Nov 2018 17:51:43 +0900 (JST)
Subject: [ruby-changes:53684] shyouhei:r65900 (trunk): char is not unsigned
shyouhei 2018-11-21 17:51:39 +0900 (Wed, 21 Nov 2018) New Revision: 65900 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=65900 Log: char is not unsigned It seems that decades ago, ruby was written under the assumption that char is unsigned, which is of course a false assumption. We need to explicitly store a numeric value into an unsigned char variable to indicate that we expect a value in 0..255. Modified files: trunk/re.c trunk/string.c Index: re.c =================================================================== --- re.c (revision 65899) +++ re.c (revision 65900) @@ -2539,7 +2539,7 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2539 VALUE buf, rb_encoding **encp, int *has_property, onig_errmsg_buffer err) { - char c; + unsigned char c; char smallbuf[2]; while (p < end) { @@ -2602,8 +2602,9 @@ unescape_nonascii(const char *p, const c https://github.com/ruby/ruby/blob/trunk/re.c#L2602 p = p-2; if (enc == rb_usascii_encoding()) { const char *pbeg = p; - c = read_escaped_byte(&p, end, err); - if (c == (char)-1) return -1; + int byte = read_escaped_byte(&p, end, err); + if (byte == -1) return -1; + c = byte; rb_str_buf_cat(buf, pbeg, p-pbeg); } else { @@ -2652,7 +2653,7 @@ escape_asis: https://github.com/ruby/ruby/blob/trunk/re.c#L2653 break; default: - rb_str_buf_cat(buf, &c, 1); + rb_str_buf_cat(buf, (char *)&c, 1); break; } } Index: string.c =================================================================== --- string.c (revision 65899) +++ string.c (revision 65900) @@ -6190,7 +6190,7 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6190 unsigned int c; int codelen; size_t hexlen; - char buf[6]; + unsigned char buf[6]; static rb_encoding *enc_utf8 = NULL; switch (*s) { @@ -6208,8 +6208,8 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6208 case 'b': case 'a': case 'e': - *buf = (char)unescape_ascii(*s); - rb_str_cat(undumped, 
buf, 1); + *buf = unescape_ascii(*s); + rb_str_cat(undumped, (char *)buf, 1); s++; break; case 'u': @@ -6249,8 +6249,8 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6249 if (0xd800 <= c && c <= 0xdfff) { rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); } - codelen = rb_enc_mbcput(c, buf, *penc); - rb_str_cat(undumped, buf, codelen); + codelen = rb_enc_mbcput(c, (char *)buf, *penc); + rb_str_cat(undumped, (char *)buf, codelen); s += hexlen; } } @@ -6262,8 +6262,8 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6262 if (0xd800 <= c && c <= 0xdfff) { rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); } - codelen = rb_enc_mbcput(c, buf, *penc); - rb_str_cat(undumped, buf, codelen); + codelen = rb_enc_mbcput(c, (char *)buf, *penc); + rb_str_cat(undumped, (char *)buf, codelen); s += hexlen; } break; @@ -6279,7 +6279,7 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6279 if (hexlen != 2) { rb_raise(rb_eRuntimeError, "invalid hex escape"); } - rb_str_cat(undumped, buf, 1); + rb_str_cat(undumped, (char *)buf, 1); s += hexlen; break; default: @@ -6915,7 +6915,7 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L6915 int cflag = 0; unsigned int c, c0, last = 0; int modify = 0, i, l; - char *s, *send; + unsigned char *s, *send; VALUE hash = 0; int singlebyte = single_byte_optimizable(str); int termlen; @@ -6999,18 +6999,18 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L6999 if (cr == ENC_CODERANGE_VALID && rb_enc_asciicompat(e1)) cr = ENC_CODERANGE_7BIT; str_modify_keep_cr(str); - s = RSTRING_PTR(str); send = RSTRING_END(str); + s = (unsigned char *)RSTRING_PTR(str); send = (unsigned char *)RSTRING_END(str); termlen = rb_enc_mbminlen(enc); if (sflag) { int clen, tlen; long offset, max = RSTRING_LEN(str); unsigned int save = -1; - char 
*buf = ALLOC_N(char, max + termlen), *t = buf; + unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf; while (s < send) { int may_modify = 0; - c0 = c = rb_enc_codepoint_len(s, send, &clen, e1); + c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1); tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); s += clen; @@ -7046,7 +7046,7 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7046 if ((offset = t - buf) + tlen > max) { size_t MAYBE_UNUSED(old) = max + termlen; max = offset + tlen + (send - s); - SIZED_REALLOC_N(buf, char, max + termlen, old); + SIZED_REALLOC_N(buf, unsigned char, max + termlen, old); t = buf + offset; } rb_enc_mbcput(c, t, enc); @@ -7059,8 +7059,8 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7059 if (!STR_EMBED_P(str)) { ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); } - TERM_FILL(t, termlen); - RSTRING(str)->as.heap.ptr = buf; + TERM_FILL((char *)t, termlen); + RSTRING(str)->as.heap.ptr = (char *)buf; RSTRING(str)->as.heap.len = t - buf; STR_SET_NOEMBED(str); RSTRING(str)->as.heap.aux.capa = max; @@ -7086,11 +7086,11 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7086 else { int clen, tlen; long offset, max = (long)((send - s) * 1.2); - char *buf = ALLOC_N(char, max + termlen), *t = buf; + unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf; while (s < send) { int may_modify = 0; - c0 = c = rb_enc_codepoint_len(s, send, &clen, e1); + c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1); tlen = enc == e1 ? 
clen : rb_enc_codelen(c, enc); if (c < 256) { @@ -7119,7 +7119,7 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7119 if ((offset = t - buf) + tlen > max) { size_t MAYBE_UNUSED(old) = max + termlen; max = offset + tlen + (long)((send - s) * 1.2); - SIZED_REALLOC_N(buf, char, max + termlen, old); + SIZED_REALLOC_N(buf, unsigned char, max + termlen, old); t = buf + offset; } if (s != t) { @@ -7135,8 +7135,8 @@ tr_trans(VALUE str, VALUE src, VALUE rep https://github.com/ruby/ruby/blob/trunk/string.c#L7135 if (!STR_EMBED_P(str)) { ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); } - TERM_FILL(t, termlen); - RSTRING(str)->as.heap.ptr = buf; + TERM_FILL((char *)t, termlen); + RSTRING(str)->as.heap.ptr = (char *)buf; RSTRING(str)->as.heap.len = t - buf; STR_SET_NOEMBED(str); RSTRING(str)->as.heap.aux.capa = max; @@ -7405,7 +7405,7 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7405 char squeez[TR_TABLE_SIZE]; rb_encoding *enc = 0; VALUE del = 0, nodel = 0; - char *s, *send, *t; + unsigned char *s, *send, *t; int i, modify = 0; int ascompat, singlebyte = single_byte_optimizable(str); unsigned int save; @@ -7426,15 +7426,15 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7426 } str_modify_keep_cr(str); - s = t = RSTRING_PTR(str); + s = t = (unsigned char *)RSTRING_PTR(str); if (!s || RSTRING_LEN(str) == 0) return Qnil; - send = RSTRING_END(str); + send = (unsigned char *)RSTRING_END(str); save = -1; ascompat = rb_enc_asciicompat(enc); if (singlebyte) { while (s < send) { - unsigned int c = *(unsigned char*)s++; + unsigned int c = *s++; if (c != save || (argc > 0 && !squeez[c])) { *t++ = save = c; } @@ -7445,14 +7445,14 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7445 unsigned int c; int clen; - if (ascompat && (c = *(unsigned char*)s) < 0x80) { + if (ascompat && (c = *s) < 0x80) { 
if (c != save || (argc > 0 && !squeez[c])) { *t++ = save = c; } s++; } else { - c = rb_enc_codepoint_len(s, send, &clen, enc); + c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc); if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) { if (t != s) rb_enc_mbcput(c, t, enc); @@ -7464,9 +7464,9 @@ rb_str_squeeze_bang(int argc, VALUE *arg https://github.com/ruby/ruby/blob/trunk/string.c#L7464 } } - TERM_FILL(t, TERM_LEN(str)); - if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) { - STR_SET_LEN(str, t - RSTRING_PTR(str)); + TERM_FILL((char *)t, TERM_LEN(str)); + if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) { + STR_SET_LEN(str, (char *)t - RSTRING_PTR(str)); modify = 1; } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/