ruby-changes:9540
From: yugui <ko1@a...>
Date: Sat, 27 Dec 2008 11:16:18 +0900 (JST)
Subject: [ruby-changes:9540] Ruby:r21080 (ruby_1_9_1): merges r20956 from trunk into ruby_1_9_1.
yugui 2008-12-27 11:16:03 +0900 (Sat, 27 Dec 2008)

  New Revision: 21080

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=21080

  Log:
    merges r20956 from trunk into ruby_1_9_1.
    * pack.c (pack_pack): encoding of packed string only from 'm',
      'M', and 'u' should be US-ASCII. [ruby-dev:37284]

    * pack.c (pack_pack): encoding of packed string only from 'U'
      should be UTF-8. also upgrade US-ASCII strings to UTF-8.

  Modified files:
    branches/ruby_1_9_1/ChangeLog
    branches/ruby_1_9_1/pack.c

Index: ruby_1_9_1/ChangeLog
===================================================================
--- ruby_1_9_1/ChangeLog (revision 21079)
+++ ruby_1_9_1/ChangeLog (revision 21080)
@@ -1,3 +1,11 @@
+Mon Dec 22 16:32:21 2008 Yukihiro Matsumoto <matz@r...>
+
+        * pack.c (pack_pack): encoding of packed string only from 'm',
+          'M', and 'u' should be US-ASCII. [ruby-dev:37284]
+
+        * pack.c (pack_pack): encoding of packed string only from 'U'
+          should be UTF-8. also upgrade US-ASCII strings to UTF-8.
+
 Sat Dec 27 09:14:17 2008 Yuki Sonoda (Yugui) <yugui@y...>
 
         * cont.c: rdoc for Fiber. patch by Muhammad Ali.
Index: ruby_1_9_1/pack.c
===================================================================
--- ruby_1_9_1/pack.c (revision 21079)
+++ ruby_1_9_1/pack.c (revision 21080)
@@ -444,7 +444,7 @@
     char type;
     long items, len, idx, plen;
     const char *ptr;
-    rb_encoding *enc;
+    int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
 #ifdef NATINT_PACK
     int natint; /* native integer */
 #endif
@@ -508,6 +508,19 @@
         }
 
         switch (type) {
+          case 'U':
+            /* if encoding is US-ASCII, upgrade to UTF-8 */
+            if (enc_info == 1) enc_info = 2;
+            break;
+          case 'm': case 'M': case 'u':
+            /* keep US-ASCII (do nothing) */
+            break;
+          default:
+            /* fall back to BINARY */
+            enc_info = 0;
+            break;
+        }
+        switch (type) {
           case 'A': case 'a': case 'Z':
           case 'B': case 'b':
           case 'H': case 'h':
@@ -521,15 +534,6 @@
                 ptr = RSTRING_PTR(from);
                 plen = RSTRING_LEN(from);
                 OBJ_INFECT(res, from);
-                switch (type) {
-                  case 'a': case 'A': case 'Z':
-                    enc = rb_enc_compatible(res, from);
-                    rb_enc_associate(res, enc);
-                    break;
-                  default:
-                    rb_enc_associate(res, rb_ascii8bit_encoding());
-                    break;
-                }
             }
 
             if (p[-1] == '*')
@@ -878,8 +882,6 @@
             break;
 
           case 'U': /* Unicode character */
-            enc = rb_enc_compatible(res, rb_enc_from_encoding(rb_utf8_encoding()));
-            rb_enc_associate(res, enc);
             while (len-- > 0) {
                 SIGNED_VALUE l;
                 char buf[8];
@@ -898,8 +900,6 @@
 
           case 'u': /* uuencoded string */
           case 'm': /* base64 encoded string */
-            enc = rb_enc_compatible(res, rb_enc_from_encoding(rb_usascii_encoding()));
-            rb_enc_associate(res, enc);
             from = NEXTFROM;
             StringValue(from);
             ptr = RSTRING_PTR(from);
@@ -928,8 +928,6 @@
             break;
 
           case 'M': /* quoted-printable encoded string */
-            enc = rb_enc_compatible(res, rb_enc_from_encoding(rb_usascii_encoding()));
-            rb_enc_associate(res, enc);
             from = rb_obj_as_string(NEXTFROM);
             if (len <= 1)
                 len = 72;
@@ -1024,6 +1022,17 @@
         rb_str_associate(res, associates);
     }
     OBJ_INFECT(res, fmt);
+    switch (enc_info) {
+      case 1:
+        ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+        break;
+      case 2:
+        rb_enc_set_index(res, rb_utf8_encindex());
+        break;
+      default:
+        /* do nothing, keep ASCII-8BIT */
+        break;
+    }
     return res;
 }
 
@@ -1892,7 +1901,6 @@
                     }
                 }
                 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
-                ENCODING_CODERANGE_SET(buf, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
                 UNPACK_PUSH(buf);
             }
             break;

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/