ruby-changes:28489
From: nagachika <ko1@a...>
Date: Wed, 1 May 2013 23:53:06 +0900 (JST)
Subject: [ruby-changes:28489] nagachika:r40541 (ruby_2_0_0): merge revision(s) 40462: [Backport #8323]
nagachika 2013-05-01 23:52:52 +0900 (Wed, 01 May 2013) New Revision: 40541 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=40541 Log: merge revision(s) 40462: [Backport #8323] * io.c (rb_io_ext_int_to_encs, parse_mode_enc): bom-prefixed name is not a real encoding name, just a fallback. so the proper conversion should take place even if the internal encoding is equal to the bom-prefixed name, unless the actual encoding is equal to the internal encoding. [ruby-core:54563] [Bug #8323] * io.c (io_set_encoding_by_bom): reset external encoding if no BOM found. [ruby-core:54569] Modified directories: branches/ruby_2_0_0/ Modified files: branches/ruby_2_0_0/ChangeLog branches/ruby_2_0_0/io.c branches/ruby_2_0_0/test/ruby/test_io_m17n.rb branches/ruby_2_0_0/version.h Index: ruby_2_0_0/ChangeLog =================================================================== --- ruby_2_0_0/ChangeLog (revision 40540) +++ ruby_2_0_0/ChangeLog (revision 40541) @@ -1,3 +1,14 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/ChangeLog#L1 +Wed May 1 23:35:18 2013 Nobuyoshi Nakada <nobu@r...> + + * io.c (rb_io_ext_int_to_encs, parse_mode_enc): bom-prefixed name is + not a real encoding name, just a fallback. so the proper conversion + should take place even if if the internal encoding is equal to the + bom-prefixed name, unless actual encoding is equal to the internal + encoding. [ruby-core:54563] [Bug #8323] + + * io.c (io_set_encoding_by_bom): reset extenal encoding if no BOM + found. [ruby-core:54569] + Sat Apr 27 02:12:14 2013 KOSAKI Motohiro <kosaki.motohiro@g...> * io.c (rb_fd_fix_cloexec): use rb_update_max_fd(). 
Index: ruby_2_0_0/io.c =================================================================== --- ruby_2_0_0/io.c (revision 40540) +++ ruby_2_0_0/io.c (revision 40541) @@ -4835,7 +4835,7 @@ rb_io_oflags_modestr(int oflags) https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4835 * Qnil => no encoding specified (internal only) */ static void -rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2) +rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode) { int default_ext = 0; @@ -4846,7 +4846,8 @@ rb_io_ext_int_to_encs(rb_encoding *ext, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4846 if (intern == NULL && ext != rb_ascii8bit_encoding()) /* If external is ASCII-8BIT, no default transcoding */ intern = rb_default_internal_encoding(); - if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) { + if (intern == NULL || intern == (rb_encoding *)Qnil || + (!(fmode & FMODE_SETENC_BY_BOM) && (intern == ext))) { /* No internal encoding => use external + no transcoding */ *enc = (default_ext && intern != ext) ? NULL : ext; *enc2 = NULL; @@ -4869,6 +4870,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4870 const char *p; char encname[ENCODING_MAXNAMELEN+1]; int idx, idx2; + int fmode = fmode_p ? 
*fmode_p : 0; rb_encoding *ext_enc, *int_enc; /* parse estr as "enc" or "enc2:enc" or "enc:-" */ @@ -4880,7 +4882,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4882 idx = -1; else { if (io_encname_bom_p(estr, len)) { - if (fmode_p) *fmode_p |= FMODE_SETENC_BY_BOM; + fmode |= FMODE_SETENC_BY_BOM; estr += 4; len -= 4; } @@ -4893,7 +4895,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4895 else { long len = strlen(estr); if (io_encname_bom_p(estr, len)) { - if (fmode_p) *fmode_p |= FMODE_SETENC_BY_BOM; + fmode |= FMODE_SETENC_BY_BOM; estr += 4; len -= 4; memcpy(encname, estr, len); @@ -4902,6 +4904,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4904 } idx = rb_enc_find_index(estr); } + if (fmode_p) *fmode_p = fmode; if (idx >= 0) ext_enc = rb_enc_from_index(idx); @@ -4921,7 +4924,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4924 idx2 = rb_enc_find_index(p); if (idx2 < 0) unsupported_encoding(p); - else if (idx2 == idx) { + else if (!(fmode & FMODE_SETENC_BY_BOM) && (idx2 == idx)) { int_enc = (rb_encoding *)Qnil; } else @@ -4929,7 +4932,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4932 } } - rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p); + rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p, fmode); } int @@ -4990,12 +4993,12 @@ rb_io_extract_encoding_option(VALUE opt, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4993 parse_mode_enc(StringValueCStr(tmp), enc_p, enc2_p, fmode_p); } else { - rb_io_ext_int_to_encs(rb_to_encoding(encoding), NULL, enc_p, enc2_p); + rb_io_ext_int_to_encs(rb_to_encoding(encoding), NULL, enc_p, enc2_p, 0); } } else if (extenc != Qundef || intenc != Qundef) { extracted = 1; - rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p); + 
rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p, 0); } return extracted; } @@ -5066,7 +5069,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5069 vmode = *vmode_p; /* Set to defaults */ - rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2); + rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0); vmode_handle: if (NIL_P(vmode)) { @@ -5094,7 +5097,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5097 rb_encoding *e; e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; - rb_io_ext_int_to_encs(e, NULL, &enc, &enc2); + rb_io_ext_int_to_encs(e, NULL, &enc, &enc2, fmode); } } @@ -5118,7 +5121,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5121 oflags |= O_BINARY; #endif if (!has_enc) - rb_io_ext_int_to_encs(rb_ascii8bit_encoding(), NULL, &enc, &enc2); + rb_io_ext_int_to_encs(rb_ascii8bit_encoding(), NULL, &enc, &enc2, fmode); } #if DEFAULT_TEXTMODE else if (NIL_P(vmode)) { @@ -5341,13 +5344,16 @@ static void https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5344 io_set_encoding_by_bom(VALUE io) { int idx = io_strip_bom(io); + rb_io_t *fptr; + GetOpenFile(io, fptr); if (idx) { - rb_io_t *fptr; - GetOpenFile(io, fptr); io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)), rb_io_internal_encoding(io), Qnil); } + else { + fptr->encs.enc2 = NULL; + } } static VALUE @@ -5357,7 +5363,7 @@ rb_file_open_generic(VALUE io, VALUE fil https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5363 convconfig_t cc; if (!convconfig) { /* Set to default encodings */ - rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2); + rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2, fmode); cc.ecflags = 0; cc.ecopts = Qnil; convconfig = &cc; @@ -5391,7 +5397,7 @@ rb_file_open_internal(VALUE io, VALUE fi https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5397 /* Set to default encodings */ e = 
(fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; - rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2); + rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); convconfig.ecflags = 0; convconfig.ecopts = Qnil; } @@ -9046,7 +9052,7 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L9052 else { if (NIL_P(v1)) { /* Set to default encodings */ - rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2); + rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0); SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } @@ -9058,7 +9064,7 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L9064 ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } else { - rb_io_ext_int_to_encs(find_encoding(v1), NULL, &enc, &enc2); + rb_io_ext_int_to_encs(find_encoding(v1), NULL, &enc, &enc2, 0); SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } Index: ruby_2_0_0/version.h =================================================================== --- ruby_2_0_0/version.h (revision 40540) +++ ruby_2_0_0/version.h (revision 40541) @@ -1,10 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/version.h#L1 #define RUBY_VERSION "2.0.0" -#define RUBY_RELEASE_DATE "2013-04-27" -#define RUBY_PATCHLEVEL 175 +#define RUBY_RELEASE_DATE "2013-05-01" +#define RUBY_PATCHLEVEL 176 #define RUBY_RELEASE_YEAR 2013 -#define RUBY_RELEASE_MONTH 4 -#define RUBY_RELEASE_DAY 27 +#define RUBY_RELEASE_MONTH 5 +#define RUBY_RELEASE_DAY 1 #include "ruby/version.h" Index: ruby_2_0_0/test/ruby/test_io_m17n.rb =================================================================== --- ruby_2_0_0/test/ruby/test_io_m17n.rb (revision 40540) +++ ruby_2_0_0/test/ruby/test_io_m17n.rb (revision 40541) @@ -1996,6 +1996,7 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/test/ruby/test_io_m17n.rb#L1996 def test_strip_bom with_tmpdir { text = "\uFEFFa" + 
stripped = "a" %w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name| path = '%s-bom.txt' % name content = text.encode(name) @@ -2003,11 +2004,32 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/test/ruby/test_io_m17n.rb#L2004 result = File.read(path, mode: 'rb:BOM|UTF-8') assert_equal(content[1].force_encoding("ascii-8bit"), result.force_encoding("ascii-8bit")) + result = File.read(path, mode: 'rb:BOM|UTF-8:UTF-8') + assert_equal(Encoding::UTF_8, result.encoding) + assert_equal(stripped, result) end bug3407 = '[ruby-core:30641]' - result = File.read('UTF-8-bom.txt', encoding: 'BOM|UTF-8') + path = 'UTF-8-bom.txt' + result = File.read(path, encoding: 'BOM|UTF-8') assert_equal("a", result.force_encoding("ascii-8bit"), bug3407) + + bug8323 = '[ruby-core:54563] [Bug #8323]' + expected = "a\xff".force_encoding("utf-8") + open(path, 'ab') {|f| f.write("\xff")} + result = File.read(path, encoding: 'BOM|UTF-8') + assert_not_predicate(result, :valid_encoding?, bug8323) + assert_equal(expected, result, bug8323) + result = File.read(path, encoding: 'BOM|UTF-8:UTF-8') + assert_not_predicate(result, :valid_encoding?, bug8323) + assert_equal(expected, result, bug8323) + + path = 'ascii.txt' + generate_file(path, stripped) + result = File.read(path, encoding: 'BOM|UTF-8') + assert_equal(stripped, result, bug8323) + result = File.read(path, encoding: 'BOM|UTF-8:UTF-8') + assert_equal(stripped, result, bug8323) } end Property changes on: ruby_2_0_0 ___________________________________________________________________ Modified: svn:mergeinfo Merged /trunk:r40462 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/