[前][次][番号順一覧][スレッド一覧]

ruby-changes:28489

From: nagachika <ko1@a...>
Date: Wed, 1 May 2013 23:53:06 +0900 (JST)
Subject: [ruby-changes:28489] nagachika:r40541 (ruby_2_0_0): merge revision(s) 40462: [Backport #8323]

nagachika	2013-05-01 23:52:52 +0900 (Wed, 01 May 2013)

  New Revision: 40541

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=40541

  Log:
    merge revision(s) 40462: [Backport #8323]
    
    * io.c (rb_io_ext_int_to_encs, parse_mode_enc): bom-prefixed name is
      not a real encoding name, just a fallback.  so the proper conversion
      should take place even if the internal encoding is equal to the
      bom-prefixed name, unless actual encoding is equal to the internal
      encoding.  [ruby-core:54563] [Bug #8323]
    
    * io.c (io_set_encoding_by_bom): reset external encoding if no BOM
      found.  [ruby-core:54569]

  Modified directories:
    branches/ruby_2_0_0/
  Modified files:
    branches/ruby_2_0_0/ChangeLog
    branches/ruby_2_0_0/io.c
    branches/ruby_2_0_0/test/ruby/test_io_m17n.rb
    branches/ruby_2_0_0/version.h

Index: ruby_2_0_0/ChangeLog
===================================================================
--- ruby_2_0_0/ChangeLog	(revision 40540)
+++ ruby_2_0_0/ChangeLog	(revision 40541)
@@ -1,3 +1,14 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/ChangeLog#L1
+Wed May  1 23:35:18 2013  Nobuyoshi Nakada  <nobu@r...>
+
+	* io.c (rb_io_ext_int_to_encs, parse_mode_enc): bom-prefixed name is
+	  not a real encoding name, just a fallback.  so the proper conversion
+	  should take place even if if the internal encoding is equal to the
+	  bom-prefixed name, unless actual encoding is equal to the internal
+	  encoding.  [ruby-core:54563] [Bug #8323]
+
+	* io.c (io_set_encoding_by_bom): reset extenal encoding if no BOM
+	  found.  [ruby-core:54569]
+
 Sat Apr 27 02:12:14 2013  KOSAKI Motohiro  <kosaki.motohiro@g...>
 
 	* io.c (rb_fd_fix_cloexec): use rb_update_max_fd().
Index: ruby_2_0_0/io.c
===================================================================
--- ruby_2_0_0/io.c	(revision 40540)
+++ ruby_2_0_0/io.c	(revision 40541)
@@ -4835,7 +4835,7 @@ rb_io_oflags_modestr(int oflags) https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4835
  * Qnil => no encoding specified (internal only)
  */
 static void
-rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2)
+rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2, int fmode)
 {
     int default_ext = 0;
 
@@ -4846,7 +4846,8 @@ rb_io_ext_int_to_encs(rb_encoding *ext, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4846
     if (intern == NULL && ext != rb_ascii8bit_encoding())
 	/* If external is ASCII-8BIT, no default transcoding */
 	intern = rb_default_internal_encoding();
-    if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
+    if (intern == NULL || intern == (rb_encoding *)Qnil ||
+	(!(fmode & FMODE_SETENC_BY_BOM) && (intern == ext))) {
 	/* No internal encoding => use external + no transcoding */
 	*enc = (default_ext && intern != ext) ? NULL : ext;
 	*enc2 = NULL;
@@ -4869,6 +4870,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4870
     const char *p;
     char encname[ENCODING_MAXNAMELEN+1];
     int idx, idx2;
+    int fmode = fmode_p ? *fmode_p : 0;
     rb_encoding *ext_enc, *int_enc;
 
     /* parse estr as "enc" or "enc2:enc" or "enc:-" */
@@ -4880,7 +4882,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4882
 	    idx = -1;
 	else {
 	    if (io_encname_bom_p(estr, len)) {
-		if (fmode_p) *fmode_p |= FMODE_SETENC_BY_BOM;
+		fmode |= FMODE_SETENC_BY_BOM;
 		estr += 4;
                 len -= 4;
             }
@@ -4893,7 +4895,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4895
     else {
 	long len = strlen(estr);
 	if (io_encname_bom_p(estr, len)) {
-	    if (fmode_p) *fmode_p |= FMODE_SETENC_BY_BOM;
+	    fmode |= FMODE_SETENC_BY_BOM;
 	    estr += 4;
             len -= 4;
 	    memcpy(encname, estr, len);
@@ -4902,6 +4904,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4904
 	}
 	idx = rb_enc_find_index(estr);
     }
+    if (fmode_p) *fmode_p = fmode;
 
     if (idx >= 0)
 	ext_enc = rb_enc_from_index(idx);
@@ -4921,7 +4924,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4924
 	    idx2 = rb_enc_find_index(p);
 	    if (idx2 < 0)
 		unsupported_encoding(p);
-	    else if (idx2 == idx) {
+	    else if (!(fmode & FMODE_SETENC_BY_BOM) && (idx2 == idx)) {
 		int_enc = (rb_encoding *)Qnil;
 	    }
 	    else
@@ -4929,7 +4932,7 @@ parse_mode_enc(const char *estr, rb_enco https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4932
 	}
     }
 
-    rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p);
+    rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p, fmode);
 }
 
 int
@@ -4990,12 +4993,12 @@ rb_io_extract_encoding_option(VALUE opt, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L4993
 	    parse_mode_enc(StringValueCStr(tmp), enc_p, enc2_p, fmode_p);
 	}
 	else {
-	    rb_io_ext_int_to_encs(rb_to_encoding(encoding), NULL, enc_p, enc2_p);
+	    rb_io_ext_int_to_encs(rb_to_encoding(encoding), NULL, enc_p, enc2_p, 0);
 	}
     }
     else if (extenc != Qundef || intenc != Qundef) {
         extracted = 1;
-	rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p);
+	rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p, 0);
     }
     return extracted;
 }
@@ -5066,7 +5069,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5069
     vmode = *vmode_p;
 
     /* Set to defaults */
-    rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
+    rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
 
   vmode_handle:
     if (NIL_P(vmode)) {
@@ -5094,7 +5097,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5097
 	    rb_encoding *e;
 
 	    e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
-	    rb_io_ext_int_to_encs(e, NULL, &enc, &enc2);
+	    rb_io_ext_int_to_encs(e, NULL, &enc, &enc2, fmode);
 	}
     }
 
@@ -5118,7 +5121,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5121
             oflags |= O_BINARY;
 #endif
 	    if (!has_enc)
-		rb_io_ext_int_to_encs(rb_ascii8bit_encoding(), NULL, &enc, &enc2);
+		rb_io_ext_int_to_encs(rb_ascii8bit_encoding(), NULL, &enc, &enc2, fmode);
 	}
 #if DEFAULT_TEXTMODE
 	else if (NIL_P(vmode)) {
@@ -5341,13 +5344,16 @@ static void https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5344
 io_set_encoding_by_bom(VALUE io)
 {
     int idx = io_strip_bom(io);
+    rb_io_t *fptr;
 
+    GetOpenFile(io, fptr);
     if (idx) {
-	rb_io_t *fptr;
-	GetOpenFile(io, fptr);
 	io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
 		rb_io_internal_encoding(io), Qnil);
     }
+    else {
+	fptr->encs.enc2 = NULL;
+    }
 }
 
 static VALUE
@@ -5357,7 +5363,7 @@ rb_file_open_generic(VALUE io, VALUE fil https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5363
     convconfig_t cc;
     if (!convconfig) {
 	/* Set to default encodings */
-	rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
+	rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2, fmode);
         cc.ecflags = 0;
         cc.ecopts = Qnil;
         convconfig = &cc;
@@ -5391,7 +5397,7 @@ rb_file_open_internal(VALUE io, VALUE fi https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L5397
 	/* Set to default encodings */
 
 	e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
-	rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2);
+	rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode);
         convconfig.ecflags = 0;
         convconfig.ecopts = Qnil;
     }
@@ -9046,7 +9052,7 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L9052
     else {
 	if (NIL_P(v1)) {
 	    /* Set to default encodings */
-	    rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
+	    rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0);
 	    SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
             ecopts = Qnil;
 	}
@@ -9058,7 +9064,7 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/io.c#L9064
                 ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
 	    }
 	    else {
-		rb_io_ext_int_to_encs(find_encoding(v1), NULL, &enc, &enc2);
+		rb_io_ext_int_to_encs(find_encoding(v1), NULL, &enc, &enc2, 0);
 		SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
                 ecopts = Qnil;
 	    }
Index: ruby_2_0_0/version.h
===================================================================
--- ruby_2_0_0/version.h	(revision 40540)
+++ ruby_2_0_0/version.h	(revision 40541)
@@ -1,10 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/version.h#L1
 #define RUBY_VERSION "2.0.0"
-#define RUBY_RELEASE_DATE "2013-04-27"
-#define RUBY_PATCHLEVEL 175
+#define RUBY_RELEASE_DATE "2013-05-01"
+#define RUBY_PATCHLEVEL 176
 
 #define RUBY_RELEASE_YEAR 2013
-#define RUBY_RELEASE_MONTH 4
-#define RUBY_RELEASE_DAY 27
+#define RUBY_RELEASE_MONTH 5
+#define RUBY_RELEASE_DAY 1
 
 #include "ruby/version.h"
 
Index: ruby_2_0_0/test/ruby/test_io_m17n.rb
===================================================================
--- ruby_2_0_0/test/ruby/test_io_m17n.rb	(revision 40540)
+++ ruby_2_0_0/test/ruby/test_io_m17n.rb	(revision 40541)
@@ -1996,6 +1996,7 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/test/ruby/test_io_m17n.rb#L1996
   def test_strip_bom
     with_tmpdir {
       text = "\uFEFFa"
+      stripped = "a"
       %w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name|
         path = '%s-bom.txt' % name
         content = text.encode(name)
@@ -2003,11 +2004,32 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_0_0/test/ruby/test_io_m17n.rb#L2004
         result = File.read(path, mode: 'rb:BOM|UTF-8')
         assert_equal(content[1].force_encoding("ascii-8bit"),
                      result.force_encoding("ascii-8bit"))
+        result = File.read(path, mode: 'rb:BOM|UTF-8:UTF-8')
+        assert_equal(Encoding::UTF_8, result.encoding)
+        assert_equal(stripped, result)
       end
 
       bug3407 = '[ruby-core:30641]'
-      result = File.read('UTF-8-bom.txt', encoding: 'BOM|UTF-8')
+      path = 'UTF-8-bom.txt'
+      result = File.read(path, encoding: 'BOM|UTF-8')
       assert_equal("a", result.force_encoding("ascii-8bit"), bug3407)
+
+      bug8323 = '[ruby-core:54563] [Bug #8323]'
+      expected = "a\xff".force_encoding("utf-8")
+      open(path, 'ab') {|f| f.write("\xff")}
+      result = File.read(path, encoding: 'BOM|UTF-8')
+      assert_not_predicate(result, :valid_encoding?, bug8323)
+      assert_equal(expected, result, bug8323)
+      result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
+      assert_not_predicate(result, :valid_encoding?, bug8323)
+      assert_equal(expected, result, bug8323)
+
+      path = 'ascii.txt'
+      generate_file(path, stripped)
+      result = File.read(path, encoding: 'BOM|UTF-8')
+      assert_equal(stripped, result, bug8323)
+      result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
+      assert_equal(stripped, result, bug8323)
     }
   end
 

Property changes on: ruby_2_0_0
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /trunk:r40462


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]