ruby-changes:22494
From: naruse <ko1@a...>
Date: Sat, 11 Feb 2012 03:24:38 +0900 (JST)
Subject: [ruby-changes:22494] naruse:r34543 (ruby_1_9_3): merge revision(s) 33937: [Backport #5704]
naruse 2012-02-11 03:24:25 +0900 (Sat, 11 Feb 2012) New Revision: 34543 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=34543 Log: merge revision(s) 33937: [Backport #5704] * ext/zlib/zlib.c (rb_gzreader_initialize): use binary mode by default under Windows. Patch by Hiroshi Shirosaki. [ruby-core:40706] [Feature #5562] * include/ruby/encoding.h (void rb_econv_binmode): define NEWLINE decorator. * io.c (rb_cloexec_fcntl_dupfd): Introduce NEED_READCONV and NEED_WRITECONV to replace universal newline decorator by CRLF only when required to improve file reading and writing under Windows. Patch by Hiroshi Shirosaki. [ruby-core:40706] [Feature #5562] * io.c (do_writeconv): adjust binary mode if required. * io.c (read_all, appendline, swallow, rb_io_getline_1): ditto. * io.c (io_getc, rb_io_each_codepoint, rb_io_ungetc): ditto. * io.c (rb_io_binmode, rb_io_ascii8bit_binmode): ditto. * io.c (rb_io_extract_modeenc, rb_sysopen): ditto. * io.c (pipe_open, prep_stdio, io_encoding_set): ditto. * io.c (rb_io_s_pipe, copy_stream_body): ditto. * test/ruby/test_io_m17n.rb (EOT): add test for pipe and stdin in binary mode. * win32/win32.c (init_stdhandle): remove O_BINARY from stdhandle initialization. * win32/win32.c (rb_w32_write): use FTEXT mode accordingly. Modified files: branches/ruby_1_9_3/ChangeLog branches/ruby_1_9_3/ext/zlib/zlib.c branches/ruby_1_9_3/include/ruby/encoding.h branches/ruby_1_9_3/io.c branches/ruby_1_9_3/test/ruby/test_io_m17n.rb branches/ruby_1_9_3/version.h branches/ruby_1_9_3/win32/win32.c Index: ruby_1_9_3/include/ruby/encoding.h =================================================================== --- ruby_1_9_3/include/ruby/encoding.h (revision 34542) +++ ruby_1_9_3/include/ruby/encoding.h (revision 34543) @@ -318,7 +318,7 @@ #define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000 #if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) -#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_UNIVERSAL_NEWLINE_DECORATOR +#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_CRLF_NEWLINE_DECORATOR #else #define ECONV_DEFAULT_NEWLINE_DECORATOR 0 #endif Index: ruby_1_9_3/ChangeLog =================================================================== --- ruby_1_9_3/ChangeLog (revision 34542) +++ ruby_1_9_3/ChangeLog (revision 34543) @@ -1,3 +1,31 @@ +Sat Feb 11 03:23:58 2012 Luis Lavena <luislavena@g...> + + * ext/zlib/zlib.c (rb_gzreader_initialize): use binary mode by default + under Windows. Patch by Hiroshi Shirosaki. [ruby-core:40706] + [Feature #5562] + + * include/ruby/encoding.h (void rb_econv_binmode): define NEWLINE + decorator. + + * io.c (rb_cloexec_fcntl_dupfd): Introduce NEED_READCONV and + NEED_WRITECONV to replace universal newline decorator by CRLF only + when required to improve file reading and writing under Windows. + Patch by Hiroshi Shirosaki. [ruby-core:40706] [Feature #5562] + * io.c (do_writeconv): adjust binary mode if required. + * io.c (read_all, appendline, swallow, rb_io_getline_1): ditto. + * io.c (io_getc, rb_io_each_codepoint, rb_io_ungetc): ditto. + * io.c (rb_io_binmode, rb_io_ascii8bit_binmode): ditto. + * io.c (rb_io_extract_modeenc, rb_sysopen): ditto. + * io.c (pipe_open, prep_stdio, io_encoding_set): ditto. + * io.c (rb_io_s_pipe, copy_stream_body): ditto. + + * test/ruby/test_io_m17n.rb (EOT): add test for pipe and stdin in + binary mode. + + * win32/win32.c (init_stdhandle): remove O_BINARY from stdhandle + initialization. + * win32/win32.c (rb_w32_write): use FTEXT mode accordingly. + Sat Feb 11 03:20:22 2012 NAKAMURA Usaku <usa@r...> * io.c (argf_next_argv): wrong timing of setting ecflags. Index: ruby_1_9_3/io.c =================================================================== --- ruby_1_9_3/io.c (revision 34542) +++ ruby_1_9_3/io.c (revision 34543) @@ -225,12 +225,66 @@ /* Windows */ # define DEFAULT_TEXTMODE FMODE_TEXTMODE # define TEXTMODE_NEWLINE_DECORATOR_ON_WRITE ECONV_CRLF_NEWLINE_DECORATOR +/* + * CRLF newline is set as default newline decorator. + * If only CRLF newline conversion is needed, we use binary IO process + * with OS's text mode for IO performance improvement. + * If encoding conversion is needed or a user sets text mode, we use encoding + * conversion IO process and universal newline decorator by default. + */ +#define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || (fptr)->encs.ecflags & ~ECONV_CRLF_NEWLINE_DECORATOR) +#define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || ((fptr)->encs.ecflags & ((ECONV_DECORATOR_MASK & ~ECONV_CRLF_NEWLINE_DECORATOR)|ECONV_STATEFUL_DECORATOR_MASK))) +#define SET_BINARY_MODE(fptr) setmode((fptr)->fd, O_BINARY) + +#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) do {\ + if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {\ + if (((fptr)->mode & FMODE_READABLE) &&\ + !((fptr)->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) {\ + setmode((fptr)->fd, O_BINARY);\ + }\ + else {\ + setmode((fptr)->fd, O_TEXT);\ + }\ + }\ +} while(0) + +#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) do {\ + if ((enc2) && ((ecflags) & ECONV_DEFAULT_NEWLINE_DECORATOR)) {\ + (ecflags) |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;\ + }\ +} while(0) +/* + * We use io_seek to back cursor position when changing mode from text to binary, + * but stdin and pipe cannot seek back. Stdin and pipe read should use encoding + * conversion for working properly with mode change. + */ +#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) do {\ + if ((fptr)->rbuf.len > 0 && !((fptr)->mode & FMODE_DUPLEX)) {\ + off_t r;\ + errno = 0;\ + r = io_seek((fptr), -(fptr)->rbuf.len, SEEK_CUR);\ + if (r < 0 && errno) {\ + if (errno == ESPIPE)\ + (fptr)->mode |= FMODE_DUPLEX;\ + }\ + else {\ + (fptr)->rbuf.off = 0;\ + (fptr)->rbuf.len = 0;\ + }\ + }\ + setmode((fptr)->fd, O_BINARY);\ +} while(0) + #else /* Unix */ # define DEFAULT_TEXTMODE 0 -#endif #define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || NEED_NEWLINE_DECORATOR_ON_READ(fptr)) #define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) || ((fptr)->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK))) +#define SET_BINARY_MODE(fptr) 0 +#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) 0 +#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) 0 +#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) 0 +#endif #if !defined HAVE_SHUTDOWN && !defined shutdown #define shutdown(a,b) 0 @@ -896,6 +950,7 @@ { if (NEED_WRITECONV(fptr)) { VALUE common_encoding = Qnil; + SET_BINARY_MODE(fptr); make_writeconv(fptr); @@ -925,6 +980,20 @@ str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT); } } +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) +#define fmode (fptr->mode) + else if (MODE_BTMODE(DEFAULT_TEXTMODE,0,1)) { + if ((fptr->mode & FMODE_READABLE) && + !(fptr->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) { + setmode(fptr->fd, O_BINARY); + } + if (!rb_enc_asciicompat(rb_enc_get(str))) { + rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s", + rb_enc_name(rb_enc_get(str))); + } + } +#undef fmode +#endif return str; } @@ -1834,6 +1903,7 @@ int cr; if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); io_setstrbuf(&str,0); make_readconv(fptr, 0); while (1) { @@ -1855,6 +1925,7 @@ } } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); bytes = 0; pos = 0; @@ -2277,6 +2348,7 @@ long limit = *lp; if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); do { const char *p, *e; @@ -2319,6 +2391,7 @@ return EOF; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); do { long pending = READ_DATA_PENDING_COUNT(fptr); if (pending > 0) { @@ -2357,6 +2430,7 @@ if (NEED_READCONV(fptr)) { rb_encoding *enc = io_read_encoding(fptr); int needconv = rb_enc_mbminlen(enc) != 1; + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); do { size_t cnt; @@ -2380,6 +2454,7 @@ return FALSE; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); do { size_t cnt; while ((cnt = READ_DATA_PENDING_COUNT(fptr)) > 0) { @@ -2516,6 +2591,7 @@ } else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) && rb_enc_asciicompat(enc = io_read_encoding(fptr))) { + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); return rb_io_getline_fast(fptr, enc, io); } else { @@ -2525,6 +2601,7 @@ int rspara = 0; int extra_limit = 16; + SET_BINARY_MODE(fptr); enc = io_read_encoding(fptr); if (!NIL_P(rs)) { @@ -2874,6 +2951,7 @@ VALUE str = Qnil; rb_encoding *read_enc = io_read_encoding(fptr); + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); while (1) { @@ -2918,6 +2996,7 @@ return str; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); if (io_fillbuf(fptr) < 0) { return Qnil; } @@ -3032,6 +3111,7 @@ READ_CHECK(fptr); if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); for (;;) { make_readconv(fptr, 0); for (;;) { @@ -3072,6 +3152,7 @@ rb_yield(UINT2NUM(c)); } } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); enc = io_input_encoding(fptr); for (;;) { if (io_fillbuf(fptr) < 0) { @@ -3275,6 +3356,7 @@ SafeStringValue(c); } if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); len = RSTRING_LEN(c); #if SIZEOF_LONG > SIZEOF_INT if (len > INT_MAX) @@ -3294,6 +3376,7 @@ MEMMOVE(fptr->cbuf.ptr+fptr->cbuf.off, RSTRING_PTR(c), char, len); } else { + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); io_ungetbyte(c, fptr); } return Qnil; @@ -4002,6 +4085,14 @@ fptr->mode |= FMODE_BINMODE; fptr->mode &= ~FMODE_TEXTMODE; fptr->writeconv_pre_ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; +#ifdef O_BINARY + if (!fptr->readconv) { + SET_BINARY_MODE_WITH_SEEK_CUR(fptr); + } + else { + setmode(fptr->fd, O_BINARY); + } +#endif return io; } @@ -4021,6 +4112,7 @@ } fptr->mode |= FMODE_BINMODE; fptr->mode &= ~FMODE_TEXTMODE; + SET_BINARY_MODE_WITH_SEEK_CUR(fptr); fptr->encs.enc = rb_ascii8bit_encoding(); fptr->encs.enc2 = NULL; @@ -4543,6 +4635,7 @@ MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; #endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } else { @@ -4583,13 +4676,14 @@ MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; #endif - ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags); if (rb_io_extract_encoding_option(opthash, &enc, &enc2, &fmode)) { if (has_enc) { rb_raise(rb_eArgError, "encoding specified twice"); } } + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); + ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags); } validate_enc_binmode(&fmode, ecflags, enc, enc2); @@ -4634,9 +4728,6 @@ int fd; struct sysopen_struct data; -#ifdef O_BINARY - oflags |= O_BINARY; -#endif data.fname = rb_str_encode_ospath(fname); data.oflags = oflags; data.perm = perm; @@ -5271,6 +5362,11 @@ fptr->mode = fmode | FMODE_SYNC|FMODE_DUPLEX; if (convconfig) { fptr->encs = *convconfig; +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) { + fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + } +#endif } else { if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) { @@ -6450,6 +6546,9 @@ fptr->encs.ecflags |= ECONV_DEFAULT_NEWLINE_DECORATOR; #ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE fptr->encs.ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE; + if (fmode & FMODE_READABLE) { + fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + } #endif fptr->stdio_file = f; @@ -8076,22 +8175,26 @@ } else enc = rb_to_encoding(v2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } else { if (NIL_P(v1)) { /* Set to default encodings */ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } else { tmp = rb_check_string_type(v1); if (!NIL_P(tmp) && rb_enc_asciicompat(rb_enc_get(tmp))) { parse_mode_enc(RSTRING_PTR(tmp), &enc, &enc2, NULL); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } else { rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } } @@ -8210,13 +8313,22 @@ extract_binmode(opt, &fmode); #if DEFAULT_TEXTMODE - if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) + if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) { fptr->mode &= ~FMODE_TEXTMODE; + setmode(fptr->fd, O_BINARY); + } +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) { + fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + } #endif +#endif fptr->mode |= fmode; #if DEFAULT_TEXTMODE - if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) + if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) { fptr2->mode &= ~FMODE_TEXTMODE; + setmode(fptr2->fd, O_BINARY); + } #endif fptr2->mode |= fmode; @@ -9140,6 +9252,13 @@ } stp->dst_fd = dst_fd; +#ifdef O_BINARY + if (src_fptr) + SET_BINARY_MODE_WITH_SEEK_CUR(src_fptr); + if (dst_fptr) + setmode(dst_fd, O_BINARY); +#endif + if (stp->src_offset == (off_t)-1 && src_fptr && src_fptr->rbuf.len) { size_t len = src_fptr->rbuf.len; VALUE str; Index: ruby_1_9_3/win32/win32.c =================================================================== --- ruby_1_9_3/win32/win32.c (revision 34542) +++ ruby_1_9_3/win32/win32.c (revision 34543) @@ -2128,28 +2128,19 @@ int keep = 0; #define open_null(fd) \ (((nullfd < 0) ? \ - (nullfd = open("NUL", O_RDWR|O_BINARY)) : 0), \ + (nullfd = open("NUL", O_RDWR)) : 0), \ ((nullfd == (fd)) ? (keep = 1) : dup2(nullfd, fd)), \ (fd)) if (fileno(stdin) < 0) { stdin->_file = open_null(0); } - else { - setmode(fileno(stdin), O_BINARY); - } if (fileno(stdout) < 0) { stdout->_file = open_null(1); } - else { - setmode(fileno(stdout), O_BINARY); - } if (fileno(stderr) < 0) { stderr->_file = open_null(2); } - else { - setmode(fileno(stderr), O_BINARY); - } if (nullfd >= 0 && !keep) close(nullfd); setvbuf(stderr, NULL, _IONBF, 0); } @@ -5321,7 +5312,8 @@ return -1; } - if (_osfile(fd) & FTEXT) { + if ((_osfile(fd) & FTEXT) && + (!(_osfile(fd) & FPIPE) || fd == fileno(stdout) || fd == fileno(stderr))) { return _write(fd, buf, size); } Index: ruby_1_9_3/ext/zlib/zlib.c =================================================================== --- ruby_1_9_3/ext/zlib/zlib.c (revision 34542) +++ ruby_1_9_3/ext/zlib/zlib.c (revision 34543) @@ -10,6 +10,7 @@ #include <zlib.h> #include <time.h> #include <ruby/io.h> +#include <fcntl.h> #ifdef HAVE_VALGRIND_MEMCHECK_H # include <valgrind/memcheck.h> @@ -3412,6 +3413,13 @@ Data_Get_Struct(obj, struct gzfile, gz); rb_scan_args(argc, argv, "1:", &io, &opt); +#ifdef O_BINARY + if (BUILTIN_TYPE(io) == T_FILE) { + rb_io_t *fptr; + GetOpenFile(io, fptr); + setmode(fptr->fd, O_BINARY); + } +#endif /* this is undocumented feature of zlib */ err = inflateInit2(&gz->z.stream, -MAX_WBITS); Index: ruby_1_9_3/version.h =================================================================== --- ruby_1_9_3/version.h (revision 34542) +++ ruby_1_9_3/version.h (revision 34543) @@ -1,5 +1,5 @@ #define RUBY_VERSION "1.9.3" -#define RUBY_PATCHLEVEL 88 +#define RUBY_PATCHLEVEL 89 #define RUBY_RELEASE_DATE "2012-02-11" #define RUBY_RELEASE_YEAR 2012 Index: ruby_1_9_3/test/ruby/test_io_m17n.rb =================================================================== --- ruby_1_9_3/test/ruby/test_io_m17n.rb (revision 34542) +++ ruby_1_9_3/test/ruby/test_io_m17n.rb (revision 34543) @@ -2174,4 +2174,52 @@ end end end + + def test_binmode_with_pipe + with_pipe do |r, w| + src = "a\r\nb\r\nc\r\n" + w.binmode.write src + w.close + + assert_equal("a", r.getc) + assert_equal("\n", r.getc) + r.binmode + assert_equal("b", r.getc) + assert_equal("\r", r.getc) + assert_equal("\n", r.getc) + assert_equal("c", r.getc) + assert_equal("\r", r.getc) + assert_equal("\n", r.getc) + assert_equal(nil, r.getc) + r.close + end + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_stdin_binmode + with_pipe do |in_r, in_w| + with_pipe do |out_r, out_w| + pid = Process.spawn({}, EnvUtil.rubybin, '-e', <<-'End', in: in_r, out: out_w) + STDOUT.binmode + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDIN.binmode + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + End + in_r.close + out_w.close + src = "a\r\nb\r\nc\r\n" + in_w.binmode.write src + in_w.close + Process.wait pid + assert_equal "a\nb\r\nc\r\n", out_r.binmode.read + out_r.close + end + end + end if /mswin|mingw/ =~ RUBY_PLATFORM end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/