ruby-changes:24303
From: drbrain <ko1@a...>
Date: Tue, 10 Jul 2012 12:51:35 +0900 (JST)
Subject: [ruby-changes:24303] drbrain:r36354 (trunk): * ext/zlib/zlib.c: Revert r36349. Added streaming support to inflate
drbrain 2012-07-10 12:51:25 +0900 (Tue, 10 Jul 2012) New Revision: 36354 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=36354 Log: * ext/zlib/zlib.c: Revert r36349. Added streaming support to inflate processing. rb_block_given_p() is not callable without the GVL. * ext/zlib/extconf.rb: ditto * NEWS: ditto * test/zlib/test_zlib.rb: ditto Modified files: trunk/ChangeLog trunk/NEWS trunk/ext/zlib/extconf.rb trunk/ext/zlib/zlib.c trunk/test/zlib/test_zlib.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 36353) +++ ChangeLog (revision 36354) @@ -6,27 +6,6 @@ * configure.in: removed --enable/disable-win95 options. (see r36432) -Tue Jul 10 08:56:17 2012 Eric Hodel <drbrain@s...> - - * ext/zlib/zlib.c: Added streaming support to inflate processing. - This allows zlib streams to be processed without huge memory growth. - [Feature #6612] - * NEWS: ditto - * ext/zlib/zlib.c (zstream_expand_buffer): Uses rb_yield when a block - is given for streaming support. Refactored to use - zstream_expand_buffer_into to remove duplicate code. - * ext/zlib/zlib.c (zstream_expand_buffer_protect): Added wrapper - function to pass jump state back through GVL-free section to allow - zstream clean-up before terminating the ruby call. - * ext/zlib/zlib.c (zstream_expand_buffer_without_gvl): Acquire GVL to - yield processed chunk of output stream. - * ext/zlib/zlib.c (zstream_detach_buffer): When a block is given, - returns Qnil mid-stream and yields the output buffer at the end of - the stream. - * ext/zlib/extconf.rb: Update INCFLAGS to find internal.h for - rb_thread_call_with_gvl - * test/zlib/test_zlib.rb: Added tests for streaming processing - Tue Jul 10 00:44:41 2012 KOSAKI Motohiro <kosaki.motohiro@g...> * include/ruby/ruby.h: Removed RUBY_GLOBAL_SETUP complely. It is Index: ext/zlib/zlib.c =================================================================== --- ext/zlib/zlib.c (revision 36353) +++ ext/zlib/zlib.c (revision 36354) @@ -10,7 +10,6 @@ #include <zlib.h> #include <time.h> #include <ruby/io.h> -#include "internal.h" #ifdef HAVE_VALGRIND_MEMCHECK_H # include <valgrind/memcheck.h> @@ -545,18 +544,13 @@ #define ZSTREAM_FLAG_IN_STREAM 0x2 #define ZSTREAM_FLAG_FINISHED 0x4 #define ZSTREAM_FLAG_CLOSING 0x8 -#define ZSTREAM_FLAG_GZFILE 0x10 /* disallows yield from expand_buffer for - gzip*/ -#define ZSTREAM_FLAG_UNUSED 0x20 +#define ZSTREAM_FLAG_UNUSED 0x10 #define ZSTREAM_READY(z) ((z)->flags |= ZSTREAM_FLAG_READY) #define ZSTREAM_IS_READY(z) ((z)->flags & ZSTREAM_FLAG_READY) #define ZSTREAM_IS_FINISHED(z) ((z)->flags & ZSTREAM_FLAG_FINISHED) #define ZSTREAM_IS_CLOSING(z) ((z)->flags & ZSTREAM_FLAG_CLOSING) -#define ZSTREAM_IS_GZFILE(z) ((z)->flags & ZSTREAM_FLAG_GZFILE) -#define ZSTREAM_EXPAND_BUFFER_OK 0 - /* I think that more better value should be found, but I gave up finding it. B) */ #define ZSTREAM_INITIAL_BUFSIZE 1024 @@ -574,9 +568,8 @@ struct zstream_run_args { struct zstream * z; - int flush; /* stream flush value for inflate() or deflate() */ - int interrupt; /* stop processing the stream and return to ruby */ - int jump_state; /* for buffer expansion block break or exception */ + int flush; + int interrupt; }; static voidpf @@ -621,50 +614,33 @@ static void zstream_expand_buffer(struct zstream *z) { + long inc; + if (NIL_P(z->buf)) { - zstream_expand_buffer_into(z, ZSTREAM_INITIAL_BUFSIZE); + /* I uses rb_str_new here not rb_str_buf_new because + rb_str_buf_new makes a zero-length string. */ + z->buf = rb_str_new(0, ZSTREAM_INITIAL_BUFSIZE); + z->buf_filled = 0; + z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf); + z->stream.avail_out = ZSTREAM_INITIAL_BUFSIZE; + RBASIC(z->buf)->klass = 0; return; } - if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) { - if (z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { - int state = 0; - VALUE self = (VALUE)z->stream.opaque; - - rb_str_resize(z->buf, z->buf_filled); - RBASIC(z->buf)->klass = rb_cString; - OBJ_INFECT(z->buf, self); - - rb_protect(rb_yield, z->buf, &state); - - z->buf = Qnil; - zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX); - - if (state) - rb_jump_tag(state); - - return; - } - else { - zstream_expand_buffer_into(z, - ZSTREAM_AVAIL_OUT_STEP_MAX - z->buf_filled); - } + if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { + /* to keep other threads from freezing */ + z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; } else { - if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { - z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; + inc = z->buf_filled / 2; + if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) { + inc = ZSTREAM_AVAIL_OUT_STEP_MIN; } - else { - long inc = z->buf_filled / 2; - if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) { - inc = ZSTREAM_AVAIL_OUT_STEP_MIN; - } - rb_str_resize(z->buf, z->buf_filled + inc); - z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ? - (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX; - } - z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; + rb_str_resize(z->buf, z->buf_filled + inc); + z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ? + (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX; } + z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; } static void @@ -686,27 +662,13 @@ } } -static void * -zstream_expand_buffer_protect(void *ptr) -{ - struct zstream *z = (struct zstream *)ptr; - int state = 0; - - rb_protect((VALUE (*)(VALUE))zstream_expand_buffer, (VALUE)z, &state); - - return (void *)state; -} - static int zstream_expand_buffer_without_gvl(struct zstream *z) { char * new_str; long inc, len; - if (rb_block_given_p()) { - return (int)rb_thread_call_with_gvl(zstream_expand_buffer_protect, (void *)z); - } - else if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { + if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; } else { @@ -719,6 +681,9 @@ new_str = ruby_xrealloc(RSTRING(z->buf)->as.heap.ptr, len + 1); + if (!new_str) + return 0; + /* from rb_str_resize */ RSTRING(z->buf)->as.heap.ptr = new_str; RSTRING(z->buf)->as.heap.ptr[len] = '\0'; /* sentinel */ @@ -730,7 +695,7 @@ } z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; - return ZSTREAM_EXPAND_BUFFER_OK; + return 1; } static void @@ -771,13 +736,6 @@ { VALUE dst, self = (VALUE)z->stream.opaque; - if (!ZSTREAM_IS_FINISHED(z) && !ZSTREAM_IS_GZFILE(z) && - rb_block_given_p()) { - /* prevent tiny yields mid-stream, save for next - * zstream_expand_buffer() or stream end */ - return Qnil; - } - if (NIL_P(z->buf)) { dst = rb_str_new(0, 0); } @@ -793,12 +751,6 @@ z->buf_filled = 0; z->stream.next_out = 0; z->stream.avail_out = 0; - - if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) { - rb_yield(dst); - dst = Qnil; - } - return dst; } @@ -968,7 +920,7 @@ zstream_run_func(void *ptr) { struct zstream_run_args *args = (struct zstream_run_args *)ptr; - int err, state, flush = args->flush; + int err, flush = args->flush; struct zstream *z = args->z; uInt n; @@ -992,11 +944,8 @@ break; } - state = zstream_expand_buffer_without_gvl(z); - - if (state) { - err = Z_OK; /* buffer expanded but stream processing was stopped */ - args->jump_state = state; + if (!zstream_expand_buffer_without_gvl(z)) { + err = Z_MEM_ERROR; /* realloc failed */ break; } } @@ -1025,7 +974,6 @@ args.z = z; args.flush = flush; args.interrupt = 0; - args.jump_state = 0; if (NIL_P(z->input) && len == 0) { z->stream.next_in = (Bytef*)""; @@ -1077,9 +1025,6 @@ zstream_append_input(z, z->stream.next_in, z->stream.avail_in); guard = Qnil; /* prevent tail call to make guard effective */ } - - if (args.jump_state) - rb_jump_tag(args.jump_state); } static VALUE @@ -1262,13 +1207,8 @@ } /* - * call-seq: - * finish -> String - * finish { |chunk| ... } -> nil - * - * Finishes the stream and flushes output buffer. If a block is given each - * chunk is yielded to the block until the input buffer has been flushed to - * the output buffer. + * Finishes the stream and flushes output buffer. See Zlib::Deflate#finish and + * Zlib::Inflate#finish for details of this behavior. */ static VALUE rb_zstream_finish(VALUE obj) @@ -1281,13 +1221,7 @@ } /* - * call-seq: - * flush_next_out -> String - * flush_next_out { |chunk| ... } -> nil - * - * Flushes output buffer and returns all data in that buffer. If a block is - * given each chunk is yielded to the block until the current output buffer - * has been flushed. + * Flushes input buffer and returns all data in that buffer. */ static VALUE rb_zstream_flush_next_in(VALUE obj) @@ -1569,13 +1503,13 @@ /* * Document-method: Zlib::Deflate.deflate * - * call-seq: - * Zlib.deflate(string[, level]) - * Zlib::Deflate.deflate(string[, level]) + * call-seq: Zlib.deflate(string[, level]) + * Zlib::Deflate.deflate(string[, level]) * * Compresses the given +string+. Valid values of level are - * Zlib::NO_COMPRESSION, Zlib::BEST_SPEED, Zlib::BEST_COMPRESSION, - * Zlib::DEFAULT_COMPRESSION, or an integer from 0 to 9 (the default is 6). + * <tt>NO_COMPRESSION</tt>, <tt>BEST_SPEED</tt>, + * <tt>BEST_COMPRESSION</tt>, <tt>DEFAULT_COMPRESSION</tt>, and an + * integer from 0 to 9 (the default is 6). * * This method is almost equivalent to the following code: * @@ -1629,19 +1563,17 @@ } /* - * Document-method: Zlib::Deflate#deflate + * Document-method: Zlib#deflate * * call-seq: - * z.deflate(string, flush = Zlib::NO_FLUSH) -> String - * z.deflate(string, flush = Zlib::NO_FLUSH) { |chunk| ... } -> nil + * deflate(string, flush = Zlib::NO_FLUSH) * * Inputs +string+ into the deflate stream and returns the output from the * stream. On calling this method, both the input and the output buffers of - * the stream are flushed. If +string+ is nil, this method finishes the - * stream, just like Zlib::ZStream#finish. + * the stream are flushed. * - * If a block is given consecutive deflated chunks from the +string+ are - * yielded to the block and +nil+ is returned. + * If +string+ is nil, this method finishes the stream, just like + * Zlib::ZStream#finish. * * The +flush+ parameter specifies the flush mode. The following constants * may be used: @@ -1688,13 +1620,10 @@ * Document-method: Zlib::Deflate#flush * * call-seq: - * flush(flush = Zlib::SYNC_FLUSH) -> String - * flush(flush = Zlib::SYNC_FLUSH) { |chunk| ... } -> nil + * flush(flush = Zlib::SYNC_FLUSH) * * This method is equivalent to <tt>deflate('', flush)</tt>. This method is - * just provided to improve the readability of your Ruby program. If a block - * is given chunks of deflate output are yielded to the block until the buffer - * is flushed. + * just provided to improve the readability of your Ruby program. * * See Zlib::Deflate#deflate for detail on the +flush+ constants NO_FLUSH, * SYNC_FLUSH, FULL_FLUSH and FINISH. @@ -1882,11 +1811,9 @@ } /* - * Document-method: Zlib::inflate + * Document-method: Zlib::Inflate.inflate * - * call-seq: - * Zlib.inflate(string) - * Zlib::Inflate.inflate(string) + * call-seq: Zlib::Inflate.inflate(string) * * Decompresses +string+. Raises a Zlib::NeedDict exception if a preset * dictionary is needed for decompression. @@ -1962,18 +1889,13 @@ /* * Document-method: Zlib::Inflate#inflate * - * call-seq: - * inflate(deflate_string) -> String - * inflate(deflate_string) { |chunk| ... } -> nil + * call-seq: inflate(string) * - * Inputs +deflate_string+ into the inflate stream and returns the output from - * the stream. Calling this method, both the input and the output buffer of - * the stream are flushed. If string is +nil+, this method finishes the - * stream, just like Zlib::ZStream#finish. + * Inputs +string+ into the inflate stream and returns the output from the + * stream. Calling this method, both the input and the output buffer of the + * stream are flushed. If string is +nil+, this method finishes the stream, + * just like Zlib::ZStream#finish. * - * If a block is given consecutive inflated chunks from the +deflate_string+ - * are yielded to the block and +nil+ is returned. - * * Raises a Zlib::NeedDict exception if a preset dictionary is needed to * decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then * call this method again with an empty string to flush the stream: @@ -2246,7 +2168,6 @@ obj = Data_Make_Struct(klass, struct gzfile, gzfile_mark, gzfile_free, gz); zstream_init(&gz->z, funcs); - gz->z.flags |= ZSTREAM_FLAG_GZFILE; gz->io = Qnil; gz->level = 0; gz->mtime = 0; Index: ext/zlib/extconf.rb =================================================================== --- ext/zlib/extconf.rb (revision 36353) +++ ext/zlib/extconf.rb (revision 36354) @@ -7,10 +7,9 @@ require 'mkmf' require 'rbconfig' -$INCFLAGS << " -I$(topdir) -I$(top_srcdir)" - dir_config 'zlib' + if %w'z libz zlib1 zlib zdll'.find {|z| have_library(z, 'deflateReset')} and have_header('zlib.h') then Index: NEWS =================================================================== --- NEWS (revision 36353) +++ NEWS (revision 36354) @@ -123,8 +123,6 @@ accessible from other users. * zlib - * Added streaming support for Zlib::Inflate and Zlib::Deflate. This allows - processing of a stream without the use of large amounts of memory. * Added support for the new deflate strategies Zlib::RLE and Zlib::FIXED. * Zlib streams are now processed without the GVL. This allows gzip, zlib and deflate streams to be processed in parallel. Index: test/zlib/test_zlib.rb =================================================================== --- test/zlib/test_zlib.rb (revision 36353) +++ test/zlib/test_zlib.rb (revision 36354) @@ -39,62 +39,6 @@ assert_raise(Zlib::StreamError) { Zlib::Deflate.deflate("foo", 10000) } end - def test_deflate_chunked - original = '' - chunks = [] - r = Random.new 0 - - z = Zlib::Deflate.new - - 2.times do - input = r.bytes(20000) - original << input - z.deflate(input) do |chunk| - chunks << chunk - end - end - - assert_equal [16384, 16384], - chunks.map { |chunk| chunk.length } - - final = z.finish - - assert_equal 7253, final.length - - chunks << final - all = chunks.join - - inflated = Zlib.inflate all - - assert_equal original, inflated - end - - def test_deflate_chunked_break - chunks = [] - r = Random.new 0 - - z = Zlib::Deflate.new - - input = r.bytes(20000) - z.deflate(input) do |chunk| - chunks << chunk - break - end - - assert_equal [16384], chunks.map { |chunk| chunk.length } - - final = z.finish - - assert_equal 3632, final.length - - all = chunks.join - all << final - - original = Zlib.inflate all - - assert_equal input, original - end - def test_addstr z = Zlib::Deflate.new z << "foo" @@ -258,38 +202,6 @@ assert_equal "foofoofoo", out end - def test_finish_chunked - # zeros = Zlib::Deflate.deflate("0" * 100_000) - zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ - "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\257\006\351\247BH" - - chunks = [] - - z = Zlib::Inflate.new - - z.inflate(zeros) do |chunk| - chunks << chunk - break - end - - z.finish do |chunk| - chunks << chunk - end - - assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696], - chunks.map { |chunk| chunk.size } - - assert chunks.all? { |chunk| - chunk =~ /\A0+\z/ - } - end - def test_inflate s = Zlib::Deflate.deflate("foo") z = Zlib::Inflate.new @@ -319,58 +231,6 @@ assert_equal "\0", inflated end - def test_inflate_chunked - # s = Zlib::Deflate.deflate("0" * 100_000) - zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ - "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\257\006\351\247BH" - - chunks = [] - - z = Zlib::Inflate.new - - z.inflate(zeros) do |chunk| - chunks << chunk - end - - assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696], - chunks.map { |chunk| chunk.size } - - assert chunks.all? { |chunk| - chunk =~ /\A0+\z/ - } - end - - def test_inflate_chunked_break - # zeros = Zlib::Deflate.deflate("0" * 100_000) - zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ - "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ - "\000\000\000\000\000\000\000\257\006\351\247BH" - - chunks = [] - - z = Zlib::Inflate.new - - z.inflate(zeros) do |chunk| - chunks << chunk - break - end - - out = z.inflate nil - - assert_equal 100_000 - chunks.first.length, out.length - end - def test_inflate_dictionary dictionary = "foo" @@ -1036,18 +896,5 @@ def test_deflate TestZlibDeflate.new(__name__).test_deflate end - - def test_deflate_stream - r = Random.new 0 - - deflated = '' - - Zlib.deflate(r.bytes(20000)) do |chunk| - deflated << chunk - end - - assert_equal 20016, deflated.length - end - end end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/