[前][次][番号順一覧][スレッド一覧]

ruby-changes:24303

From: drbrain <ko1@a...>
Date: Tue, 10 Jul 2012 12:51:35 +0900 (JST)
Subject: [ruby-changes:24303] drbrain:r36354 (trunk): * ext/zlib/zlib.c: Revert r36349. Added streaming support to inflate

drbrain	2012-07-10 12:51:25 +0900 (Tue, 10 Jul 2012)

  New Revision: 36354

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=36354

  Log:
    * ext/zlib/zlib.c:  Revert r36349.  Added streaming support to inflate
      processing.  rb_block_given_p() is not callable without the GVL.
    * ext/zlib/extconf.rb:  ditto
    * NEWS:  ditto
    * test/zlib/test_zlib.rb:  ditto

  Modified files:
    trunk/ChangeLog
    trunk/NEWS
    trunk/ext/zlib/extconf.rb
    trunk/ext/zlib/zlib.c
    trunk/test/zlib/test_zlib.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 36353)
+++ ChangeLog	(revision 36354)
@@ -6,27 +6,6 @@
 
 	* configure.in: removed --enable/disable-win95 options. (see r36432)
 
-Tue Jul 10 08:56:17 2012  Eric Hodel  <drbrain@s...>
-
-	* ext/zlib/zlib.c:  Added streaming support to inflate processing.
-	  This allows zlib streams to be processed without huge memory growth.
-	  [Feature #6612]
-	* NEWS:  ditto
-	* ext/zlib/zlib.c (zstream_expand_buffer):  Uses rb_yield when a block
-	  is given for streaming support.  Refactored to use
-	  zstream_expand_buffer_into to remove duplicate code.
-	* ext/zlib/zlib.c (zstream_expand_buffer_protect):  Added wrapper
-	  function to pass jump state back through GVL-free section to allow
-          zstream clean-up before terminating the ruby call.
-	* ext/zlib/zlib.c (zstream_expand_buffer_without_gvl):  Acquire GVL to
-	  yield processed chunk of output stream.
-	* ext/zlib/zlib.c (zstream_detach_buffer):  When a block is given,
-	  returns Qnil mid-stream and yields the output buffer at the end of
-	  the stream.
-	* ext/zlib/extconf.rb:  Update INCFLAGS to find internal.h for
-	  rb_thread_call_with_gvl
-	* test/zlib/test_zlib.rb:  Added tests for streaming processing
-
 Tue Jul 10 00:44:41 2012  KOSAKI Motohiro  <kosaki.motohiro@g...>
 
 	* include/ruby/ruby.h: Removed RUBY_GLOBAL_SETUP complely. It is
Index: ext/zlib/zlib.c
===================================================================
--- ext/zlib/zlib.c	(revision 36353)
+++ ext/zlib/zlib.c	(revision 36354)
@@ -10,7 +10,6 @@
 #include <zlib.h>
 #include <time.h>
 #include <ruby/io.h>
-#include "internal.h"
 
 #ifdef HAVE_VALGRIND_MEMCHECK_H
 # include <valgrind/memcheck.h>
@@ -545,18 +544,13 @@
 #define ZSTREAM_FLAG_IN_STREAM  0x2
 #define ZSTREAM_FLAG_FINISHED   0x4
 #define ZSTREAM_FLAG_CLOSING    0x8
-#define ZSTREAM_FLAG_GZFILE     0x10 /* disallows yield from expand_buffer for
-                                        gzip*/
-#define ZSTREAM_FLAG_UNUSED     0x20
+#define ZSTREAM_FLAG_UNUSED     0x10
 
 #define ZSTREAM_READY(z)       ((z)->flags |= ZSTREAM_FLAG_READY)
 #define ZSTREAM_IS_READY(z)    ((z)->flags & ZSTREAM_FLAG_READY)
 #define ZSTREAM_IS_FINISHED(z) ((z)->flags & ZSTREAM_FLAG_FINISHED)
 #define ZSTREAM_IS_CLOSING(z)  ((z)->flags & ZSTREAM_FLAG_CLOSING)
-#define ZSTREAM_IS_GZFILE(z)   ((z)->flags & ZSTREAM_FLAG_GZFILE)
 
-#define ZSTREAM_EXPAND_BUFFER_OK          0
-
 /* I think that more better value should be found,
    but I gave up finding it. B) */
 #define ZSTREAM_INITIAL_BUFSIZE       1024
@@ -574,9 +568,8 @@
 
 struct zstream_run_args {
     struct zstream * z;
-    int flush;      /* stream flush value for inflate() or deflate() */
-    int interrupt;  /* stop processing the stream and return to ruby */
-    int jump_state; /* for buffer expansion block break or exception */
+    int flush;
+    int interrupt;
 };
 
 static voidpf
@@ -621,50 +614,33 @@
 static void
 zstream_expand_buffer(struct zstream *z)
 {
+    long inc;
+
     if (NIL_P(z->buf)) {
-	zstream_expand_buffer_into(z, ZSTREAM_INITIAL_BUFSIZE);
+	    /* I uses rb_str_new here not rb_str_buf_new because
+	       rb_str_buf_new makes a zero-length string. */
+	z->buf = rb_str_new(0, ZSTREAM_INITIAL_BUFSIZE);
+	z->buf_filled = 0;
+	z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf);
+	z->stream.avail_out = ZSTREAM_INITIAL_BUFSIZE;
+	RBASIC(z->buf)->klass = 0;
 	return;
     }
 
-    if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) {
-	if (z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
-	    int state = 0;
-	    VALUE self = (VALUE)z->stream.opaque;
-
-	    rb_str_resize(z->buf, z->buf_filled);
-	    RBASIC(z->buf)->klass = rb_cString;
-	    OBJ_INFECT(z->buf, self);
-
-	    rb_protect(rb_yield, z->buf, &state);
-
-	    z->buf = Qnil;
-	    zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX);
-
-	    if (state)
-		rb_jump_tag(state);
-
-	    return;
-	}
-	else {
-	    zstream_expand_buffer_into(z,
-		    ZSTREAM_AVAIL_OUT_STEP_MAX - z->buf_filled);
-	}
+    if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
+	/* to keep other threads from freezing */
+	z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
     }
     else {
-	if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
-	    z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
+	inc = z->buf_filled / 2;
+	if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) {
+	    inc = ZSTREAM_AVAIL_OUT_STEP_MIN;
 	}
-	else {
-	    long inc = z->buf_filled / 2;
-	    if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) {
-		inc = ZSTREAM_AVAIL_OUT_STEP_MIN;
-	    }
-	    rb_str_resize(z->buf, z->buf_filled + inc);
-	    z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ?
-		(int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX;
-	}
-	z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
+	rb_str_resize(z->buf, z->buf_filled + inc);
+	z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ?
+	    (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX;
     }
+    z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
 }
 
 static void
@@ -686,27 +662,13 @@
     }
 }
 
-static void *
-zstream_expand_buffer_protect(void *ptr)
-{
-    struct zstream *z = (struct zstream *)ptr;
-    int state = 0;
-
-    rb_protect((VALUE (*)(VALUE))zstream_expand_buffer, (VALUE)z, &state);
-
-    return (void *)state;
-}
-
 static int
 zstream_expand_buffer_without_gvl(struct zstream *z)
 {
     char * new_str;
     long inc, len;
 
-    if (rb_block_given_p()) {
-	return (int)rb_thread_call_with_gvl(zstream_expand_buffer_protect, (void *)z);
-    }
-    else if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
+    if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
 	z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
     }
     else {
@@ -719,6 +681,9 @@
 
 	new_str = ruby_xrealloc(RSTRING(z->buf)->as.heap.ptr, len + 1);
 
+	if (!new_str)
+	    return 0;
+
 	/* from rb_str_resize */
 	RSTRING(z->buf)->as.heap.ptr = new_str;
 	RSTRING(z->buf)->as.heap.ptr[len] = '\0'; /* sentinel */
@@ -730,7 +695,7 @@
     }
     z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
 
-    return ZSTREAM_EXPAND_BUFFER_OK;
+    return 1;
 }
 
 static void
@@ -771,13 +736,6 @@
 {
     VALUE dst, self = (VALUE)z->stream.opaque;
 
-    if (!ZSTREAM_IS_FINISHED(z) && !ZSTREAM_IS_GZFILE(z) &&
-	    rb_block_given_p()) {
-	/* prevent tiny yields mid-stream, save for next
-	 * zstream_expand_buffer() or stream end */
-	return Qnil;
-    }
-
     if (NIL_P(z->buf)) {
 	dst = rb_str_new(0, 0);
     }
@@ -793,12 +751,6 @@
     z->buf_filled = 0;
     z->stream.next_out = 0;
     z->stream.avail_out = 0;
-
-    if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) {
-	rb_yield(dst);
-	dst = Qnil;
-    }
-
     return dst;
 }
 
@@ -968,7 +920,7 @@
 zstream_run_func(void *ptr)
 {
     struct zstream_run_args *args = (struct zstream_run_args *)ptr;
-    int err, state, flush = args->flush;
+    int err, flush = args->flush;
     struct zstream *z = args->z;
     uInt n;
 
@@ -992,11 +944,8 @@
 	    break;
 	}
 
-	state = zstream_expand_buffer_without_gvl(z);
-
-	if (state) {
-	    err = Z_OK; /* buffer expanded but stream processing was stopped */
-	    args->jump_state = state;
+	if (!zstream_expand_buffer_without_gvl(z)) {
+	    err = Z_MEM_ERROR; /* realloc failed */
 	    break;
 	}
     }
@@ -1025,7 +974,6 @@
     args.z = z;
     args.flush = flush;
     args.interrupt = 0;
-    args.jump_state = 0;
 
     if (NIL_P(z->input) && len == 0) {
 	z->stream.next_in = (Bytef*)"";
@@ -1077,9 +1025,6 @@
 	zstream_append_input(z, z->stream.next_in, z->stream.avail_in);
         guard = Qnil; /* prevent tail call to make guard effective */
     }
-
-    if (args.jump_state)
-	rb_jump_tag(args.jump_state);
 }
 
 static VALUE
@@ -1262,13 +1207,8 @@
 }
 
 /*
- * call-seq:
- *   finish                 -> String
- *   finish { |chunk| ... } -> nil
- *
- * Finishes the stream and flushes output buffer.  If a block is given each
- * chunk is yielded to the block until the input buffer has been flushed to
- * the output buffer.
+ * Finishes the stream and flushes output buffer. See Zlib::Deflate#finish and
+ * Zlib::Inflate#finish for details of this behavior.
  */
 static VALUE
 rb_zstream_finish(VALUE obj)
@@ -1281,13 +1221,7 @@
 }
 
 /*
- * call-seq:
- *   flush_next_out                 -> String
- *   flush_next_out { |chunk| ... } -> nil
- *
- * Flushes output buffer and returns all data in that buffer.  If a block is
- * given each chunk is yielded to the block until the current output buffer
- * has been flushed.
+ * Flushes input buffer and returns all data in that buffer.
  */
 static VALUE
 rb_zstream_flush_next_in(VALUE obj)
@@ -1569,13 +1503,13 @@
 /*
  * Document-method: Zlib::Deflate.deflate
  *
- * call-seq:
- *   Zlib.deflate(string[, level])
- *   Zlib::Deflate.deflate(string[, level])
+ * call-seq: Zlib.deflate(string[, level])
+ *           Zlib::Deflate.deflate(string[, level])
  *
  * Compresses the given +string+. Valid values of level are
- * Zlib::NO_COMPRESSION, Zlib::BEST_SPEED, Zlib::BEST_COMPRESSION,
- * Zlib::DEFAULT_COMPRESSION, or an integer from 0 to 9 (the default is 6).
+ * <tt>NO_COMPRESSION</tt>, <tt>BEST_SPEED</tt>,
+ * <tt>BEST_COMPRESSION</tt>, <tt>DEFAULT_COMPRESSION</tt>, and an
+ * integer from 0 to 9 (the default is 6).
  *
  * This method is almost equivalent to the following code:
  *
@@ -1629,19 +1563,17 @@
 }
 
 /*
- * Document-method: Zlib::Deflate#deflate
+ * Document-method: Zlib#deflate
  *
  * call-seq:
- *   z.deflate(string, flush = Zlib::NO_FLUSH)                 -> String
- *   z.deflate(string, flush = Zlib::NO_FLUSH) { |chunk| ... } -> nil
+ *   deflate(string, flush = Zlib::NO_FLUSH)
  *
  * Inputs +string+ into the deflate stream and returns the output from the
  * stream.  On calling this method, both the input and the output buffers of
- * the stream are flushed.  If +string+ is nil, this method finishes the
- * stream, just like Zlib::ZStream#finish.
+ * the stream are flushed.
  *
- * If a block is given consecutive deflated chunks from the +string+ are
- * yielded to the block and +nil+ is returned.
+ * If +string+ is nil, this method finishes the stream, just like
+ * Zlib::ZStream#finish.
  *
  * The +flush+ parameter specifies the flush mode.  The following constants
  * may be used:
@@ -1688,13 +1620,10 @@
  * Document-method: Zlib::Deflate#flush
  *
  * call-seq:
- *   flush(flush = Zlib::SYNC_FLUSH)                 -> String
- *   flush(flush = Zlib::SYNC_FLUSH) { |chunk| ... } -> nil
+ *   flush(flush = Zlib::SYNC_FLUSH)
  *
  * This method is equivalent to <tt>deflate('', flush)</tt>. This method is
- * just provided to improve the readability of your Ruby program.  If a block
- * is given chunks of deflate output are yielded to the block until the buffer
- * is flushed.
+ * just provided to improve the readability of your Ruby program.
  *
  * See Zlib::Deflate#deflate for detail on the +flush+ constants NO_FLUSH,
  * SYNC_FLUSH, FULL_FLUSH and FINISH.
@@ -1882,11 +1811,9 @@
 }
 
 /*
- * Document-method: Zlib::inflate
+ * Document-method: Zlib::Inflate.inflate
  *
- * call-seq:
- *   Zlib.inflate(string)
- *   Zlib::Inflate.inflate(string)
+ * call-seq: Zlib::Inflate.inflate(string)
  *
  * Decompresses +string+. Raises a Zlib::NeedDict exception if a preset
  * dictionary is needed for decompression.
@@ -1962,18 +1889,13 @@
 /*
  * Document-method: Zlib::Inflate#inflate
  *
- * call-seq:
- *   inflate(deflate_string)                 -> String
- *   inflate(deflate_string) { |chunk| ... } -> nil
+ * call-seq: inflate(string)
  *
- * Inputs +deflate_string+ into the inflate stream and returns the output from
- * the stream.  Calling this method, both the input and the output buffer of
- * the stream are flushed.  If string is +nil+, this method finishes the
- * stream, just like Zlib::ZStream#finish.
+ * Inputs +string+ into the inflate stream and returns the output from the
+ * stream.  Calling this method, both the input and the output buffer of the
+ * stream are flushed.  If string is +nil+, this method finishes the stream,
+ * just like Zlib::ZStream#finish.
  *
- * If a block is given consecutive inflated chunks from the +deflate_string+
- * are yielded to the block and +nil+ is returned.
- *
  * Raises a Zlib::NeedDict exception if a preset dictionary is needed to
  * decompress.  Set the dictionary by Zlib::Inflate#set_dictionary and then
  * call this method again with an empty string to flush the stream:
@@ -2246,7 +2168,6 @@
 
     obj = Data_Make_Struct(klass, struct gzfile, gzfile_mark, gzfile_free, gz);
     zstream_init(&gz->z, funcs);
-    gz->z.flags |= ZSTREAM_FLAG_GZFILE;
     gz->io = Qnil;
     gz->level = 0;
     gz->mtime = 0;
Index: ext/zlib/extconf.rb
===================================================================
--- ext/zlib/extconf.rb	(revision 36353)
+++ ext/zlib/extconf.rb	(revision 36354)
@@ -7,10 +7,9 @@
 require 'mkmf'
 require 'rbconfig'
 
-$INCFLAGS << " -I$(topdir) -I$(top_srcdir)"
-
 dir_config 'zlib'
 
+
 if %w'z libz zlib1 zlib zdll'.find {|z| have_library(z, 'deflateReset')} and
     have_header('zlib.h') then
 
Index: NEWS
===================================================================
--- NEWS	(revision 36353)
+++ NEWS	(revision 36354)
@@ -123,8 +123,6 @@
       accessible from other users.
 
 * zlib
-  * Added streaming support for Zlib::Inflate and Zlib::Deflate.  This allows
-    processing of a stream without the use of large amounts of memory.
   * Added support for the new deflate strategies Zlib::RLE and Zlib::FIXED.
   * Zlib streams are now processed without the GVL.  This allows gzip, zlib and
     deflate streams to be processed in parallel.
Index: test/zlib/test_zlib.rb
===================================================================
--- test/zlib/test_zlib.rb	(revision 36353)
+++ test/zlib/test_zlib.rb	(revision 36354)
@@ -39,62 +39,6 @@
       assert_raise(Zlib::StreamError) { Zlib::Deflate.deflate("foo", 10000) }
     end
 
-    def test_deflate_chunked
-      original = ''
-      chunks = []
-      r = Random.new 0
-
-      z = Zlib::Deflate.new
-
-      2.times do
-        input = r.bytes(20000)
-        original << input
-        z.deflate(input) do |chunk|
-          chunks << chunk
-        end
-      end
-
-      assert_equal [16384, 16384],
-                   chunks.map { |chunk| chunk.length }
-
-      final = z.finish
-
-      assert_equal 7253, final.length
-
-      chunks << final
-      all = chunks.join
-
-      inflated = Zlib.inflate all
-
-      assert_equal original, inflated
-    end
-
-    def test_deflate_chunked_break
-      chunks = []
-      r = Random.new 0
-
-      z = Zlib::Deflate.new
-
-      input = r.bytes(20000)
-      z.deflate(input) do |chunk|
-        chunks << chunk
-        break
-      end
-
-      assert_equal [16384], chunks.map { |chunk| chunk.length }
-
-      final = z.finish
-
-      assert_equal 3632, final.length
-
-      all = chunks.join
-      all << final
-
-      original = Zlib.inflate all
-
-      assert_equal input, original
-    end
-
     def test_addstr
       z = Zlib::Deflate.new
       z << "foo"
@@ -258,38 +202,6 @@
       assert_equal "foofoofoo", out
     end
 
-    def test_finish_chunked
-      # zeros = Zlib::Deflate.deflate("0" * 100_000)
-      zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
-              "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\257\006\351\247BH"
-
-      chunks = []
-
-      z = Zlib::Inflate.new
-
-      z.inflate(zeros) do |chunk|
-        chunks << chunk
-        break
-      end
-
-      z.finish do |chunk|
-        chunks << chunk
-      end
-
-      assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696],
-                   chunks.map { |chunk| chunk.size }
-
-      assert chunks.all? { |chunk|
-        chunk =~ /\A0+\z/
-      }
-    end
-
     def test_inflate
       s = Zlib::Deflate.deflate("foo")
       z = Zlib::Inflate.new
@@ -319,58 +231,6 @@
       assert_equal "\0", inflated
     end
 
-    def test_inflate_chunked
-      # s = Zlib::Deflate.deflate("0" * 100_000)
-      zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
-              "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\257\006\351\247BH"
-
-      chunks = []
-
-      z = Zlib::Inflate.new
-
-      z.inflate(zeros) do |chunk|
-        chunks << chunk
-      end
-
-      assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696],
-                   chunks.map { |chunk| chunk.size }
-
-      assert chunks.all? { |chunk|
-        chunk =~ /\A0+\z/
-      }
-    end
-
-    def test_inflate_chunked_break
-      # zeros = Zlib::Deflate.deflate("0" * 100_000)
-      zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
-              "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
-              "\000\000\000\000\000\000\000\257\006\351\247BH"
-
-      chunks = []
-
-      z = Zlib::Inflate.new
-
-      z.inflate(zeros) do |chunk|
-        chunks << chunk
-        break
-      end
-
-      out = z.inflate nil
-
-      assert_equal 100_000 - chunks.first.length, out.length
-    end
-
     def test_inflate_dictionary
       dictionary = "foo"
 
@@ -1036,18 +896,5 @@
     def test_deflate
       TestZlibDeflate.new(__name__).test_deflate
     end
-
-    def test_deflate_stream
-      r = Random.new 0
-
-      deflated = ''
-
-      Zlib.deflate(r.bytes(20000)) do |chunk|
-        deflated << chunk
-      end
-
-      assert_equal 20016, deflated.length
-    end
-
   end
 end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]