[前][次][番号順一覧][スレッド一覧]

ruby-changes:63895

From: Jeremy <ko1@a...>
Date: Fri, 4 Dec 2020 19:13:03 +0900 (JST)
Subject: [ruby-changes:63895] 0adc426ca5 (master): [ruby/zlib] Add Zlib::Inflate#inflate :buffer keyword argument

https://git.ruby-lang.org/ruby.git/commit/?id=0adc426ca5

From 0adc426ca5cf83e57f5e8433a2f7b55fb7a1fad6 Mon Sep 17 00:00:00 2001
From: Jeremy Evans <code@j...>
Date: Thu, 29 Oct 2020 12:28:09 -0700
Subject: [ruby/zlib] Add Zlib::Inflate#inflate :buffer keyword argument

If a buffer keyword argument is given, it is used as the buffer,
instead of creating new strings.  This can result in significantly
lower memory usage during inflation.

Implements #19

https://github.com/ruby/zlib/commit/dac9a9b57d

diff --git a/ext/zlib/zlib.c b/ext/zlib/zlib.c
index fa2112f..de5e533 100644
--- a/ext/zlib/zlib.c
+++ b/ext/zlib/zlib.c
@@ -56,7 +56,10 @@ max_uint(long n) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L56
 #define MAX_UINT(n) (uInt)(n)
 #endif
 
-static ID id_dictionaries, id_read;
+#define OPTHASH_GIVEN_P(opts) \
+    (argc > 0 && !NIL_P((opts) = rb_check_hash_type(argv[argc-1])) && (--argc, 1))
+
+static ID id_dictionaries, id_read, id_buffer;
 
 /*--------- Prototypes --------*/
 
@@ -130,7 +133,7 @@ static VALUE rb_inflate_s_allocate(VALUE); https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L133
 static VALUE rb_inflate_initialize(int, VALUE*, VALUE);
 static VALUE rb_inflate_s_inflate(VALUE, VALUE);
 static void do_inflate(struct zstream*, VALUE);
-static VALUE rb_inflate_inflate(VALUE, VALUE);
+static VALUE rb_inflate_inflate(int, VALUE*, VALUE);
 static VALUE rb_inflate_addstr(VALUE, VALUE);
 static VALUE rb_inflate_sync(VALUE, VALUE);
 static VALUE rb_inflate_sync_point_p(VALUE);
@@ -557,7 +560,8 @@ struct zstream { https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L560
 #define ZSTREAM_FLAG_CLOSING    0x8
 #define ZSTREAM_FLAG_GZFILE     0x10 /* disallows yield from expand_buffer for
                                         gzip*/
-#define ZSTREAM_FLAG_UNUSED     0x20
+#define ZSTREAM_REUSE_BUFFER    0x20
+#define ZSTREAM_FLAG_UNUSED     0x40
 
 #define ZSTREAM_READY(z)       ((z)->flags |= ZSTREAM_FLAG_READY)
 #define ZSTREAM_IS_READY(z)    ((z)->flags & ZSTREAM_FLAG_READY)
@@ -566,6 +570,8 @@ struct zstream { https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L570
 #define ZSTREAM_IS_GZFILE(z)   ((z)->flags & ZSTREAM_FLAG_GZFILE)
 #define ZSTREAM_BUF_FILLED(z)  (NIL_P((z)->buf) ? 0 : RSTRING_LEN((z)->buf))
 
+#define ZSTREAM_REUSE_BUFFER_P(z)     ((z)->flags & ZSTREAM_REUSE_BUFFER)
+
 #define ZSTREAM_EXPAND_BUFFER_OK          0
 
 /* I think that more better value should be found,
@@ -642,11 +648,19 @@ zstream_expand_buffer(struct zstream *z) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L648
 	if (buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
 	    int state = 0;
 
-	    rb_obj_reveal(z->buf, rb_cString);
+            if (!ZSTREAM_REUSE_BUFFER_P(z)) {
+	        rb_obj_reveal(z->buf, rb_cString);
+            }
 
 	    rb_protect(rb_yield, z->buf, &state);
 
-	    z->buf = Qnil;
+            if (ZSTREAM_REUSE_BUFFER_P(z)) {
+                rb_str_modify(z->buf);
+                rb_str_set_len(z->buf, 0);
+            }
+            else {
+                z->buf = Qnil;
+            }
 	    zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX);
 
 	    if (state)
@@ -764,7 +778,9 @@ zstream_detach_buffer(struct zstream *z) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L778
     }
     else {
 	dst = z->buf;
-	rb_obj_reveal(dst, rb_cString);
+        if (!ZSTREAM_REUSE_BUFFER_P(z)) {
+	    rb_obj_reveal(dst, rb_cString);
+        }
     }
 
     z->buf = Qnil;
@@ -2013,8 +2029,8 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2029
  * Document-method: Zlib::Inflate#inflate
  *
  * call-seq:
- *   inflate(deflate_string)                 -> String
- *   inflate(deflate_string) { |chunk| ... } -> nil
+ *   inflate(deflate_string, buffer: nil)                 -> String
+ *   inflate(deflate_string, buffer: nil) { |chunk| ... } -> nil
  *
  * Inputs +deflate_string+ into the inflate stream and returns the output from
  * the stream.  Calling this method, both the input and the output buffer of
@@ -2024,6 +2040,15 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2040
  * If a block is given consecutive inflated chunks from the +deflate_string+
  * are yielded to the block and +nil+ is returned.
  *
+ * If a :buffer keyword argument is given and not nil:
+ *
+ * * The :buffer keyword should be a String, and will used as the output buffer.
+ *   Using this option can reuse the memory required during inflation.
+ * * When not passing a block, the return value will be the same object as the
+ *   :buffer keyword argument.
+ * * When passing a block, the yielded chunks will be the same value as the
+ *   :buffer keyword argument.
+ *
  * Raises a Zlib::NeedDict exception if a preset dictionary is needed to
  * decompress.  Set the dictionary by Zlib::Inflate#set_dictionary and then
  * call this method again with an empty string to flush the stream:
@@ -2047,10 +2072,37 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2072
  * See also Zlib::Inflate.new
  */
 static VALUE
-rb_inflate_inflate(VALUE obj, VALUE src)
+rb_inflate_inflate(int argc, VALUE* argv, VALUE obj)
 {
     struct zstream *z = get_zstream(obj);
-    VALUE dst;
+    VALUE dst, src, opts, buffer = Qnil;
+
+    if (OPTHASH_GIVEN_P(opts)) {
+        VALUE buf;
+        rb_get_kwargs(opts, &id_buffer, 0, 1, &buf);
+        if (buf != Qundef && buf != Qnil) {
+            buffer = StringValue(buf);
+        }
+    }
+    if (buffer != Qnil) {
+        if (!(ZSTREAM_REUSE_BUFFER_P(z) && z->buf == buffer)) {
+            long len = RSTRING_LEN(buffer);
+            if (len >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
+                rb_str_modify(buffer);
+            }
+            else {
+                len = ZSTREAM_AVAIL_OUT_STEP_MAX - len;
+                rb_str_modify_expand(buffer, len);
+            }
+            rb_str_set_len(buffer, 0);
+            z->flags |= ZSTREAM_REUSE_BUFFER;
+            z->buf = buffer;
+        }
+    } else if (ZSTREAM_REUSE_BUFFER_P(z)) {
+        z->flags &= ~ZSTREAM_REUSE_BUFFER;
+        z->buf = Qnil;
+    }
+    rb_scan_args(argc, argv, "10", &src);
 
     if (ZSTREAM_IS_FINISHED(z)) {
 	if (NIL_P(src)) {
@@ -2059,7 +2111,11 @@ rb_inflate_inflate(VALUE obj, VALUE src) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2111
 	else {
 	    StringValue(src);
 	    zstream_append_buffer2(z, src);
-	    dst = rb_str_new(0, 0);
+	    if (ZSTREAM_REUSE_BUFFER_P(z)) {
+                dst = rb_str_resize(buffer, 0);
+            } else {
+                dst = rb_str_new(0, 0);
+            }
 	}
     }
     else {
@@ -4340,8 +4396,6 @@ zlib_gzip_end(struct gzfile *gz) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L4396
     zstream_end(&gz->z);
 }
 
-#define OPTHASH_GIVEN_P(opts) \
-    (argc > 0 && !NIL_P((opts) = rb_check_hash_type(argv[argc-1])) && (--argc, 1))
 static ID id_level, id_strategy;
 static VALUE zlib_gzip_run(VALUE arg);
 
@@ -4588,7 +4642,7 @@ Init_zlib(void) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L4642
     rb_define_alloc_func(cInflate, rb_inflate_s_allocate);
     rb_define_method(cInflate, "initialize", rb_inflate_initialize, -1);
     rb_define_method(cInflate, "add_dictionary", rb_inflate_add_dictionary, 1);
-    rb_define_method(cInflate, "inflate", rb_inflate_inflate, 1);
+    rb_define_method(cInflate, "inflate", rb_inflate_inflate, -1);
     rb_define_method(cInflate, "<<", rb_inflate_addstr, 1);
     rb_define_method(cInflate, "sync", rb_inflate_sync, 1);
     rb_define_method(cInflate, "sync_point?", rb_inflate_sync_point_p, 0);
@@ -4797,6 +4851,7 @@ Init_zlib(void) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L4851
 
     id_level = rb_intern("level");
     id_strategy = rb_intern("strategy");
+    id_buffer = rb_intern("buffer");
 #endif /* GZIP_SUPPORT */
 }
 
diff --git a/test/zlib/test_zlib.rb b/test/zlib/test_zlib.rb
index c72fe76..addd427 100644
--- a/test/zlib/test_zlib.rb
+++ b/test/zlib/test_zlib.rb
@@ -363,6 +363,65 @@ if defined? Zlib https://github.com/ruby/ruby/blob/trunk/test/zlib/test_zlib.rb#L363
       }
     end
 
+    def test_inflate_buffer
+      s = Zlib::Deflate.deflate("foo")
+      z = Zlib::Inflate.new
+      buf = String.new
+      s = z.inflate(s, buffer: buf)
+      assert_same(buf, s)
+      buf = String.new
+      s << z.inflate(nil, buffer: buf)
+      assert_equal("foo", s)
+      z.inflate("foo", buffer: buf) # ???
+      z << "foo" # ???
+    end
+
+    def test_inflate_buffer_partial_input
+      deflated = Zlib::Deflate.deflate "\0"
+
+      z = Zlib::Inflate.new
+
+      inflated = "".dup
+
+      buf = String.new
+      deflated.each_char do |byte|
+        inflated << z.inflate(byte, buffer: buf)
+      end
+
+      inflated << z.finish
+
+      assert_equal "\0", inflated
+    end
+
+    def test_inflate_buffer_chunked
+      # s = Zlib::Deflate.deflate("0" * 100_000)
+      zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
+              "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
+              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
+              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
+              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
+              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
+              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
+              "\000\000\000\000\000\000\000\257\006\351\247BH"
+
+      chunks = []
+
+      z = Zlib::Inflate.new
+
+      buf = String.new
+      z.inflate(zeros, buffer: buf) do |chunk|
+        assert_same(buf, chunk)
+        chunks << chunk.dup
+      end
+
+      assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696],
+                   chunks.map { |chunk| chunk.size }
+
+      asser (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]