ruby-changes:63895
From: Jeremy <ko1@a...>
Date: Fri, 4 Dec 2020 19:13:03 +0900 (JST)
Subject: [ruby-changes:63895] 0adc426ca5 (master): [ruby/zlib] Add Zlib::Inflate#inflate :buffer keyword argument
https://git.ruby-lang.org/ruby.git/commit/?id=0adc426ca5 From 0adc426ca5cf83e57f5e8433a2f7b55fb7a1fad6 Mon Sep 17 00:00:00 2001 From: Jeremy Evans <code@j...> Date: Thu, 29 Oct 2020 12:28:09 -0700 Subject: [ruby/zlib] Add Zlib::Inflate#inflate :buffer keyword argument If a buffer keyword argument is given, it is used as the buffer, instead of creating new strings. This can result in significantly lower memory usage during inflation. Implements #19 https://github.com/ruby/zlib/commit/dac9a9b57d diff --git a/ext/zlib/zlib.c b/ext/zlib/zlib.c index fa2112f..de5e533 100644 --- a/ext/zlib/zlib.c +++ b/ext/zlib/zlib.c @@ -56,7 +56,10 @@ max_uint(long n) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L56 #define MAX_UINT(n) (uInt)(n) #endif -static ID id_dictionaries, id_read; +#define OPTHASH_GIVEN_P(opts) \ + (argc > 0 && !NIL_P((opts) = rb_check_hash_type(argv[argc-1])) && (--argc, 1)) + +static ID id_dictionaries, id_read, id_buffer; /*--------- Prototypes --------*/ @@ -130,7 +133,7 @@ static VALUE rb_inflate_s_allocate(VALUE); https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L133 static VALUE rb_inflate_initialize(int, VALUE*, VALUE); static VALUE rb_inflate_s_inflate(VALUE, VALUE); static void do_inflate(struct zstream*, VALUE); -static VALUE rb_inflate_inflate(VALUE, VALUE); +static VALUE rb_inflate_inflate(int, VALUE*, VALUE); static VALUE rb_inflate_addstr(VALUE, VALUE); static VALUE rb_inflate_sync(VALUE, VALUE); static VALUE rb_inflate_sync_point_p(VALUE); @@ -557,7 +560,8 @@ struct zstream { https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L560 #define ZSTREAM_FLAG_CLOSING 0x8 #define ZSTREAM_FLAG_GZFILE 0x10 /* disallows yield from expand_buffer for gzip*/ -#define ZSTREAM_FLAG_UNUSED 0x20 +#define ZSTREAM_REUSE_BUFFER 0x20 +#define ZSTREAM_FLAG_UNUSED 0x40 #define ZSTREAM_READY(z) ((z)->flags |= ZSTREAM_FLAG_READY) #define ZSTREAM_IS_READY(z) ((z)->flags & ZSTREAM_FLAG_READY) @@ -566,6 +570,8 @@ struct zstream { https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L570 #define ZSTREAM_IS_GZFILE(z) ((z)->flags & ZSTREAM_FLAG_GZFILE) #define ZSTREAM_BUF_FILLED(z) (NIL_P((z)->buf) ? 0 : RSTRING_LEN((z)->buf)) +#define ZSTREAM_REUSE_BUFFER_P(z) ((z)->flags & ZSTREAM_REUSE_BUFFER) + #define ZSTREAM_EXPAND_BUFFER_OK 0 /* I think that more better value should be found, @@ -642,11 +648,19 @@ zstream_expand_buffer(struct zstream *z) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L648 if (buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { int state = 0; - rb_obj_reveal(z->buf, rb_cString); + if (!ZSTREAM_REUSE_BUFFER_P(z)) { + rb_obj_reveal(z->buf, rb_cString); + } rb_protect(rb_yield, z->buf, &state); - z->buf = Qnil; + if (ZSTREAM_REUSE_BUFFER_P(z)) { + rb_str_modify(z->buf); + rb_str_set_len(z->buf, 0); + } + else { + z->buf = Qnil; + } zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX); if (state) @@ -764,7 +778,9 @@ zstream_detach_buffer(struct zstream *z) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L778 } else { dst = z->buf; - rb_obj_reveal(dst, rb_cString); + if (!ZSTREAM_REUSE_BUFFER_P(z)) { + rb_obj_reveal(dst, rb_cString); + } } z->buf = Qnil; @@ -2013,8 +2029,8 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2029 * Document-method: Zlib::Inflate#inflate * * call-seq: - * inflate(deflate_string) -> String - * inflate(deflate_string) { |chunk| ... } -> nil + * inflate(deflate_string, buffer: nil) -> String + * inflate(deflate_string, buffer: nil) { |chunk| ... } -> nil * * Inputs +deflate_string+ into the inflate stream and returns the output from * the stream. Calling this method, both the input and the output buffer of @@ -2024,6 +2040,15 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2040 * If a block is given consecutive inflated chunks from the +deflate_string+ * are yielded to the block and +nil+ is returned. * + * If a :buffer keyword argument is given and not nil: + * + * * The :buffer keyword should be a String, and will used as the output buffer. + * Using this option can reuse the memory required during inflation. + * * When not passing a block, the return value will be the same object as the + * :buffer keyword argument. + * * When passing a block, the yielded chunks will be the same value as the + * :buffer keyword argument. + * * Raises a Zlib::NeedDict exception if a preset dictionary is needed to * decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then * call this method again with an empty string to flush the stream: @@ -2047,10 +2072,37 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2072 * See also Zlib::Inflate.new */ static VALUE -rb_inflate_inflate(VALUE obj, VALUE src) +rb_inflate_inflate(int argc, VALUE* argv, VALUE obj) { struct zstream *z = get_zstream(obj); - VALUE dst; + VALUE dst, src, opts, buffer = Qnil; + + if (OPTHASH_GIVEN_P(opts)) { + VALUE buf; + rb_get_kwargs(opts, &id_buffer, 0, 1, &buf); + if (buf != Qundef && buf != Qnil) { + buffer = StringValue(buf); + } + } + if (buffer != Qnil) { + if (!(ZSTREAM_REUSE_BUFFER_P(z) && z->buf == buffer)) { + long len = RSTRING_LEN(buffer); + if (len >= ZSTREAM_AVAIL_OUT_STEP_MAX) { + rb_str_modify(buffer); + } + else { + len = ZSTREAM_AVAIL_OUT_STEP_MAX - len; + rb_str_modify_expand(buffer, len); + } + rb_str_set_len(buffer, 0); + z->flags |= ZSTREAM_REUSE_BUFFER; + z->buf = buffer; + } + } else if (ZSTREAM_REUSE_BUFFER_P(z)) { + z->flags &= ~ZSTREAM_REUSE_BUFFER; + z->buf = Qnil; + } + rb_scan_args(argc, argv, "10", &src); if (ZSTREAM_IS_FINISHED(z)) { if (NIL_P(src)) { @@ -2059,7 +2111,11 @@ rb_inflate_inflate(VALUE obj, VALUE src) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2111 else { StringValue(src); zstream_append_buffer2(z, src); - dst = rb_str_new(0, 0); + if (ZSTREAM_REUSE_BUFFER_P(z)) { + dst = rb_str_resize(buffer, 0); + } else { + dst = rb_str_new(0, 0); + } } } else { @@ -4340,8 +4396,6 @@ zlib_gzip_end(struct gzfile *gz) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L4396 zstream_end(&gz->z); } -#define OPTHASH_GIVEN_P(opts) \ - (argc > 0 && !NIL_P((opts) = rb_check_hash_type(argv[argc-1])) && (--argc, 1)) static ID id_level, id_strategy; static VALUE zlib_gzip_run(VALUE arg); @@ -4588,7 +4642,7 @@ Init_zlib(void) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L4642 rb_define_alloc_func(cInflate, rb_inflate_s_allocate); rb_define_method(cInflate, "initialize", rb_inflate_initialize, -1); rb_define_method(cInflate, "add_dictionary", rb_inflate_add_dictionary, 1); - rb_define_method(cInflate, "inflate", rb_inflate_inflate, 1); + rb_define_method(cInflate, "inflate", rb_inflate_inflate, -1); rb_define_method(cInflate, "<<", rb_inflate_addstr, 1); rb_define_method(cInflate, "sync", rb_inflate_sync, 1); rb_define_method(cInflate, "sync_point?", rb_inflate_sync_point_p, 0); @@ -4797,6 +4851,7 @@ Init_zlib(void) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L4851 id_level = rb_intern("level"); id_strategy = rb_intern("strategy"); + id_buffer = rb_intern("buffer"); #endif /* GZIP_SUPPORT */ } diff --git a/test/zlib/test_zlib.rb b/test/zlib/test_zlib.rb index c72fe76..addd427 100644 --- a/test/zlib/test_zlib.rb +++ b/test/zlib/test_zlib.rb @@ -363,6 +363,65 @@ if defined? Zlib https://github.com/ruby/ruby/blob/trunk/test/zlib/test_zlib.rb#L363 } end + def test_inflate_buffer + s = Zlib::Deflate.deflate("foo") + z = Zlib::Inflate.new + buf = String.new + s = z.inflate(s, buffer: buf) + assert_same(buf, s) + buf = String.new + s << z.inflate(nil, buffer: buf) + assert_equal("foo", s) + z.inflate("foo", buffer: buf) # ??? + z << "foo" # ??? + end + + def test_inflate_buffer_partial_input + deflated = Zlib::Deflate.deflate "\0" + + z = Zlib::Inflate.new + + inflated = "".dup + + buf = String.new + deflated.each_char do |byte| + inflated << z.inflate(byte, buffer: buf) + end + + inflated << z.finish + + assert_equal "\0", inflated + end + + def test_inflate_buffer_chunked + # s = Zlib::Deflate.deflate("0" * 100_000) + zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ + "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\257\006\351\247BH" + + chunks = [] + + z = Zlib::Inflate.new + + buf = String.new + z.inflate(zeros, buffer: buf) do |chunk| + assert_same(buf, chunk) + chunks << chunk.dup + end + + assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696], + chunks.map { |chunk| chunk.size } + + asser (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/