[前][次][番号順一覧][スレッド一覧]

ruby-changes:49515

From: normal <ko1@a...>
Date: Sat, 6 Jan 2018 05:49:00 +0900 (JST)
Subject: [ruby-changes:49515] normal:r61631 (trunk): zlib: reduce garbage on gzip writes (deflate)

normal	2018-01-06 05:48:55 +0900 (Sat, 06 Jan 2018)

  New Revision: 61631

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=61631

  Log:
    zlib: reduce garbage on gzip writes (deflate)
    
    Zlib::GzipWriter generated large amounts of garbage from
    (struct zstream).input.  Reuse the .input string in place when
    it is hidden (i.e. created by zlib.c itself rather than supplied
    by the user), and recycle it when its lifetime is over.  This
    change alone reduced memory usage of the writer from 90MB to 4.5MB.
    
    For the detached buffer of compressed data used by
    gzfile_write_raw, we can only clear the string (not recycle it)
    since user code may still hold references to it (although its
    contents would be clobbered anyway).  This reduced memory usage
    only slightly, by around 0.5MB, because the buffer holds the
    smaller compressed data.
    
    Combined, these changes reduce the anonymous RSS memory of a
    dedicated writer process from over 90MB to under 4MB.
    
    before:
    
        #      user     system      total        real
    
        writer   7.823332   0.053333   7.876665 (  7.881464)
        writer RssAnon:    92944 kB
        reader   6.969999   0.076666   7.046665 (  7.906377)
        reader RssAnon:   109820 kB
    
    after:
    
        writer   7.359999   0.000000   7.359999 (  7.360639)
        writer RssAnon:     4040 kB
        reader   6.346667   0.070000   6.416667 (  7.387654)
        reader RssAnon:    98272 kB
    
    Script used:
    -------
    # Benchmark: a forked child gzips data into a pipe while the parent
    # gunzips it; each side prints its timing and its RssAnon figure.
    require 'zlib'
    require 'benchmark'
    # number of gzip.write(buf) calls made by the writer
    nr = 16384 * 2
    
    # Print the benchmark result `bm` followed by the calling process's
    # "RssAnon:" line from /proc/<pid>/status, prefixing every line with `pfx`.
    def stats(pfx, bm)
      str = "#{bm}#{File.readlines("/proc/#$$/status").grep(/^RssAnon:/)[0]}"
      puts str.gsub!(/^/m, pfx)
    end
    
    rd, wr = IO.pipe
    # child process: the writer side
    pid = fork do
      # all 256 byte values, repeated 128 times (32768 bytes per write)
      buf = ((0..255).map(&:chr).join * 128).freeze
      rd.close
      gzip = Zlib::GzipWriter.new(wr)
      bm = Benchmark.measure do
        nr.times { gzip.write(buf) }
        gzip.close
        wr.close
      end
      stats('writer ', bm)
    end
    
    # parent process: the reader side; close its copy of the write end
    wr.close
    # single destination string handed to every readpartial call
    buf = ''
    gunzip = Zlib::GzipReader.new(rd)
    # running total of decompressed bytes (accumulated but never printed)
    n = 0
    bm = Benchmark.measure do
      begin
        gunzip.readpartial(16384, buf)
        n += buf.size
      rescue EOFError
        # end of the gzip stream: leave the read loop
        break
      end while true
    end
    stats('reader ', bm)
    # wait for the forked writer to exit
    Process.waitall
    -------
    * ext/zlib/zlib.c (zstream_discard_input): reuse or recycle hidden input
      (zstream_reset_input): clear hidden input
      (zstream_run): detach input and recycle after use
      (gzfile_write_raw): clear buffer after write
      [ruby-core:84638] [Feature #14315]

  Modified files:
    trunk/ext/zlib/zlib.c
Index: ext/zlib/zlib.c
===================================================================
--- ext/zlib/zlib.c	(revision 61630)
+++ ext/zlib/zlib.c	(revision 61631)
@@ -845,19 +845,50 @@ zstream_append_input(struct zstream *z, https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L845
 static void
 zstream_discard_input(struct zstream *z, long len)
 {
-    if (NIL_P(z->input) || RSTRING_LEN(z->input) <= len) {
-	z->input = Qnil;
+    if (NIL_P(z->input)) {
     }
-    else {
-	z->input = rb_str_substr(z->input, len,
-				 RSTRING_LEN(z->input) - len);
+    else if (RBASIC_CLASS(z->input) == 0) {
+	/* hidden, we created z->input and have complete control */
+	char *ptr;
+	long oldlen, newlen;
+
+	RSTRING_GETMEM(z->input, ptr, oldlen);
+	newlen = oldlen - len;
+	if (newlen > 0) {
+	    memmove(ptr, ptr + len, newlen);
+	}
+	if (newlen < 0) {
+	    newlen = 0;
+	}
+	rb_str_resize(z->input, newlen);
+	if (newlen == 0) {
+	    rb_gc_force_recycle(z->input);
+	    z->input = Qnil;
+	}
+	else {
+	    rb_str_set_len(z->input, newlen);
+	}
+    }
+    else { /* do not mangle user-provided data */
+	if (RSTRING_LEN(z->input) <= len) {
+	    z->input = Qnil;
+	}
+	else {
+	    z->input = rb_str_substr(z->input, len,
+				     RSTRING_LEN(z->input) - len);
+	}
     }
 }
 
 static void
 zstream_reset_input(struct zstream *z)
 {
-    z->input = Qnil;
+    if (!NIL_P(z->input) && RBASIC_CLASS(z->input) == 0) {
+	rb_str_resize(z->input, 0);
+    }
+    else {
+	z->input = Qnil;
+    }
 }
 
 static void
@@ -994,7 +1025,7 @@ zstream_run(struct zstream *z, Bytef *sr https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L1025
 {
     struct zstream_run_args args;
     int err;
-    VALUE guard = Qnil;
+    VALUE old_input = Qnil;
 
     args.z = z;
     args.flush = flush;
@@ -1008,12 +1039,13 @@ zstream_run(struct zstream *z, Bytef *sr https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L1039
     }
     else {
 	zstream_append_input(z, src, len);
-	z->stream.next_in = (Bytef*)RSTRING_PTR(z->input);
-	z->stream.avail_in = MAX_UINT(RSTRING_LEN(z->input));
 	/* keep reference to `z->input' so as not to be garbage collected
 	   after zstream_reset_input() and prevent `z->stream.next_in'
 	   from dangling. */
-	guard = z->input;
+	old_input = zstream_detach_input(z);
+	rb_obj_hide(old_input); /* for GVL release and later recycle */
+	z->stream.next_in = (Bytef*)RSTRING_PTR(old_input);
+	z->stream.avail_in = MAX_UINT(RSTRING_LEN(old_input));
     }
 
     if (z->stream.avail_out == 0) {
@@ -1051,7 +1083,10 @@ loop: https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L1083
 
     if (z->stream.avail_in > 0) {
 	zstream_append_input(z, z->stream.next_in, z->stream.avail_in);
-	RB_GC_GUARD(guard); /* prevent tail call to make guard effective */
+    }
+    if (!NIL_P(old_input)) {
+	rb_str_resize(old_input, 0);
+	rb_gc_force_recycle(old_input);
     }
 
     if (args.jump_state)
@@ -2330,6 +2365,7 @@ gzfile_write_raw(struct gzfile *gz) https://github.com/ruby/ruby/blob/trunk/ext/zlib/zlib.c#L2365
 	str = zstream_detach_buffer(&gz->z);
 	OBJ_TAINT(str);  /* for safe */
 	rb_funcall(gz->io, id_write, 1, str);
+	rb_str_resize(str, 0);
 	if ((gz->z.flags & GZFILE_FLAG_SYNC)
 	    && rb_respond_to(gz->io, id_flush))
 	    rb_funcall(gz->io, id_flush, 0);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]