ruby-changes:72939
From: John <ko1@a...>
Date: Tue, 16 Aug 2022 08:14:29 +0900 (JST)
Subject: [ruby-changes:72939] 0608a9a086 (master): Optimize Marshal dump/load for large (> 31-bit) FIXNUM (#6229)
https://git.ruby-lang.org/ruby.git/commit/?id=0608a9a086 From 0608a9a08693286a7d84845a216927ff2e3c9951 Mon Sep 17 00:00:00 2001 From: John Hawthorn <john@h...> Date: Mon, 15 Aug 2022 16:14:12 -0700 Subject: Optimize Marshal dump/load for large (> 31-bit) FIXNUM (#6229) * Optimize Marshal dump of large fixnum Marshal's FIXNUM type only supports 31-bit fixnums, so on 64-bit platforms the 63-bit fixnums need to be represented in Marshal's BIGNUM. Previously this was done by converting to a bugnum and serializing the bignum object. This commit avoids allocating the intermediate bignum object, instead outputting the T_FIXNUM directly to a Marshal bignum. This maintains the same representation as the previous implementation, including not using LINKs for these large fixnums (an artifact of the previous implementation always allocating a new BIGNUM). This commit also avoids unnecessary st_lookups on immediate values, which we know will not be in that table. * Fastpath for loading FIXNUM from Marshal bignum * Run update-deps --- benchmark/marshal_dump_load_integer.yml | 22 ++++++ common.mk | 3 + marshal.c | 126 ++++++++++++++++++++++++++------ test/ruby/test_marshal.rb | 22 +++++- 4 files changed, 148 insertions(+), 25 deletions(-) create mode 100644 benchmark/marshal_dump_load_integer.yml diff --git a/benchmark/marshal_dump_load_integer.yml b/benchmark/marshal_dump_load_integer.yml new file mode 100644 index 0000000000..78ebf823d2 --- /dev/null +++ b/benchmark/marshal_dump_load_integer.yml @@ -0,0 +1,22 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/marshal_dump_load_integer.yml#L1 +prelude: | + smallint_array = 1000.times.map { |x| x } + bigint32_array = 1000.times.map { |x| x + 2**32 } + bigint64_array = 1000.times.map { |x| x + 2**64 } + + smallint_dump = Marshal.dump(smallint_array) + bigint32_dump = Marshal.dump(bigint32_array) + bigint64_dump = Marshal.dump(bigint64_array) +benchmark: + marshal_dump_integer_small: | + Marshal.dump(smallint_array) + marshal_dump_integer_over_32_bit: | + Marshal.dump(bigint32_array) + marshal_dump_integer_over_64_bit: | + Marshal.dump(bigint64_array) + marshal_load_integer_small: | + Marshal.load(smallint_dump) + marshal_load_integer_over_32_bit: | + Marshal.load(bigint32_dump) + marshal_load_integer_over_64_bit: | + Marshal.load(bigint64_dump) +loop_count: 4000 diff --git a/common.mk b/common.mk index ec5e4ae252..cf08764bc9 100644 --- a/common.mk +++ b/common.mk @@ -8715,12 +8715,15 @@ main.$(OBJEXT): {$(VPATH)}vm_debug.h https://github.com/ruby/ruby/blob/trunk/common.mk#L8715 marshal.$(OBJEXT): $(hdrdir)/ruby/ruby.h marshal.$(OBJEXT): $(top_srcdir)/internal/array.h marshal.$(OBJEXT): $(top_srcdir)/internal/bignum.h +marshal.$(OBJEXT): $(top_srcdir)/internal/bits.h marshal.$(OBJEXT): $(top_srcdir)/internal/class.h marshal.$(OBJEXT): $(top_srcdir)/internal/compilers.h marshal.$(OBJEXT): $(top_srcdir)/internal/encoding.h marshal.$(OBJEXT): $(top_srcdir)/internal/error.h +marshal.$(OBJEXT): $(top_srcdir)/internal/fixnum.h marshal.$(OBJEXT): $(top_srcdir)/internal/gc.h marshal.$(OBJEXT): $(top_srcdir)/internal/hash.h +marshal.$(OBJEXT): $(top_srcdir)/internal/numeric.h marshal.$(OBJEXT): $(top_srcdir)/internal/object.h marshal.$(OBJEXT): $(top_srcdir)/internal/serial.h marshal.$(OBJEXT): $(top_srcdir)/internal/static_assert.h diff --git a/marshal.c b/marshal.c index 325d5f126e..1eeebf7729 100644 --- a/marshal.c +++ b/marshal.c @@ -28,6 +28,7 @@ https://github.com/ruby/ruby/blob/trunk/marshal.c#L28 #include "internal/encoding.h" #include "internal/error.h" #include "internal/hash.h" +#include "internal/numeric.h" #include "internal/object.h" #include "internal/struct.h" #include "internal/symbol.h" @@ -171,6 +172,7 @@ struct dump_arg { https://github.com/ruby/ruby/blob/trunk/marshal.c#L172 st_table *data; st_table *compat_tbl; st_table *encodings; + unsigned long num_entries; }; struct dump_call_arg { @@ -754,6 +756,60 @@ w_objivar(VALUE obj, struct dump_call_arg *arg) https://github.com/ruby/ruby/blob/trunk/marshal.c#L756 w_ivar_each(obj, num, arg); } +// Optimized dump for fixnum larger than 31-bits +static void +w_bigfixnum(VALUE obj, struct dump_arg *arg) +{ + RUBY_ASSERT(FIXNUM_P(obj)); + + w_byte(TYPE_BIGNUM, arg); + +#if SIZEOF_LONG == SIZEOF_VALUE + long num, slen_num; + num = FIX2LONG(obj); +#else + long long num, slen_num; + num = NUM2LL(obj); +#endif + + char sign = num < 0 ? '-' : '+'; + w_byte(sign, arg); + + // Guaranteed not to overflow, as FIXNUM is 1-bit less than long + if (num < 0) num = -num; + + // calculate the size in shorts + int slen = 0; + { + slen_num = num; + while (slen_num) { + slen++; + slen_num = SHORTDN(slen_num); + } + } + + RUBY_ASSERT(slen > 0 && slen <= SIZEOF_LONG / 2); + + w_long((long)slen, arg); + + for (int i = 0; i < slen; i++) { + w_short(num & SHORTMASK, arg); + num = SHORTDN(num); + } + + // We aren't adding this object to the link table, but we need to increment + // the index. + arg->num_entries++; + + RUBY_ASSERT(num == 0); +} + +static void +w_remember(VALUE obj, struct dump_arg *arg) +{ + st_add_direct(arg->data, obj, arg->num_entries++); +} + static void w_object(VALUE obj, struct dump_arg *arg, int limit) { @@ -767,17 +823,6 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) https://github.com/ruby/ruby/blob/trunk/marshal.c#L823 rb_raise(rb_eArgError, "exceed depth limit"); } - if (limit > 0) limit--; - c_arg.limit = limit; - c_arg.arg = arg; - c_arg.obj = obj; - - if (st_lookup(arg->data, obj, &num)) { - w_byte(TYPE_LINK, arg); - w_long((long)num, arg); - return; - } - if (NIL_P(obj)) { w_byte(TYPE_NIL, arg); } @@ -797,19 +842,32 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) https://github.com/ruby/ruby/blob/trunk/marshal.c#L842 w_long(FIX2LONG(obj), arg); } else { - w_object(rb_int2big(FIX2LONG(obj)), arg, limit); + w_bigfixnum(obj, arg); } #endif } else if (SYMBOL_P(obj)) { w_symbol(obj, arg); } - else if (FLONUM_P(obj)) { - st_add_direct(arg->data, obj, arg->data->num_entries); - w_byte(TYPE_FLOAT, arg); - w_float(RFLOAT_VALUE(obj), arg); - } else { + if (st_lookup(arg->data, obj, &num)) { + w_byte(TYPE_LINK, arg); + w_long((long)num, arg); + return; + } + + if (limit > 0) limit--; + c_arg.limit = limit; + c_arg.arg = arg; + c_arg.obj = obj; + + if (FLONUM_P(obj)) { + w_remember(obj, arg); + w_byte(TYPE_FLOAT, arg); + w_float(RFLOAT_VALUE(obj), arg); + return; + } + VALUE v; if (!RBASIC_CLASS(obj)) { @@ -818,7 +876,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) https://github.com/ruby/ruby/blob/trunk/marshal.c#L876 } if (rb_obj_respond_to(obj, s_mdump, TRUE)) { - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); v = dump_funcall(arg, obj, s_mdump, 0, 0); w_class(TYPE_USRMARSHAL, obj, arg, FALSE); @@ -848,11 +906,11 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) https://github.com/ruby/ruby/blob/trunk/marshal.c#L906 if (hasiv) { w_ivar(hasiv, ivobj, encname, &c_arg); } - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); return; } - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj); { @@ -1044,6 +1102,7 @@ clear_dump_arg(struct dump_arg *arg) https://github.com/ruby/ruby/blob/trunk/marshal.c#L1102 arg->symbols = 0; st_free_table(arg->data); arg->data = 0; + arg->num_entries = 0; if (arg->compat_tbl) { st_free_table(arg->compat_tbl); arg->compat_tbl = 0; @@ -1126,6 +1185,7 @@ rb_marshal_dump_limited(VALUE obj, VALUE port, int limit) https://github.com/ruby/ruby/blob/trunk/marshal.c#L1185 arg->dest = 0; arg->symbols = st_init_numtable(); arg->data = rb_init_identtable(); + arg->num_entries = 0; arg->compat_tbl = 0; arg->encodings = 0; arg->str = rb_str_buf_new(0); @@ -1881,10 +1941,28 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ https://github.com/ruby/ruby/blob/trunk/marshal.c#L1941 sign = r_byte(arg); len = r_long(arg); - data = r_bytes0(len * 2, arg); - v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, - INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); - rb_str_resize(data, 0L); + + if (SIZEOF_VALUE >= 8 && len <= 4) { + // Representable within uintptr, likely FIXNUM + VALUE num = 0; + for (int i = 0; i < len; i++) { + num |= (VALUE)r_byte(arg) << (i * 16); + num |= (VALUE)r_byte(arg) << (i * 16 + 8); + } +#if SIZEOF_VALUE == SIZEOF_LONG + v = ULONG2NUM(num); +#else + v = ULL2NUM(num); +#endif + if (sign == '-') { + v = rb_int_uminus(v); + } + } else { + data = r_bytes0(len * 2, arg); + v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, + INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); + rb_str_resize(data, 0L); + } v = r_entry(v, arg); (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/