ruby-changes:57810
From: NagayamaRyoga <ko1@a...>
Date: Thu, 19 Sep 2019 17:35:50 +0900 (JST)
Subject: [ruby-changes:57810] 20baa08d65 (master): Improve the output of `RubyVM::InstructionSequence#to_binary` (#2450)
https://git.ruby-lang.org/ruby.git/commit/?id=20baa08d65 From 20baa08d652b844806fab424a2a590408ab613ef Mon Sep 17 00:00:00 2001 From: NagayamaRyoga <38316184+NagayamaRyoga@u...> Date: Thu, 19 Sep 2019 17:35:32 +0900 Subject: Improve the output of `RubyVM::InstructionSequence#to_binary` (#2450) The output of RubyVM::InstructionSequence#to_binary is extremely large. We have reduced the output of #to_binary by more than 70%. The execution speed of RubyVM::InstructionSequence.load_from_binary is about 7% slower, but when reading a binary from a file, it may be faster than the master. Since Bootsnap gem uses #to_binary, this proposal reduces the compilation cache size of Rails projects to about 1/4. See details: [Feature #16163] diff --git a/compile.c b/compile.c index e1cd82c..eabfa65 100644 --- a/compile.c +++ b/compile.c @@ -9181,12 +9181,16 @@ rb_method_for_self_aset(VALUE name, VALUE arg, rb_insn_func_t func) https://github.com/ruby/ruby/blob/trunk/compile.c#L9181 #define IBF_ISEQ_DEBUG 0 #endif +#ifndef IBF_ISEQ_ENABLE_LOCAL_BUFFER +#define IBF_ISEQ_ENABLE_LOCAL_BUFFER 0 +#endif + typedef unsigned int ibf_offset_t; #define IBF_OFFSET(ptr) ((ibf_offset_t)(VALUE)(ptr)) #define IBF_MAJOR_VERSION ISEQ_MAJOR_VERSION #if RUBY_DEVEL -#define IBF_DEVEL_VERSION 0 +#define IBF_DEVEL_VERSION 1 #define IBF_MINOR_VERSION (ISEQ_MINOR_VERSION * 10000 + IBF_DEVEL_VERSION) #else #define IBF_MINOR_VERSION ISEQ_MINOR_VERSION @@ -9200,39 +9204,48 @@ struct ibf_header { https://github.com/ruby/ruby/blob/trunk/compile.c#L9204 unsigned int extra_size; unsigned int iseq_list_size; - unsigned int id_list_size; - unsigned int object_list_size; - + unsigned int global_object_list_size; ibf_offset_t iseq_list_offset; - ibf_offset_t id_list_offset; - ibf_offset_t object_list_offset; + ibf_offset_t global_object_list_offset; }; -struct ibf_dump { +struct ibf_dump_buffer { VALUE str; - VALUE iseq_list; /* [iseq0 offset, ...] 
*/ - VALUE obj_list; /* [objs] */ + VALUE obj_list; /* [objs] */ +}; + +struct ibf_dump { + VALUE iseq_list; /* [iseqs] */ st_table *iseq_table; /* iseq -> iseq number */ - st_table *id_table; /* id -> id number */ + struct ibf_dump_buffer global_buffer; + struct ibf_dump_buffer *current_buffer; }; rb_iseq_t * iseq_alloc(void); +struct ibf_load_buffer { + const char *buff; + ibf_offset_t size; + + VALUE obj_list; /* [obj0, ...] */ + unsigned int obj_list_size; + ibf_offset_t obj_list_offset; +}; + struct ibf_load { - const RUBY_ALIGNAS(SIZEOF_VALUE) char *buff; const struct ibf_header *header; - ID *id_list; /* [id0, ...] */ - VALUE iseq_list; /* [iseq0, ...] */ - VALUE obj_list; /* [obj0, ...] */ + VALUE iseq_list; /* [iseq0, ...] */ + struct ibf_load_buffer global_buffer; VALUE loader_obj; - VALUE str; rb_iseq_t *iseq; + VALUE str; + struct ibf_load_buffer *current_buffer; }; static ibf_offset_t ibf_dump_pos(struct ibf_dump *dump) { - long pos = RSTRING_LEN(dump->str); + long pos = RSTRING_LEN(dump->current_buffer->str); #if SIZEOF_LONG > SIZEOF_INT if (pos >= UINT_MAX) { rb_raise(rb_eRuntimeError, "dump size exceeds"); @@ -9254,9 +9267,9 @@ ibf_dump_align(struct ibf_dump *dump, size_t align) https://github.com/ruby/ruby/blob/trunk/compile.c#L9267 } #endif for (; size > sizeof(padding); size -= sizeof(padding)) { - rb_str_cat(dump->str, padding, sizeof(padding)); + rb_str_cat(dump->current_buffer->str, padding, sizeof(padding)); } - rb_str_cat(dump->str, padding, size); + rb_str_cat(dump->current_buffer->str, padding, size); } } @@ -9264,18 +9277,24 @@ static ibf_offset_t https://github.com/ruby/ruby/blob/trunk/compile.c#L9277 ibf_dump_write(struct ibf_dump *dump, const void *buff, unsigned long size) { ibf_offset_t pos = ibf_dump_pos(dump); - rb_str_cat(dump->str, (const char *)buff, size); + rb_str_cat(dump->current_buffer->str, (const char *)buff, size); /* TODO: overflow check */ return pos; } +static ibf_offset_t +ibf_dump_write_byte(struct ibf_dump *dump, 
unsigned char byte) +{ + return ibf_dump_write(dump, &byte, sizeof(unsigned char)); +} + static void ibf_dump_overwrite(struct ibf_dump *dump, void *buff, unsigned int size, long offset) { - VALUE str = dump->str; + VALUE str = dump->current_buffer->str; char *ptr = RSTRING_PTR(str); if ((unsigned long)(size + offset) > (unsigned long)RSTRING_LEN(str)) - rb_bug("ibf_dump_overwrite: overflow"); + rb_bug("ibf_dump_overwrite: overflow"); memcpy(ptr + offset, buff, size); } @@ -9283,7 +9302,7 @@ static void * https://github.com/ruby/ruby/blob/trunk/compile.c#L9302 ibf_load_alloc(const struct ibf_load *load, ibf_offset_t offset, int size) { void *buff = ruby_xmalloc(size); - memcpy(buff, load->buff + offset, size); + memcpy(buff, load->current_buffer->buff + offset, size); return buff; } @@ -9301,10 +9320,10 @@ ibf_table_lookup(struct st_table *table, st_data_t key) https://github.com/ruby/ruby/blob/trunk/compile.c#L9320 st_data_t val; if (st_lookup(table, key, &val)) { - return (int)val; + return (int)val; } else { - return -1; + return -1; } } @@ -9314,8 +9333,8 @@ ibf_table_index(struct st_table *table, st_data_t key) https://github.com/ruby/ruby/blob/trunk/compile.c#L9333 int index = ibf_table_lookup(table, key); if (index < 0) { /* not found */ - index = (int)table->num_entries; - st_insert(table, key, (st_data_t)index); + index = (int)table->num_entries; + st_insert(table, key, (st_data_t)index); } return index; @@ -9323,47 +9342,50 @@ ibf_table_index(struct st_table *table, st_data_t key) https://github.com/ruby/ruby/blob/trunk/compile.c#L9342 /* dump/load generic */ +static void ibf_dump_object_list(struct ibf_dump *dump, ibf_offset_t *obj_list_offset, unsigned int *obj_list_size); + static VALUE ibf_load_object(const struct ibf_load *load, VALUE object_index); static rb_iseq_t *ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq); static VALUE +ibf_dump_object_list_new(void) +{ + VALUE obj_list = rb_ary_tmp_new(1); + rb_ary_push(obj_list, 
Qnil); /* 0th is nil */ + + return obj_list; +} + +static VALUE ibf_dump_object(struct ibf_dump *dump, VALUE obj) { - long index = RARRAY_LEN(dump->obj_list); + VALUE obj_list = dump->current_buffer->obj_list; + long index = RARRAY_LEN(obj_list); long i; for (i=0; i<index; i++) { - if (RARRAY_AREF(dump->obj_list, i) == obj) return (VALUE)i; /* dedup */ + if (RARRAY_AREF(obj_list, i) == obj) return (VALUE)i; /* dedup */ } - rb_ary_push(dump->obj_list, obj); + rb_ary_push(obj_list, obj); return (VALUE)index; } static VALUE ibf_dump_id(struct ibf_dump *dump, ID id) { - return (VALUE)ibf_table_index(dump->id_table, (st_data_t)id); + if (id == 0 || rb_id2name(id) == NULL) { + return 0; + } + return ibf_dump_object(dump, rb_id2sym(id)); } static ID ibf_load_id(const struct ibf_load *load, const ID id_index) { - ID id; - if (id_index == 0) { - id = 0; - } - else { - id = load->id_list[(long)id_index]; - - if (id == 0) { - long *indices = (long *)(load->buff + load->header->id_list_offset); - VALUE str = ibf_load_object(load, indices[id_index]); - id = NIL_P(str) ? 
0 : rb_intern_str(str); /* str == nil -> internal junk id */ - load->id_list[(long)id_index] = id; - } + return 0; } - - return id; + VALUE sym = ibf_load_object(load, id_index); + return rb_sym2id(sym); } /* dump/load: code */ @@ -9376,19 +9398,19 @@ ibf_dump_callinfo(struct ibf_dump *dump, const struct rb_call_info *ci) https://github.com/ruby/ruby/blob/trunk/compile.c#L9398 static ibf_offset_t ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq); -static rb_iseq_t * +static int ibf_dump_iseq(struct ibf_dump *dump, const rb_iseq_t *iseq) { if (iseq == NULL) { - return (rb_iseq_t *)-1; + return -1; } else { - int iseq_index = ibf_table_lookup(dump->iseq_table, (st_data_t)iseq); - if (iseq_index < 0) { - iseq_index = ibf_table_index(dump->iseq_table, (st_data_t)iseq); - rb_ary_store(dump->iseq_list, iseq_index, LONG2NUM(ibf_dump_iseq_each(dump, rb_iseq_check(iseq)))); - } - return (rb_iseq_t *)(VALUE)iseq_index; + int iseq_index = ibf_table_lookup(dump->iseq_table, (st_data_t)iseq); + if (iseq_index < 0) { + iseq_index = ibf_table_index(dump->iseq_table, (st_data_t)iseq); + rb_ary_push(dump->iseq_list, (VALUE)iseq); + } + return iseq_index; } } @@ -9405,78 +9427,165 @@ ibf_load_gentry(const struct ibf_load *load, const struct rb_global_entry *entry https://github.com/ruby/ruby/blob/trunk/compile.c#L9427 return (VALUE)rb_global_entry(gid); } -static VALUE * +static unsigned char +ibf_load_byte(const struct ibf_load *load, ibf_offset_t *offset) +{ + if (*offset >= load->current_buffer->size) { rb_raise(rb_eRuntimeError, "invalid bytecode"); } + return (unsigned char)load->current_buffer->buff[(*offset)++]; +} + +/* + * Small uint serialization + * 0x00000000_00000000 - 0x00000000_0000007f: 1byte | XXXX XXX1 | + * 0x00000000_00000080 - 0x00000000_00003fff: 2byte | XXXX XX10 | XXXX XXXX | + * 0x00000000_00004000 - 0x00000000_001fffff: 3byte | XXXX X100 | XXXX XXXX | XXXX XXXX | + * 0x00000000_00200000 - 0x00000000_0fffffff: 4byte | XXXX 1000 | XXXX XXXX | 
XXXX XXXX | XXXX XXXX | + * ... + * 0x00020000_00000000 - 0x00ffffff_ffffffff: 8byte | 1000 0000 | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | + * 0x01000000_00000000 - 0xffffffff_ffffffff: 9byte | 0000 0000 | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | X (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/