[前][次][番号順一覧][スレッド一覧]

ruby-changes:57810

From: NagayamaRyoga <ko1@a...>
Date: Thu, 19 Sep 2019 17:35:50 +0900 (JST)
Subject: [ruby-changes:57810] 20baa08d65 (master): Improve the output of `RubyVM::InstructionSequence#to_binary` (#2450)

https://git.ruby-lang.org/ruby.git/commit/?id=20baa08d65

From 20baa08d652b844806fab424a2a590408ab613ef Mon Sep 17 00:00:00 2001
From: NagayamaRyoga <38316184+NagayamaRyoga@u...>
Date: Thu, 19 Sep 2019 17:35:32 +0900
Subject: Improve the output of `RubyVM::InstructionSequence#to_binary` (#2450)

The output of RubyVM::InstructionSequence#to_binary is extremely large.
This change reduces the size of the #to_binary output by more than 70%.

RubyVM::InstructionSequence.load_from_binary becomes about 7% slower, but when the binary is read from a file, loading may be faster than on master.

Since the Bootsnap gem uses #to_binary, this change reduces the compilation cache size of Rails projects to about 1/4.

See details: [Feature #16163]

diff --git a/compile.c b/compile.c
index e1cd82c..eabfa65 100644
--- a/compile.c
+++ b/compile.c
@@ -9181,12 +9181,16 @@ rb_method_for_self_aset(VALUE name, VALUE arg, rb_insn_func_t func) https://github.com/ruby/ruby/blob/trunk/compile.c#L9181
 #define IBF_ISEQ_DEBUG 0
 #endif
 
+#ifndef IBF_ISEQ_ENABLE_LOCAL_BUFFER
+#define IBF_ISEQ_ENABLE_LOCAL_BUFFER 0
+#endif
+
 typedef unsigned int ibf_offset_t;
 #define IBF_OFFSET(ptr) ((ibf_offset_t)(VALUE)(ptr))
 
 #define IBF_MAJOR_VERSION ISEQ_MAJOR_VERSION
 #if RUBY_DEVEL
-#define IBF_DEVEL_VERSION 0
+#define IBF_DEVEL_VERSION 1
 #define IBF_MINOR_VERSION (ISEQ_MINOR_VERSION * 10000 + IBF_DEVEL_VERSION)
 #else
 #define IBF_MINOR_VERSION ISEQ_MINOR_VERSION
@@ -9200,39 +9204,48 @@ struct ibf_header { https://github.com/ruby/ruby/blob/trunk/compile.c#L9204
     unsigned int extra_size;
 
     unsigned int iseq_list_size;
-    unsigned int id_list_size;
-    unsigned int object_list_size;
-
+    unsigned int global_object_list_size;
     ibf_offset_t iseq_list_offset;
-    ibf_offset_t id_list_offset;
-    ibf_offset_t object_list_offset;
+    ibf_offset_t global_object_list_offset;
 };
 
-struct ibf_dump {
+struct ibf_dump_buffer {
     VALUE str;
-    VALUE iseq_list;      /* [iseq0 offset, ...] */
-    VALUE obj_list;       /* [objs] */
+    VALUE obj_list;     /* [objs] */
+};
+
+struct ibf_dump {
+    VALUE iseq_list;      /* [iseqs] */
     st_table *iseq_table; /* iseq -> iseq number */
-    st_table *id_table;   /* id -> id number */
+    struct ibf_dump_buffer global_buffer;
+    struct ibf_dump_buffer *current_buffer;
 };
 
 rb_iseq_t * iseq_alloc(void);
 
+struct ibf_load_buffer {
+    const char *buff;
+    ibf_offset_t size;
+
+    VALUE obj_list; /* [obj0, ...] */
+    unsigned int obj_list_size;
+    ibf_offset_t obj_list_offset;
+};
+
 struct ibf_load {
-    const RUBY_ALIGNAS(SIZEOF_VALUE) char *buff;
     const struct ibf_header *header;
-    ID *id_list;     /* [id0, ...] */
-    VALUE iseq_list; /* [iseq0, ...] */
-    VALUE obj_list;  /* [obj0, ...] */
+    VALUE iseq_list;       /* [iseq0, ...] */
+    struct ibf_load_buffer global_buffer;
     VALUE loader_obj;
-    VALUE str;
     rb_iseq_t *iseq;
+    VALUE str;
+    struct ibf_load_buffer *current_buffer;
 };
 
 static ibf_offset_t
 ibf_dump_pos(struct ibf_dump *dump)
 {
-    long pos = RSTRING_LEN(dump->str);
+    long pos = RSTRING_LEN(dump->current_buffer->str);
 #if SIZEOF_LONG > SIZEOF_INT
     if (pos >= UINT_MAX) {
         rb_raise(rb_eRuntimeError, "dump size exceeds");
@@ -9254,9 +9267,9 @@ ibf_dump_align(struct ibf_dump *dump, size_t align) https://github.com/ruby/ruby/blob/trunk/compile.c#L9267
         }
 #endif
         for (; size > sizeof(padding); size -= sizeof(padding)) {
-            rb_str_cat(dump->str, padding, sizeof(padding));
+            rb_str_cat(dump->current_buffer->str, padding, sizeof(padding));
         }
-        rb_str_cat(dump->str, padding, size);
+        rb_str_cat(dump->current_buffer->str, padding, size);
     }
 }
 
@@ -9264,18 +9277,24 @@ static ibf_offset_t https://github.com/ruby/ruby/blob/trunk/compile.c#L9277
 ibf_dump_write(struct ibf_dump *dump, const void *buff, unsigned long size)
 {
     ibf_offset_t pos = ibf_dump_pos(dump);
-    rb_str_cat(dump->str, (const char *)buff, size);
+    rb_str_cat(dump->current_buffer->str, (const char *)buff, size);
     /* TODO: overflow check */
     return pos;
 }
 
+static ibf_offset_t
+ibf_dump_write_byte(struct ibf_dump *dump, unsigned char byte)
+{
+    return ibf_dump_write(dump, &byte, sizeof(unsigned char));
+}
+
 static void
 ibf_dump_overwrite(struct ibf_dump *dump, void *buff, unsigned int size, long offset)
 {
-    VALUE str = dump->str;
+    VALUE str = dump->current_buffer->str;
     char *ptr = RSTRING_PTR(str);
     if ((unsigned long)(size + offset) > (unsigned long)RSTRING_LEN(str))
-	rb_bug("ibf_dump_overwrite: overflow");
+        rb_bug("ibf_dump_overwrite: overflow");
     memcpy(ptr + offset, buff, size);
 }
 
@@ -9283,7 +9302,7 @@ static void * https://github.com/ruby/ruby/blob/trunk/compile.c#L9302
 ibf_load_alloc(const struct ibf_load *load, ibf_offset_t offset, int size)
 {
     void *buff = ruby_xmalloc(size);
-    memcpy(buff, load->buff + offset, size);
+    memcpy(buff, load->current_buffer->buff + offset, size);
     return buff;
 }
 
@@ -9301,10 +9320,10 @@ ibf_table_lookup(struct st_table *table, st_data_t key) https://github.com/ruby/ruby/blob/trunk/compile.c#L9320
     st_data_t val;
 
     if (st_lookup(table, key, &val)) {
-	return (int)val;
+        return (int)val;
     }
     else {
-	return -1;
+        return -1;
     }
 }
 
@@ -9314,8 +9333,8 @@ ibf_table_index(struct st_table *table, st_data_t key) https://github.com/ruby/ruby/blob/trunk/compile.c#L9333
     int index = ibf_table_lookup(table, key);
 
     if (index < 0) { /* not found */
-	index = (int)table->num_entries;
-	st_insert(table, key, (st_data_t)index);
+        index = (int)table->num_entries;
+        st_insert(table, key, (st_data_t)index);
     }
 
     return index;
@@ -9323,47 +9342,50 @@ ibf_table_index(struct st_table *table, st_data_t key) https://github.com/ruby/ruby/blob/trunk/compile.c#L9342
 
 /* dump/load generic */
 
+static void ibf_dump_object_list(struct ibf_dump *dump, ibf_offset_t *obj_list_offset, unsigned int *obj_list_size);
+
 static VALUE ibf_load_object(const struct ibf_load *load, VALUE object_index);
 static rb_iseq_t *ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq);
 
 static VALUE
+ibf_dump_object_list_new(void)
+{
+    VALUE obj_list = rb_ary_tmp_new(1);
+    rb_ary_push(obj_list, Qnil); /* 0th is nil */
+
+    return obj_list;
+}
+
+static VALUE
 ibf_dump_object(struct ibf_dump *dump, VALUE obj)
 {
-    long index = RARRAY_LEN(dump->obj_list);
+    VALUE obj_list = dump->current_buffer->obj_list;
+    long index = RARRAY_LEN(obj_list);
     long i;
     for (i=0; i<index; i++) {
-	if (RARRAY_AREF(dump->obj_list, i) == obj) return (VALUE)i; /* dedup */
+        if (RARRAY_AREF(obj_list, i) == obj) return (VALUE)i; /* dedup */
     }
-    rb_ary_push(dump->obj_list, obj);
+    rb_ary_push(obj_list, obj);
     return (VALUE)index;
 }
 
 static VALUE
 ibf_dump_id(struct ibf_dump *dump, ID id)
 {
-    return (VALUE)ibf_table_index(dump->id_table, (st_data_t)id);
+    if (id == 0 || rb_id2name(id) == NULL) {
+        return 0;
+    }
+    return ibf_dump_object(dump, rb_id2sym(id));
 }
 
 static ID
 ibf_load_id(const struct ibf_load *load, const ID id_index)
 {
-    ID id;
-
     if (id_index == 0) {
-	id = 0;
-    }
-    else {
-	id = load->id_list[(long)id_index];
-
-	if (id == 0) {
-	    long *indices = (long *)(load->buff + load->header->id_list_offset);
-	    VALUE str = ibf_load_object(load, indices[id_index]);
-	    id = NIL_P(str) ? 0 : rb_intern_str(str); /* str == nil -> internal junk id */
-	    load->id_list[(long)id_index] = id;
-	}
+        return 0;
     }
-
-    return id;
+    VALUE sym = ibf_load_object(load, id_index);
+    return rb_sym2id(sym);
 }
 
 /* dump/load: code */
@@ -9376,19 +9398,19 @@ ibf_dump_callinfo(struct ibf_dump *dump, const struct rb_call_info *ci) https://github.com/ruby/ruby/blob/trunk/compile.c#L9398
 
 static ibf_offset_t ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq);
 
-static rb_iseq_t *
+static int
 ibf_dump_iseq(struct ibf_dump *dump, const rb_iseq_t *iseq)
 {
     if (iseq == NULL) {
-	return (rb_iseq_t *)-1;
+        return -1;
     }
     else {
-	int iseq_index = ibf_table_lookup(dump->iseq_table, (st_data_t)iseq);
-	if (iseq_index < 0) {
-	    iseq_index = ibf_table_index(dump->iseq_table, (st_data_t)iseq);
-	    rb_ary_store(dump->iseq_list, iseq_index, LONG2NUM(ibf_dump_iseq_each(dump, rb_iseq_check(iseq))));
-	}
-	return (rb_iseq_t *)(VALUE)iseq_index;
+        int iseq_index = ibf_table_lookup(dump->iseq_table, (st_data_t)iseq);
+        if (iseq_index < 0) {
+            iseq_index = ibf_table_index(dump->iseq_table, (st_data_t)iseq);
+            rb_ary_push(dump->iseq_list, (VALUE)iseq);
+        }
+        return iseq_index;
     }
 }
 
@@ -9405,78 +9427,165 @@ ibf_load_gentry(const struct ibf_load *load, const struct rb_global_entry *entry https://github.com/ruby/ruby/blob/trunk/compile.c#L9427
     return (VALUE)rb_global_entry(gid);
 }
 
-static VALUE *
+static unsigned char
+ibf_load_byte(const struct ibf_load *load, ibf_offset_t *offset)
+{
+    if (*offset >= load->current_buffer->size) { rb_raise(rb_eRuntimeError, "invalid bytecode"); }
+    return (unsigned char)load->current_buffer->buff[(*offset)++];
+}
+
+/*
+ * Small uint serialization
+ * 0x00000000_00000000 - 0x00000000_0000007f: 1byte | XXXX XXX1 |
+ * 0x00000000_00000080 - 0x00000000_00003fff: 2byte | XXXX XX10 | XXXX XXXX |
+ * 0x00000000_00004000 - 0x00000000_001fffff: 3byte | XXXX X100 | XXXX XXXX | XXXX XXXX |
+ * 0x00000000_00200000 - 0x00000000_0fffffff: 4byte | XXXX 1000 | XXXX XXXX | XXXX XXXX | XXXX XXXX |
+ * ...
+ * 0x00020000_00000000 - 0x00ffffff_ffffffff: 8byte | 1000 0000 | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX |
+ * 0x01000000_00000000 - 0xffffffff_ffffffff: 9byte | 0000 0000 | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | XXXX XXXX | X (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]