[前][次][番号順一覧][スレッド一覧]

ruby-changes:59982

From: NagayamaRyoga <ko1@a...>
Date: Mon, 10 Feb 2020 01:33:52 +0900 (JST)
Subject: [ruby-changes:59982] 6e5e6a40c4 (master): Deduplicate objects efficiently when dumping iseq to binary

https://git.ruby-lang.org/ruby.git/commit/?id=6e5e6a40c4

From 6e5e6a40c4c35aee1cfb7d0effa47354f80baa9e Mon Sep 17 00:00:00 2001
From: NagayamaRyoga <nagayama15@s...>
Date: Wed, 18 Dec 2019 19:26:02 +0900
Subject: Deduplicate objects efficiently when dumping iseq to binary

We were inefficient in cases where there are a lot of duplicates due to
the use of linear search. Use a hash table instead.

These cases are not that rare in the wild.

[Feature #16505]

diff --git a/compile.c b/compile.c
index 9de9114..8b3d83e 100644
--- a/compile.c
+++ b/compile.c
@@ -9520,7 +9520,8 @@ struct ibf_header { https://github.com/ruby/ruby/blob/trunk/compile.c#L9520
 
 struct ibf_dump_buffer {
     VALUE str;
-    VALUE obj_list;     /* [objs] */
+    VALUE obj_list;      /* [objs] */
+    st_table *obj_table; /* obj -> obj number */
 };
 
 struct ibf_dump {
@@ -9666,25 +9667,26 @@ static VALUE ibf_load_object(const struct ibf_load *load, VALUE object_index); https://github.com/ruby/ruby/blob/trunk/compile.c#L9667
 static rb_iseq_t *ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq);
 
 static VALUE
-ibf_dump_object_list_new(void)
+ibf_dump_object_list_new(st_table **obj_table)
 {
     VALUE obj_list = rb_ary_tmp_new(1);
     rb_ary_push(obj_list, Qnil); /* 0th is nil */
 
+    *obj_table = st_init_numtable(); /* need free */
+    rb_st_insert(*obj_table, (st_data_t)Qnil, (st_data_t)0); /* 0th is nil */
+
     return obj_list;
 }
 
 static VALUE
 ibf_dump_object(struct ibf_dump *dump, VALUE obj)
 {
-    VALUE obj_list = dump->current_buffer->obj_list;
-    long index = RARRAY_LEN(obj_list);
-    long i;
-    for (i=0; i<index; i++) {
-        if (RARRAY_AREF(obj_list, i) == obj) return (VALUE)i; /* dedup */
+    int obj_index = ibf_table_lookup(dump->current_buffer->obj_table, (st_data_t)obj);
+    if (obj_index < 0) {
+        obj_index = ibf_table_index(dump->current_buffer->obj_table, (st_data_t)obj);
+        rb_ary_push(dump->current_buffer->obj_list, obj);
     }
-    rb_ary_push(obj_list, obj);
-    return (VALUE)index;
+    return obj_index;
 }
 
 static VALUE
@@ -10372,7 +10374,7 @@ ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq) https://github.com/ruby/ruby/blob/trunk/compile.c#L10374
     struct ibf_dump_buffer *saved_buffer = dump->current_buffer;
     struct ibf_dump_buffer buffer;
     buffer.str = rb_str_new(0, 0);
-    buffer.obj_list = ibf_dump_object_list_new();
+    buffer.obj_list = ibf_dump_object_list_new(&buffer.obj_table);
     dump->current_buffer = &buffer;
 #endif
 
@@ -10477,6 +10479,8 @@ ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq) https://github.com/ruby/ruby/blob/trunk/compile.c#L10479
     ibf_dump_write_small_value(dump, local_obj_list_offset);
     ibf_dump_write_small_value(dump, local_obj_list_size);
 
+    rb_st_free_table(buffer.obj_table);
+
     return offset;
 #else
     return body_offset;
@@ -10504,15 +10508,15 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset) https://github.com/ruby/ruby/blob/trunk/compile.c#L10508
     struct ibf_load_buffer *saved_buffer = load->current_buffer;
     load->current_buffer = &load->global_buffer;
 
-    const ibf_offset_t iseq_start = ibf_load_small_value(load, &reading_pos);
-    const ibf_offset_t iseq_length_bytes = ibf_load_small_value(load, &reading_pos);
-    const ibf_offset_t body_offset = ibf_load_small_value(load, &reading_pos);
+    const ibf_offset_t iseq_start = (ibf_offset_t)ibf_load_small_value(load, &reading_pos);
+    const ibf_offset_t iseq_length_bytes = (ibf_offset_t)ibf_load_small_value(load, &reading_pos);
+    const ibf_offset_t body_offset = (ibf_offset_t)ibf_load_small_value(load, &reading_pos);
 
     struct ibf_load_buffer buffer;
     buffer.buff = load->global_buffer.buff + iseq_start;
     buffer.size = iseq_length_bytes;
-    buffer.obj_list_offset = ibf_load_small_value(load, &reading_pos);
-    buffer.obj_list_size = ibf_load_small_value(load, &reading_pos);
+    buffer.obj_list_offset = (ibf_offset_t)ibf_load_small_value(load, &reading_pos);
+    buffer.obj_list_size = (ibf_offset_t)ibf_load_small_value(load, &reading_pos);
     buffer.obj_list = rb_ary_tmp_new(buffer.obj_list_size);
     rb_ary_resize(buffer.obj_list, buffer.obj_list_size);
 
@@ -11338,6 +11342,10 @@ static void https://github.com/ruby/ruby/blob/trunk/compile.c#L11342
 ibf_dump_free(void *ptr)
 {
     struct ibf_dump *dump = (struct ibf_dump *)ptr;
+    if (dump->global_buffer.obj_table) {
+        st_free_table(dump->global_buffer.obj_table);
+        dump->global_buffer.obj_table = 0;
+    }
     if (dump->iseq_table) {
         st_free_table(dump->iseq_table);
         dump->iseq_table = 0;
@@ -11351,6 +11359,7 @@ ibf_dump_memsize(const void *ptr) https://github.com/ruby/ruby/blob/trunk/compile.c#L11359
     struct ibf_dump *dump = (struct ibf_dump *)ptr;
     size_t size = sizeof(*dump);
     if (dump->iseq_table) size += st_memsize(dump->iseq_table);
+    if (dump->global_buffer.obj_table) size += st_memsize(dump->global_buffer.obj_table);
     return size;
 }
 
@@ -11364,7 +11373,7 @@ static void https://github.com/ruby/ruby/blob/trunk/compile.c#L11373
 ibf_dump_setup(struct ibf_dump *dump, VALUE dumper_obj)
 {
     RB_OBJ_WRITE(dumper_obj, &dump->iseq_list, rb_ary_tmp_new(0));
-    RB_OBJ_WRITE(dumper_obj, &dump->global_buffer.obj_list, ibf_dump_object_list_new());
+    RB_OBJ_WRITE(dumper_obj, &dump->global_buffer.obj_list, ibf_dump_object_list_new(&dump->global_buffer.obj_table));
     RB_OBJ_WRITE(dumper_obj, &dump->global_buffer.str, rb_str_new(0, 0));
     dump->iseq_table = st_init_numtable(); /* need free */
 
-- 
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]