ruby-changes:22176
From: nari <ko1@a...>
Date: Sat, 7 Jan 2012 23:02:34 +0900 (JST)
Subject: [ruby-changes:22176] nari:r34225 (trunk): * gc.c: use Bitmap Marking algorithm to avoid copy-on-write of
nari	2012-01-07 23:02:23 +0900 (Sat, 07 Jan 2012)

  New Revision: 34225

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=34225

  Log:
    * gc.c: use Bitmap Marking algorithm to avoid copy-on-write of
      memory pages. See [ruby-dev:45085] [Feature #5839]
      [ruby-core:41916].

    * include/ruby/ruby.h : FL_MARK rename to FL_RESERVED1.

    * node.h : ditto.

    * debug.c : ditto.

    * object.c (rb_obj_clone): FL_MARK move to a bitmap.

    * class.c (rb_singleton_class_clone): ditto.

  Modified files:
    trunk/ChangeLog
    trunk/class.c
    trunk/debug.c
    trunk/gc.c
    trunk/include/ruby/ruby.h
    trunk/node.h
    trunk/object.c

Index: debug.c
===================================================================
--- debug.c	(revision 34224)
+++ debug.c	(revision 34225)
@@ -32,8 +32,8 @@
         RUBY_ENC_CODERANGE_7BIT = ENC_CODERANGE_7BIT,
         RUBY_ENC_CODERANGE_VALID = ENC_CODERANGE_VALID,
         RUBY_ENC_CODERANGE_BROKEN = ENC_CODERANGE_BROKEN,
-        RUBY_FL_MARK = FL_MARK,
-        RUBY_FL_RESERVED = FL_RESERVED,
+        RUBY_FL_RESERVED1 = FL_RESERVED1,
+        RUBY_FL_RESERVED2 = FL_RESERVED2,
         RUBY_FL_FINALIZE = FL_FINALIZE,
         RUBY_FL_TAINT = FL_TAINT,
         RUBY_FL_UNTRUSTED = FL_UNTRUSTED,
Index: include/ruby/ruby.h
===================================================================
--- include/ruby/ruby.h	(revision 34224)
+++ include/ruby/ruby.h	(revision 34225)
@@ -933,8 +933,8 @@
 #define RCOMPLEX(obj) (R_CAST(RComplex)(obj))

 #define FL_SINGLETON FL_USER0
-#define FL_MARK (((VALUE)1)<<5)
-#define FL_RESERVED (((VALUE)1)<<6) /* will be used in the future GC */
+#define FL_RESERVED1 (((VALUE)1)<<5)
+#define FL_RESERVED2 (((VALUE)1)<<6) /* will be used in the future GC */
 #define FL_FINALIZE (((VALUE)1)<<7)
 #define FL_TAINT (((VALUE)1)<<8)
 #define FL_UNTRUSTED (((VALUE)1)<<9)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 34224)
+++ ChangeLog	(revision 34225)
@@ -1,3 +1,19 @@
+Sat Jan  7 22:25:50 2012  Narihiro Nakamura  <authornari@g...>
+
+	* gc.c: use Bitmap Marking algorithm to avoid copy-on-write of
+	  memory pages. See [ruby-dev:45085] [Feature #5839]
+	  [ruby-core:41916].
+
+	* include/ruby/ruby.h : FL_MARK rename to FL_RESERVED1.
+
+	* node.h : ditto.
+
+	* debug.c : ditto.
+
+	* object.c (rb_obj_clone): FL_MARK move to a bitmap.
+
+	* class.c (rb_singleton_class_clone): ditto.
+
 Sat Jan  7 00:47:07 2012  CHIKANAGA Tomoyuki  <nagachika00@g...>

 	* configure.in: always define CANONICALIZATION_FOR_MATHN.
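The rationale in the log, spelled out: with the mark bit kept in each object header (FL_MARK), every GC cycle writes into every live object, so after fork() the child dirties nearly all heap pages and loses copy-on-write sharing with its parent. Keeping mark bits in a small side bitmap confines the mark phase's writes to that bitmap. A rough standalone sketch of the idea (not part of this diff; all names are illustrative):

/* Standalone sketch, not part of the diff: mark bits held outside the
 * objects.  Marking writes only to mark_bits[]; the pages holding the
 * objects themselves stay clean, so a forked child keeps sharing them. */
#include <stdint.h>
#include <stdio.h>

#define NOBJS 1024
#define BITS_PER_WORD (sizeof(uintptr_t) * 8)

struct toy_obj { uintptr_t flags; void *payload; };      /* stand-in for RVALUE */

static struct toy_obj heap[NOBJS];                        /* the "object pages"  */
static uintptr_t mark_bits[NOBJS / BITS_PER_WORD + 1];    /* the side bitmap     */

static void mark(size_t i)
{
    mark_bits[i / BITS_PER_WORD] |= (uintptr_t)1 << (i % BITS_PER_WORD);
}

static int is_marked(size_t i)
{
    return (mark_bits[i / BITS_PER_WORD] >> (i % BITS_PER_WORD)) & 1;
}

int main(void)
{
    mark(3);                                   /* mark phase writes to the bitmap... */
    printf("obj 3 marked: %d\n", is_marked(3));
    printf("obj 3 header untouched: flags == %lu\n",
           (unsigned long)heap[3].flags);      /* ...and never to the object page    */
    return 0;
}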
Index: object.c
===================================================================
--- object.c	(revision 34224)
+++ object.c	(revision 34225)
@@ -285,7 +285,7 @@
     if (FL_TEST(singleton, FL_SINGLETON)) {
         rb_singleton_class_attached(singleton, clone);
     }
-    RBASIC(clone)->flags = (RBASIC(obj)->flags | FL_TEST(clone, FL_TAINT) | FL_TEST(clone, FL_UNTRUSTED)) & ~(FL_FREEZE|FL_FINALIZE|FL_MARK);
+    RBASIC(clone)->flags = (RBASIC(obj)->flags | FL_TEST(clone, FL_TAINT) | FL_TEST(clone, FL_UNTRUSTED)) & ~(FL_FREEZE|FL_FINALIZE);
     init_copy(clone, obj);
     rb_funcall(clone, id_init_clone, 1, obj);
     RBASIC(clone)->flags |= RBASIC(obj)->flags & FL_FREEZE;
Index: gc.c
===================================================================
--- gc.c	(revision 34224)
+++ gc.c	(revision 34225)
@@ -24,6 +24,8 @@
 #include <stdio.h>
 #include <setjmp.h>
 #include <sys/types.h>
+#include <malloc.h>
+#include <assert.h>

 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
@@ -306,11 +308,14 @@
 #endif

 struct heaps_slot {
-    void *membase;
     RVALUE *slot;
     size_t limit;
+    uintptr_t *bits;
+    RVALUE *freelist;
     struct heaps_slot *next;
     struct heaps_slot *prev;
+    struct heaps_slot *free_next;
+    struct heaps_slot *free_prev;
 };

 struct sorted_heaps_slot {
@@ -319,6 +324,10 @@
     struct heaps_slot *slot;
 };

+struct heaps_free_bitmap {
+    struct heaps_free_bitmap *next;
+};
+
 struct gc_list {
     VALUE *varptr;
     struct gc_list *next;
@@ -341,10 +350,11 @@
 	size_t increment;
 	struct heaps_slot *ptr;
 	struct heaps_slot *sweep_slots;
+	struct heaps_slot *free_slots;
 	struct sorted_heaps_slot *sorted;
 	size_t length;
 	size_t used;
-	RVALUE *freelist;
+	struct heaps_free_bitmap *free_bitmap;
 	RVALUE *range[2];
 	RVALUE *freed;
 	size_t live_num;
@@ -393,7 +403,6 @@
 #define heaps			objspace->heap.ptr
 #define heaps_length		objspace->heap.length
 #define heaps_used		objspace->heap.used
-#define freelist		objspace->heap.freelist
 #define lomem			objspace->heap.range[0]
 #define himem			objspace->heap.range[1]
 #define heaps_inc		objspace->heap.increment
@@ -416,6 +425,8 @@

 #define nonspecial_obj_id(obj) (VALUE)((SIGNED_VALUE)(obj)|FIXNUM_FLAG)

+#define HEAP_SLOT(p) ((struct heaps_slot *)(p))
+
 static void rb_objspace_call_finalizer(rb_objspace_t *objspace);

 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
@@ -478,6 +489,7 @@
 static void gc_sweep(rb_objspace_t *);
 static void slot_sweep(rb_objspace_t *, struct heaps_slot *);
 static void gc_clear_mark_on_sweep_slots(rb_objspace_t *);
+static void aligned_free(void *);

 void
 rb_objspace_free(rb_objspace_t *objspace)
@@ -495,11 +507,18 @@
 	    free(list);
 	}
     }
+    if (objspace->heap.free_bitmap) {
+        struct heaps_free_bitmap *list, *next;
+        for (list = objspace->heap.free_bitmap; list; list = next) {
+            next = list->next;
+            free(list);
+        }
+    }
     if (objspace->heap.sorted) {
 	size_t i;
 	for (i = 0; i < heaps_used; ++i) {
-	    free(objspace->heap.sorted[i].slot->membase);
-	    free(objspace->heap.sorted[i].slot);
+	    free(objspace->heap.sorted[i].slot->bits);
+	    aligned_free(objspace->heap.sorted[i].slot);
 	}
 	free(objspace->heap.sorted);
 	heaps_used = 0;
@@ -509,24 +528,24 @@
 }
 #endif

-/* tiny heap size */
-/* 32KB */
-/*#define HEAP_SIZE 0x8000 */
-/* 128KB */
-/*#define HEAP_SIZE 0x20000 */
-/* 64KB */
-/*#define HEAP_SIZE 0x10000 */
-/* 16KB */
-#define HEAP_SIZE 0x4000
-/* 8KB */
-/*#define HEAP_SIZE 0x2000 */
-/* 4KB */
-/*#define HEAP_SIZE 0x1000 */
-/* 2KB */
-/*#define HEAP_SIZE 0x800 */
+/* tiny heap size: 16KB */
+#define HEAP_ALIGN_LOG 14
+#define HEAP_ALIGN 0x4000
+#define HEAP_ALIGN_MASK 0x3fff
+#define HEAP_SIZE HEAP_ALIGN

-#define HEAP_OBJ_LIMIT (HEAP_SIZE / (unsigned int)sizeof(struct RVALUE))
+#define HEAP_OBJ_LIMIT (HEAP_SIZE/(unsigned int)sizeof(struct RVALUE) - (sizeof(struct heaps_slot)/(unsigned int)sizeof(struct RVALUE)+1))
+#define HEAP_BITMAP_LIMIT (HEAP_OBJ_LIMIT/sizeof(uintptr_t)+1)
+
+#define GET_HEAP_SLOT(x) (HEAP_SLOT(((uintptr_t)x) & ~(HEAP_ALIGN_MASK)))
+#define GET_HEAP_BITMAP(x) (GET_HEAP_SLOT(x)->bits)
+#define NUM_IN_SLOT(p) (((uintptr_t)p & HEAP_ALIGN_MASK)/sizeof(RVALUE))
+#define BITMAP_INDEX(p) (NUM_IN_SLOT(p) / (sizeof(uintptr_t) * 8))
+#define BITMAP_OFFSET(p) (NUM_IN_SLOT(p) & ((sizeof(uintptr_t) * 8)-1))
+#define MARKED_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] & ((uintptr_t)1 << BITMAP_OFFSET(p)))
+#define MARK_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] = bits[BITMAP_INDEX(p)] | ((uintptr_t)1 << BITMAP_OFFSET(p)))
+#define CLEAR_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] &= ~((uintptr_t)1 << BITMAP_OFFSET(p)))
+
 extern st_table *rb_class_tbl;

 int ruby_disable_gc_stress = 0;
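Because HEAP_SIZE equals the 16KB alignment, the macros above can turn an object pointer into a slot-relative index and a bit position with plain mask-and-divide arithmetic. A small self-contained example of that arithmetic (the 40-byte object size and the address below are made-up illustration values, not the real platform figures):

/* Standalone sketch of the NUM_IN_SLOT/BITMAP_INDEX/BITMAP_OFFSET arithmetic
 * above.  The object size and address are illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_MASK 0x3fff                       /* HEAP_ALIGN - 1 for 16KB slots */
#define OBJ_SIZE 40                             /* illustrative sizeof(RVALUE)   */
#define BITS_PER_WORD (sizeof(uintptr_t) * 8)

int main(void)
{
    uintptr_t slot_base = 0x38004000;           /* any 16KB-aligned address      */
    uintptr_t obj = slot_base + 7 * OBJ_SIZE;   /* the 8th object in the slot    */

    uintptr_t num_in_slot = (obj & ALIGN_MASK) / OBJ_SIZE;        /* NUM_IN_SLOT   */
    uintptr_t word = num_in_slot / BITS_PER_WORD;                 /* BITMAP_INDEX  */
    uintptr_t bit = num_in_slot & (BITS_PER_WORD - 1);            /* BITMAP_OFFSET */

    printf("object #%lu -> bits[%lu], bit %lu\n",
           (unsigned long)num_in_slot, (unsigned long)word, (unsigned long)bit);
    return 0;
}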
@@ -998,14 +1017,15 @@
     }
 }

-
 static void
 allocate_sorted_heaps(rb_objspace_t *objspace, size_t next_heaps_length)
 {
     struct sorted_heaps_slot *p;
-    size_t size;
+    struct heaps_free_bitmap *bits;
+    size_t size, add, i;

     size = next_heaps_length*sizeof(struct sorted_heaps_slot);
+    add = next_heaps_length - heaps_used;

     if (heaps_used > 0) {
 	p = (struct sorted_heaps_slot *)realloc(objspace->heap.sorted, size);
@@ -1019,10 +1039,76 @@
 	during_gc = 0;
 	rb_memerror();
     }
-    heaps_length = next_heaps_length;
+
+    for (i = 0; i < add; i++) {
+        bits = (struct heaps_free_bitmap *)malloc(HEAP_BITMAP_LIMIT * sizeof(uintptr_t));
+        if (bits == 0) {
+            during_gc = 0;
+            rb_memerror();
+            return;
+        }
+        bits->next = objspace->heap.free_bitmap;
+        objspace->heap.free_bitmap = bits;
+    }
 }

+static void *
+aligned_malloc(size_t aligned_size)
+{
+    void *res;
+
+#if __MINGW32__
+    res = __mingw_aligned_malloc(aligned_size, aligned_size);
+#elif _WIN32 || defined __CYGWIN__
+    res = _aligned_malloc(aligned_size, aligned_size);
+#else
+# if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600
+    if (posix_memalign(&res, aligned_size, aligned_size) == 0) {
+        return res;
+    } else {
+        return NULL;
+    }
+# else
+    res = memalign(aligned_size, aligned_size);
+# endif
+#endif
+    return res;
+}
+
 static void
+aligned_free(void *ptr)
+{
+#if _WIN32 || defined __CYGWIN__
+    _aligned_free(ptr);
+#else
+    free(ptr);
+#endif
+}
+
+static void
+link_free_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
+{
+    slot->free_next = objspace->heap.free_slots;
+    if (objspace->heap.free_slots) {
+        objspace->heap.free_slots->free_prev = slot;
+    }
+    objspace->heap.free_slots = slot;
+}
+
+static void
+unlink_free_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
+{
+    if (slot->free_prev)
+        slot->free_prev->free_next = slot->free_next;
+    if (slot->free_next)
+        slot->free_next->free_prev = slot->free_prev;
+    if (objspace->heap.free_slots == slot)
+        objspace->heap.free_slots = slot->free_next;
+    slot->free_prev = NULL;
+    slot->free_next = NULL;
+}
+
+static void
 assign_heap_slot(rb_objspace_t *objspace)
 {
     RVALUE *p, *pend, *membase;
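aligned_malloc() above requests a block whose size equals its alignment, and assign_heap_slot() (next hunk) places the struct heaps_slot header at the start of that block; that is what lets GET_HEAP_SLOT() recover the slot header from any interior object pointer by masking the low bits. A standalone illustration of the same trick (not the commit's code), using posix_memalign(), one of the allocators aligned_malloc() falls back to:

/* Standalone illustration: a size-aligned slot lets an interior pointer be
 * mapped back to the slot header by masking, as GET_HEAP_SLOT() does. */
#define _POSIX_C_SOURCE 200112L
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SLOT_SIZE 0x4000                  /* 16KB, mirroring HEAP_ALIGN */
#define SLOT_MASK ((uintptr_t)SLOT_SIZE - 1)

struct slot_header { uintptr_t *bits; };  /* stand-in for struct heaps_slot */

int main(void)
{
    void *slot;
    if (posix_memalign(&slot, SLOT_SIZE, SLOT_SIZE) != 0) return 1;

    char *interior = (char *)slot + 1234;              /* some object in the slot */
    struct slot_header *h =
        (struct slot_header *)((uintptr_t)interior & ~SLOT_MASK);

    printf("recovered header == slot base: %d\n", (void *)h == slot);
    free(slot);
    return 0;
}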
@@ -1031,17 +1117,12 @@
     size_t objs;

     objs = HEAP_OBJ_LIMIT;
-    p = (RVALUE*)malloc(HEAP_SIZE);
+    p = (RVALUE*)aligned_malloc(HEAP_SIZE);

     if (p == 0) {
 	during_gc = 0;
 	rb_memerror();
     }
-    slot = (struct heaps_slot *)malloc(sizeof(struct heaps_slot));
-    if (slot == 0) {
-	xfree(p);
-	during_gc = 0;
-	rb_memerror();
-    }
+    slot = (struct heaps_slot *)p;
     MEMZERO((void*)slot, struct heaps_slot, 1);

     slot->next = heaps;
@@ -1049,11 +1130,12 @@
     heaps = slot;

     membase = p;
+    p = (RVALUE*)((VALUE)p + sizeof(struct heaps_slot));
     if ((VALUE)p % sizeof(RVALUE) != 0) {
-	p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
-	if ((HEAP_SIZE - HEAP_OBJ_LIMIT * sizeof(RVALUE)) < (size_t)((char*)p - (char*)membase)) {
-	    objs--;
-	}
+        p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
+        if ((HEAP_SIZE - HEAP_OBJ_LIMIT * sizeof(RVALUE)) < (size_t)((char*)p - (char*)membase)) {
+            objs--;
+        }
     }

     lo = 0;
@@ -1061,7 +1143,7 @@
     while (lo < hi) {
 	register RVALUE *mid_membase;
 	mid = (lo + hi) / 2;
-	mid_membase = objspace->heap.sorted[mid].slot->membase;
+	mid_membase = (void *)objspace->heap.sorted[mid].slot;
 	if (mid_membase < membase) {
 	    lo = mid + 1;
 	}
@@ -1078,9 +1160,12 @@
     objspace->heap.sorted[hi].slot = slot;
     objspace->heap.sorted[hi].start = p;
     objspace->heap.sorted[hi].end = (p + objs);
-    heaps->membase = membase;
     heaps->slot = p;
     heaps->limit = objs;
+    assert(objspace->heap.free_bitmap != NULL);
+    heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
+    objspace->heap.free_bitmap = objspace->heap.free_bitmap->next;
+    memset(heaps->bits, 0, HEAP_BITMAP_LIMIT * sizeof(uintptr_t));
    objspace->heap.free_num += objs;
     pend = p + objs;
     if (lomem == 0 || lomem > p) lomem = p;
@@ -1089,19 +1174,24 @@
     while (p < pend) {
 	p->as.free.flags = 0;
-	p->as.free.next = freelist;
-	freelist = p;
+	p->as.free.next = heaps->freelist;
+	heaps->freelist = p;
 	p++;
     }
+    link_free_heap_slot(objspace, heaps);
 }

 static void
 add_heap_slots(rb_objspace_t *objspace, size_t add)
 {
     size_t i;
+    size_t next_heaps_length;

-    if ((heaps_used + add) > heaps_length) {
-	allocate_sorted_heaps(objspace, heaps_used + add);
+    next_heaps_length = heaps_used + add;
+
+    if (next_heaps_length > heaps_length) {
+	allocate_sorted_heaps(objspace, next_heaps_length);
+	heaps_length = next_heaps_length;
     }

     for (i = 0; i < add; i++) {
@@ -1151,6 +1241,7 @@

     if (next_heaps_length > heaps_length) {
 	allocate_sorted_heaps(objspace, next_heaps_length);
+	heaps_length = next_heaps_length;
     }
 }

@@ -1173,6 +1264,7 @@
 }

 #define RANY(o) ((RVALUE*)(o))
+#define has_free_object (objspace->heap.free_slots && objspace->heap.free_slots->freelist)

 VALUE
 rb_newobj(void)
@@ -1193,15 +1285,18 @@
 	}
     }

-    if (UNLIKELY(!freelist)) {
+    if (UNLIKELY(!has_free_object)) {
 	if (!gc_lazy_sweep(objspace)) {
 	    during_gc = 0;
 	    rb_memerror();
 	}
     }

-    obj = (VALUE)freelist;
-    freelist = freelist->as.free.next;
+    obj = (VALUE)objspace->heap.free_slots->freelist;
+    objspace->heap.free_slots->freelist = RANY(obj)->as.free.next;
+    if (objspace->heap.free_slots->freelist == NULL) {
+        unlink_free_heap_slot(objspace, objspace->heap.free_slots);
+    }
     MEMZERO((void*)obj, RVALUE, 1);
 #ifdef GC_DEBUG
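rb_newobj() now pops objects from the freelist of the first slot on objspace->heap.free_slots rather than from one global freelist, and unlinks a slot from that list the moment its freelist is exhausted, so allocation never scans slots with nothing free. A simplified standalone sketch of that fast path (the types and names are stand-ins, not the real RVALUE/heaps_slot):

/* Simplified standalone sketch of the per-slot freelist allocation fast path. */
#include <stddef.h>
#include <stdio.h>

struct cell { struct cell *next; };
struct slot { struct cell *freelist; struct slot *free_next; };

static struct slot *free_slots;      /* slots that still have free cells */

static void *alloc_cell(void)
{
    struct cell *c;
    if (!free_slots || !free_slots->freelist)   /* "has_free_object" is false        */
        return NULL;                            /* the real GC would sweep/grow here */
    c = free_slots->freelist;
    free_slots->freelist = c->next;
    if (free_slots->freelist == NULL)           /* slot drained: unlink it */
        free_slots = free_slots->free_next;
    return c;
}

int main(void)
{
    struct cell cells[2] = { { &cells[1] }, { NULL } };
    struct slot s = { &cells[0], NULL };
    void *a, *b, *c;

    free_slots = &s;
    a = alloc_cell();
    b = alloc_cell();
    c = alloc_cell();                /* NULL: the only slot was drained and unlinked */
    printf("%p %p %p\n", a, b, c);
    return 0;
}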
@@ -1372,8 +1467,8 @@
     for (i = 0; i < heaps_used; i++) {
 	p = objspace->heap.sorted[i].start; pend = objspace->heap.sorted[i].end;
 	while (p < pend) {
-	    if ((p->as.basic.flags & FL_MARK) &&
-		(p->as.basic.flags != FL_MARK)) {
+	    if (MARKED_IN_BITMAP(GET_HEAP_BITMAP(p), p) &&
+		p->as.basic.flags) {
 		gc_mark_children(objspace, (VALUE)p, 0);
 	    }
 	    p++;
 	}
@@ -1641,12 +1736,14 @@
 gc_mark(rb_objspace_t *objspace, VALUE ptr, int lev)
 {
     register RVALUE *obj;
+    register uintptr_t *bits;

     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    bits = GET_HEAP_BITMAP(ptr);
+    if (MARKED_IN_BITMAP(bits, ptr)) return;  /* already marked */
+    MARK_IN_BITMAP(bits, ptr);
     objspace->heap.live_num++;

     if (lev > GC_LEVEL_MAX || (lev == 0 && stack_check(STACKFRAME_FOR_GC_MARK))) {
@@ -1674,6 +1771,7 @@
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
 {
     register RVALUE *obj = RANY(ptr);
+    register uintptr_t *bits;

     goto marking;		/* skip */

@@ -1681,8 +1779,9 @@
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    bits = GET_HEAP_BITMAP(ptr);
+    if (MARKED_IN_BITMAP(bits, ptr)) return;  /* already marked */
+    MARK_IN_BITMAP(bits, ptr);
     objspace->heap.live_num++;

   marking:
@@ -1958,13 +2057,18 @@
 static int obj_free(rb_objspace_t *, VALUE);

-static inline void
-add_freelist(rb_objspace_t *objspace, RVALUE *p)
+static inline struct heaps_slot *
+add_slot_local_freelist(rb_objspace_t *objspace, RVALUE *p)
 {
+    struct heaps_slot *slot;
+
     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
     p->as.free.flags = 0;
-    p->as.free.next = freelist;
-    freelist = p;
+    slot = GET_HEAP_SLOT(p);
+    p->as.free.next = slot->freelist;
+    slot->freelist = p;
+
+    return slot;
 }

 static void
@@ -1974,12 +2078,9 @@
 	RVALUE *tmp = p->as.free.next;
 	run_final(objspace, (VALUE)p);
 	if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */
-	    if (objspace->heap.sweep_slots) {
-		p->as.free.flags = 0;
-	    }
-	    else {
+	    add_slot_local_freelist(objspace, p);
+	    if (!is_lazy_sweeping(objspace)) {
 		GC_PROF_DEC_LIVE_NUM;
-		add_freelist(objspace, p);
 	    }
 	}
 	else {
@@ -2005,7 +2106,6 @@
     slot->next = NULL;
 }

-
 static void
 free_unused_heaps(rb_objspace_t *objspace)
 {
@@ -2014,13 +2114,16 @@
     for (i = j = 1; j < heaps_used; i++) {
 	if (objspace->heap.sorted[i].slot->limit == 0) {
+	    struct heaps_slot* h = objspace->heap.sorted[i].slot;
+	    ((struct heaps_free_bitmap *)(h->bits))->next =
+		objspace->heap.free_bitmap;
+	    objspace->heap.free_bitmap = (struct heaps_free_bitmap *)h->bits;
 	    if (!last) {
-		last = objspace->heap.sorted[i].slot->membase;
+		last = (RVALUE *)objspace->heap.sorted[i].slot;
 	    }
 	    else {
-		free(objspace->heap.sorted[i].slot->membase);
+		aligned_free(objspace->heap.sorted[i].slot);
 	    }
-	    free(objspace->heap.sorted[i].slot);
 	    heaps_used--;
 	}
 	else {
@@ -2032,52 +2135,64 @@
 	}
     if (last) {
 	if (last < heaps_freed) {
-	    free(heaps_freed);
+	    aligned_free(heaps_freed);
 	    heaps_freed = last;
 	}
 	else {
-	    free(last);
+	    aligned_free(last);
 	}
     }
 }

 static void
+gc_clear_slot_bits(struct heaps_slot *slot)
+{
+    memset(GET_HEAP_BITMAP(slot->slot), 0,
+           HEAP_BITMAP_LIMIT * sizeof(uintptr_t));
+}
+
+static void
 slot_sweep(rb_objspace_t *objspace, struct heaps_slot *sweep_slot)
 {
     size_t free_num = 0, final_num = 0;
     RVALUE *p, *pend;
-    RVALUE *free = freelist, *final = deferred_final_list;
+    RVALUE *final = deferred_final_list;
     int deferred;
+    uintptr_t *bits;

     p = sweep_slot->slot; pend = p + sweep_slot->limit;
+    bits = GET_HEAP_BITMAP(p);
     while (p < pend) {
-	if (!(p->as.basic.flags & FL_MARK)) {
-	    if (p->as.basic.flags &&
-		((deferred = obj_free(objspace, (VALUE)p)) ||
-		 (FL_TEST(p, FL_FINALIZE)))) {
-		if (!deferred) {
-		    p->as.free.flags = T_ZOMBIE;
-		    RDATA(p)->dfree = 0;
+	if ((!(MARKED_IN_BITMAP(bits, p))) && BUILTIN_TYPE(p) != T_ZOMBIE) {
+	    if (p->as.basic.flags) {
+		if ((deferred = obj_free(objspace, (VALUE)p)) ||
+		    (FL_TEST(p, FL_FINALIZE))) {
+		    if (!deferred) {
+			p->as.free.flags = T_ZOMBIE;
+			RDATA(p)->dfree = 0;
+		    }
+		    p->as.free.next = deferred_final_list;
+		    deferred_final_list = p;
+		    if (BUILTIN_TYPE(p) != T_ZOMBIE) {
+			fprintf(stderr, "NOT T_ZOMBIE!!\n");
+		    }
+		    final_num++;
 		}
-		p->as.free.flags |= FL_MARK;
-		p->as.free.next = deferred_final_list;
-		deferred_final_list = p;
-		final_num++;
+		else {
+		    VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
+		    p->as.free.flags = 0;
+		    p->as.free.next = sweep_slot->freelist;
+		    sweep_slot->freelist = p;
+		    free_num++;
+		}
 	    }
 	    else {
-		add_freelist(objspace, p);
 		free_num++;
 	    }
 	}
-	else if (BUILTIN_TYPE(p) == T_ZOMBIE) {
-	    /* objects to be finalized */
-	    /* do nothing remain marked */
-	}
-	else {
-	    RBASIC(p)->flags &= ~FL_MARK;
-	}
 	p++;
     }
+    gc_clear_slot_bits(sweep_slot);
     if (final_num + free_num == sweep_slot->limit &&
 	objspace->heap.free_num > objspace->heap.do_heap_free) {
 	RVALUE *pp;
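A side effect visible in slot_sweep() above: survivors no longer need a per-object FL_MARK clear. Once the sweep loop has pushed unmarked cells onto the slot's local freelist, the whole per-slot bitmap is wiped with one memset in gc_clear_slot_bits(). A simplified standalone sketch of that sweep flow (illustrative types, not the real RVALUE/heaps_slot):

/* Simplified standalone sketch: reclaim unmarked cells onto the slot's own
 * freelist, then clear the slot's mark bitmap in a single memset. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NCELLS 64
#define BITS_PER_WORD (sizeof(uintptr_t) * 8)
#define NWORDS (NCELLS / BITS_PER_WORD + 1)

struct cell { struct cell *next; };
struct slot {
    struct cell cells[NCELLS];
    struct cell *freelist;
    uintptr_t bits[NWORDS];              /* per-slot mark bitmap */
};

static int is_marked(struct slot *s, size_t i)
{
    return (s->bits[i / BITS_PER_WORD] >> (i % BITS_PER_WORD)) & 1;
}

static size_t sweep(struct slot *s)
{
    size_t i, freed = 0;
    for (i = 0; i < NCELLS; i++) {
        if (!is_marked(s, i)) {                  /* unreachable this cycle         */
            s->cells[i].next = s->freelist;      /* push onto slot-local freelist  */
            s->freelist = &s->cells[i];
            freed++;
        }
    }
    memset(s->bits, 0, sizeof(s->bits));         /* clear the whole bitmap at once */
    return freed;
}

int main(void)
{
    static struct slot s;
    s.bits[0] = 0x5;                             /* pretend cells 0 and 2 survived */
    printf("freed %zu of %d cells\n", sweep(&s), NCELLS);
    return 0;
}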
"NOT T_ZOMBIE!!\n"); + } + final_num++; } - p->as.free.flags |= FL_MARK; - p->as.free.next = deferred_final_list; - deferred_final_list = p; - final_num++; + else { + VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE)); + p->as.free.flags = 0; + p->as.free.next = sweep_slot->freelist; + sweep_slot->freelist = p; + free_num++; + } } else { - add_freelist(objspace, p); free_num++; } } - else if (BUILTIN_TYPE(p) == T_ZOMBIE) { - /* objects to be finalized */ - /* do nothing remain marked */ - } - else { - RBASIC(p)->flags &= ~FL_MARK; - } p++; } + gc_clear_slot_bits(sweep_slot); if (final_num + free_num == sweep_slot->limit && objspace->heap.free_num > objspace->heap.do_heap_free) { RVALUE *pp; @@ -2087,10 +2202,14 @@ pp->as.free.flags |= FL_SINGLETON; /* freeing page mark */ } sweep_slot->limit = final_num; - freelist = free; /* cancel this page from freelist */ unlink_heap_slot(objspace, sweep_slot); + unlink_free_heap_slot(objspace, sweep_slot); } else { + if (free_num > 0 && sweep_slot->free_next == NULL && + sweep_slot->free_prev == NULL) { + link_free_heap_slot(objspace, sweep_slot); + } objspace->heap.free_num += free_num; } objspace->heap.final_num += final_num; @@ -2107,7 +2226,7 @@ ready_to_gc(rb_objspace_t *objspace) { if (dont_gc || during_gc) { - if (!freelist) { + if (!has_free_object) { if (!heaps_increment(objspace)) { set_heaps_increment(objspace); heaps_increment(objspace); @@ -2121,7 +2240,6 @@ static void before_gc_sweep(rb_objspace_t *objspace (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/