ruby-changes:53618
From: samuel <ko1@a...>
Date: Tue, 20 Nov 2018 18:59:16 +0900 (JST)
Subject: [ruby-changes:53618] samuel:r65834 (trunk): Native implementation of coroutines to improve performance of fibers
samuel 2018-11-20 18:59:10 +0900 (Tue, 20 Nov 2018) New Revision: 65834 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=65834 Log: Native implementation of coroutines to improve performance of fibers Added directories: trunk/coroutine/ trunk/coroutine/amd64/ trunk/coroutine/arm32/ trunk/coroutine/arm64/ trunk/coroutine/win32/ trunk/coroutine/win64/ Added files: trunk/coroutine/amd64/Context.h trunk/coroutine/amd64/Context.s trunk/coroutine/arm32/Context.h trunk/coroutine/arm32/Context.s trunk/coroutine/arm64/Context.h trunk/coroutine/arm64/Context.s trunk/coroutine/win32/Context.asm trunk/coroutine/win32/Context.h trunk/coroutine/win64/Context.asm trunk/coroutine/win64/Context.h trunk/fiber_benchmark.rb Modified files: trunk/.gitignore trunk/common.mk trunk/configure.ac trunk/cont.c Index: configure.ac =================================================================== --- configure.ac (revision 65833) +++ configure.ac (revision 65834) @@ -2274,6 +2274,12 @@ AS_IF([test x"$target_cpu" = xia64], [ https://github.com/ruby/ruby/blob/trunk/configure.ac#L2274 ]) ]) +AS_CASE(["$target_cpu-$target_os"], + [*-darwin*], [ + AC_LIBOBJ([coroutine/amd64/Context]) + ], +) + AC_CACHE_CHECK(whether right shift preserve sign bit, rb_cv_rshift_sign, [AC_COMPILE_IFELSE([AC_LANG_BOOL_COMPILE_TRY([], [(-1==(-1>>1))])], rb_cv_rshift_sign=yes, Index: .gitignore =================================================================== --- .gitignore (revision 65833) +++ .gitignore (revision 65834) @@ -18,7 +18,6 @@ https://github.com/ruby/ruby/blob/trunk/.gitignore#L18 *.pch *.pdb *.rej -*.s *.sav *.swp *.yarb Index: coroutine/win64/Context.h =================================================================== --- coroutine/win64/Context.h (nonexistent) +++ coroutine/win64/Context.h (revision 65834) @@ -0,0 +1,64 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.h#L1 +// +// win64.h +// File file is part of the "Coroutine" project and released under the MIT License. +// +// Created by Samuel Williams on 10/5/2018. +// Copyright, 2018, by Samuel Williams. All rights reserved. +// + +#pragma once + +#include <assert.h> +#include <string.h> + +#if __cplusplus +extern "C" { +#endif + +#define COROUTINE void + +const size_t COROUTINE_REGISTERS = 8; + +// The fiber context (stack pointer). +struct coroutine_context +{ + void **stack_pointer; +}; + +// The initialization function. +typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self); + +inline void coroutine_initialize( + coroutine_context *context, + coroutine_start start, + void *stack_pointer, + size_t stack_size +) { + context->stack_pointer = (void**)stack_pointer; + + if (!start) { + assert(!context->stack_pointer); + /* We are main coroutine for this thread */ + return; + } + + /* Windows Thread Information Block */ + *--context->stack_pointer = 0; + *--context->stack_pointer = stack_pointer; + *--context->stack_pointer = (void*)stack_size; + + *--context->stack_pointer = (void*)start; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target); + +inline void coroutine_destroy(coroutine_context * context) +{ +} + +#if __cplusplus +} +#endif \ No newline at end of file Index: coroutine/win64/Context.asm =================================================================== --- coroutine/win64/Context.asm (nonexistent) +++ coroutine/win64/Context.asm (revision 65834) @@ -0,0 +1,44 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.asm#L1 +;; +;; win64.asm +;; File file is part of the "Coroutine" project and released under the MIT License. +;; +;; Created by Samuel Williams on 10/5/2018. +;; Copyright, 2018, by Samuel Williams. All rights reserved. +;; + +.code + +coroutine_transfer proc + push rbp + push rbx + push rdi + push rsi + push r12 + push r13 + push r14 + push r15 + + ; Save caller stack pointer + mov [rcx], rsp + + ; Restore callee stack pointer + mov rsp, [rdx] + + ; Restore callee stack + pop r15 + pop r14 + pop r13 + pop r12 + pop rsi + pop rdi + pop rbx + pop rbp + + ; Put the first argument into the return value + mov rax, rcx + + ; We pop the return address and jump to it + ret +coroutine_transfer endp + +end Index: coroutine/arm64/Context.s =================================================================== --- coroutine/arm64/Context.s (nonexistent) +++ coroutine/arm64/Context.s (revision 65834) @@ -0,0 +1,58 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/arm64/Context.s#L1 +## +## arm64.s +## File file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 10/5/2018. +## Copyright, 2018, by Samuel Williams. All rights reserved. +## + +.text +.align 2 +.global coroutine_transfer +coroutine_transfer: + # Make space on the stack for caller registers + sub sp, sp, 0xb0 + + # Save caller registers + stp d8, d9, [sp, 0x00] + stp d10, d11, [sp, 0x10] + stp d12, d13, [sp, 0x20] + stp d14, d15, [sp, 0x30] + stp x19, x20, [sp, 0x40] + stp x21, x22, [sp, 0x50] + stp x23, x24, [sp, 0x60] + stp x25, x26, [sp, 0x70] + stp x27, x28, [sp, 0x80] + stp x29, x30, [sp, 0x90] + + # Save return address + str x30, [sp, 0xa0] + + # Save stack pointer to x0 (first argument) + mov x2, sp + str x2, [x0, 0] + + # Load stack pointer from x1 (second argument) + ldr x3, [x1, 0] + mov sp, x3 + + # Restore caller registers + ldp d8, d9, [sp, 0x00] + ldp d10, d11, [sp, 0x10] + ldp d12, d13, [sp, 0x20] + ldp d14, d15, [sp, 0x30] + ldp x19, x20, [sp, 0x40] + ldp x21, x22, [sp, 0x50] + ldp x23, x24, [sp, 0x60] + ldp x25, x26, [sp, 0x70] + ldp x27, x28, [sp, 0x80] + ldp x29, x30, [sp, 0x90] + + # Load return address into x4 + ldr x4, [sp, 0xa0] + + # Pop stack frame + add sp, sp, 0xb0 + + # Jump to return address (in x4) + ret x4 Index: coroutine/arm64/Context.h =================================================================== --- coroutine/arm64/Context.h (nonexistent) +++ coroutine/arm64/Context.h (revision 65834) @@ -0,0 +1,60 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/arm64/Context.h#L1 +// +// amd64.h +// File file is part of the "Coroutine" project and released under the MIT License. +// +// Created by Samuel Williams on 10/5/2018. +// Copyright, 2018, by Samuel Williams. All rights reserved. +// + +#pragma once + +#include <assert.h> +#include <string.h> + +#if __cplusplus +extern "C" { +#endif + +#define COROUTINE void + +const size_t COROUTINE_REGISTERS = 0xb0 / 8; + +// The fiber context (stack pointer). +typedef struct +{ + void **stack_pointer; +} coroutine_context; + +// The initialization function. +typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self); + +inline void coroutine_initialize( + coroutine_context *context, + coroutine_start start, + void *stack_pointer, + size_t stack_size +) { + /* Force 16-byte alignment */ + context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~0xF); + + if (!start) { + assert(!context->stack_pointer); + /* We are main coroutine for this thread */ + return; + } + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + + context->stack_pointer[0xa0 / 8] = (void*)start; +} + +coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target); + +inline void coroutine_destroy(coroutine_context * context) +{ +} + +#if __cplusplus +} +#endif Index: coroutine/amd64/Context.s =================================================================== --- coroutine/amd64/Context.s (nonexistent) +++ coroutine/amd64/Context.s (revision 65834) @@ -0,0 +1,44 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/amd64/Context.s#L1 +## +## amd64.c +## File file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 10/5/2018. +## Copyright, 2018, by Samuel Williams. All rights reserved. +## + +.text + +.globl coroutine_transfer +coroutine_transfer: + +# For older linkers +.globl _coroutine_transfer +_coroutine_transfer: + + # Save caller state + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + # Save caller stack pointer + movq %rsp, (%rdi) + + # Restore callee stack pointer + movq (%rsi), %rsp + + # Restore callee stack + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + + # Put the first argument into the return value + # movq %rdi, %rax + + # We pop the return address and jump to it + ret Index: coroutine/amd64/Context.h =================================================================== --- coroutine/amd64/Context.h (nonexistent) +++ coroutine/amd64/Context.h (revision 65834) @@ -0,0 +1,62 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/amd64/Context.h#L1 +// +// amd64.h +// File file is part of the "Coroutine" project and released under the MIT License. +// +// Created by Samuel Williams on 10/5/2018. +// Copyright, 2018, by Samuel Williams. All rights reserved. +// + +#pragma once + +#include <assert.h> +#include <string.h> + +#if __cplusplus +extern "C" { +#endif + +#define COROUTINE __attribute__((noreturn)) void + +const size_t COROUTINE_REGISTERS = 6; + +// The fiber context (stack pointer). +typedef struct +{ + void **stack_pointer; +} coroutine_context; + +// The initialization function. +typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self); + +inline void coroutine_initialize( + coroutine_context *context, + coroutine_start start, + void *stack_pointer, + size_t stack_size +) { + /* Force 16-byte alignment */ + context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~0xF); + + if (!start) { + assert(!context->stack_pointer); + /* We are main coroutine for this thread */ + return; + } + + *--context->stack_pointer = NULL; + *--context->stack_pointer = (void*)start; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target); + +inline void coroutine_destroy(coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#if __cplusplus +} +#endif Index: coroutine/win32/Context.asm =================================================================== --- coroutine/win32/Context.asm (nonexistent) +++ coroutine/win32/Context.asm (revision 65834) @@ -0,0 +1,43 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/win32/Context.asm#L1 +;; +;; win32.asm +;; File file is part of the "Coroutine" project and released under the MIT License. +;; +;; Created by Samuel Williams on 10/5/2018. +;; Copyright, 2018, by Samuel Williams. All rights reserved. +;; + +.model flat + +.code + +; Using fastcall is a big win (and it's the same has how x64 works). +; In coroutine transfer, the arguments are passed in ecx and edx. We don't need +; to touch these in order to pass them to the destination coroutine. + +@coroutine_transfer@8 proc + ; Save caller registers + push ebp + push ebx + push edi + push esi + + ; Save caller stack pointer + mov dword ptr [ecx], esp + + ; Restore callee stack pointer + mov esp, dword ptr [edx] + + ; Restore callee stack + pop esi + pop edi + pop ebx + pop ebp + + ; Save the first argument as the return value + mov eax, dword ptr ecx + + ; Jump to the address on the stack + ret +@coroutine_transfer@8 endp + +end Index: coroutine/win32/Context.h =================================================================== --- coroutine/win32/Context.h (nonexistent) +++ coroutine/win32/Context.h (revision 65834) @@ -0,0 +1,64 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/win32/Context.h#L1 +// +// win32.h +// File file is part of the "Coroutine" project and released under the MIT License. +// +// Created by Samuel Williams on 10/5/2018. +// Copyright, 2018, by Samuel Williams. All rights reserved. +// + +#pragma once + +#include <assert.h> +#include <string.h> + +#if __cplusplus +extern "C" { +#endif + +#define COROUTINE void __fastcall + +const size_t COROUTINE_REGISTERS = 4; + +// The fiber context (stack pointer). +struct coroutine_context +{ + void **stack_pointer; +}; + +// The initialization function. +typedef void(__fastcall * coroutine_start)(coroutine_context *from, coroutine_context *self); + +inline void coroutine_initialize( + coroutine_context *context, + coroutine_start start, + void *stack_pointer, + size_t stack_size +) { + context->stack_pointer = (void**)stack_pointer; + + if (!start) { + assert(!context->stack_pointer); + /* We are main coroutine for this thread */ + return; + } + + /* Windows Thread Information Block */ + *--context->stack_pointer = 0; + *--context->stack_pointer = stack_pointer; + *--context->stack_pointer = (void*)stack_size; + + *--context->stack_pointer = (void*)start; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +coroutine_context * __fastcall coroutine_transfer(coroutine_context * current, coroutine_context * target); + +inline void coroutine_destroy(coroutine_context * context) +{ +} + +#if __cplusplus +} +#endif \ No newline at end of file Index: coroutine/arm32/Context.h =================================================================== --- coroutine/arm32/Context.h (nonexistent) +++ coroutine/arm32/Context.h (revision 65834) @@ -0,0 +1,59 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/arm32/Context.h#L1 +// +// amd64.h +// File file is part of the "Coroutine" project and released under the MIT License. +// +// Created by Samuel Williams on 10/5/2018. +// Copyright, 2018, by Samuel Williams. All rights reserved. +// + +#pragma once + +#include <assert.h> +#include <string.h> + +#if __cplusplus +extern "C" { +#endif + +#define COROUTINE void + +const size_t COROUTINE_REGISTERS = 9; + +// The fiber context (stack pointer). +typedef struct +{ + void **stack_pointer; +} coroutine_context; + +// The initialization function. +typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self); + +inline void coroutine_initialize( + coroutine_context *context, + coroutine_start start, + void *stack_pointer, + size_t stack_size +) { + context->stack_pointer = (void**)stack_pointer; + + if (!start) { + assert(!context->stack_pointer); + /* We are main coroutine for this thread */ + return; + } + + *--context->stack_pointer = (void*)start; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target); + +inline void coroutine_destroy(coroutine_context * context) +{ +} + +#if __cplusplus +} +#endif Index: coroutine/arm32/Context.s =================================================================== --- coroutine/arm32/Context.s (nonexistent) +++ coroutine/arm32/Context.s (revision 65834) @@ -0,0 +1,15 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/arm32/Context.s#L1 +## +## arm.c +## File file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 10/5/2018. +## Copyright, 2018, by Samuel Williams. All rights reserved. +## + +.text + +.globl coroutine_transfer +coroutine_transfer: + stmia r1!, {r4-r11,sp,lr} + ldmia r0!, {r4-r11,sp,pc} + bx lr \ No newline at end of file Index: fiber_benchmark.rb =================================================================== --- fiber_benchmark.rb (nonexistent) +++ fiber_benchmark.rb (revision 65834) @@ -0,0 +1,103 @@ https://github.com/ruby/ruby/blob/trunk/fiber_benchmark.rb#L1 +#!/usr/bin/env ruby + +require 'fiber' +require 'benchmark' + +class Ring + attr_reader :id + attr_accessor :attach + + def initialize(id) + @id = id + #puts "Creating ring ... #{id}" + @fiber = Fiber.new do + pass_message + end + end + + def |(other) + other.attach = self if !other.nil? + #puts "attaching #{@id} to #{other.id}" if !other.nil? + other + end + + def resume + @fiber.resume + end + + def pass_message + #puts "I'm fiber #{@id}" + while message = message_in + #puts "... #{@id} I received message #{message}" + # do something with message + message_out(message) + end + end + + def message_in + #puts "Resuming #{@attach.id}" if !@attach.nil? + @attach.resume if !@attach.nil? + end + + def message_out(message) + Fiber.yield(message) + end + +end + +class RingStart < Ring + attr_accessor :message + def initialize(n, m, message) + @m = m + @message = message + super(n) + end + + def pass_message + loop { message_out(@message) } + end + +end + + +def create_chain_r(i, chain) + # recursive version + return chain if i<=0 + r = chain.nil? ? Ring.new(i) : chain | Ring.new(i) + create_chain(i-1, r) +end + +def create_chain(n, chain) + # loop version + # needed to avoid stack overflow for high n + n.downto(0) { + chain = chain | Ring.new(n) + } + chain +end + +def run_benchmark(n, m) + mess = :hello + ringu = nil + chain = nil + + tm = Benchmark.measure { + ringu = RingStart.new(0, m, mess) + chain = create_chain(n, ringu) + }.format("%10.6r\n").gsub!(/\(|\)/, "") + + puts "setup time for #{n} fibers: #{tm}" + + tm = Benchmark.measure { + m.times { ringu.message = chain.resume } + }.format("%10.6r\n").gsub!(/\(|\)/, "") + + puts "execution time for #{m} messages: #{tm}" +end + +n = (ARGV[0] || 1000).to_i +m = (ARGV[1] || 10000).to_i + +5.times do + run_benchmark(n, m) +end Property changes on: fiber_benchmark.rb ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: cont.c =================================================================== --- cont.c (revision 65833) +++ cont.c (revision 65834) @@ -26,6 +26,27 @@ https://github.com/ruby/ruby/blob/trunk/cont.c#L26 * in Proc. of 51th Programming Symposium, pp.21--28 (2010) (in Japanese). */ +/* + Enable this include to make fiber yield/resume about twice as fast. + + # Without libcoro + koyoko% ./build/bin/ruby ./fiber_benchmark.rb 10000 1000 + setup time for 10000 fibers: 0.099961 + execution time for 1000 messages: 19.505909 + + # With libcoro + koyoko% ./build/bin/ruby ./fiber_benchmark.rb 10000 1000 + setup time for 10000 fibers: 0.099268 + execution time for 1000 messages: 8.491746 +*/ + +#define FIBER_USE_COROUTINE + +#ifdef FIBER_USE_COROUTINE +#include "coroutine/amd64/Context.h" +#define FIBER_USE_NATIVE 1 +#endif + #if !defined(FIBER_USE_NATIVE) # if defined(HAVE_GETCONTEXT) && defined(HAVE_SETCONTEXT) # if 0 @@ -139,7 +160,7 @@ enum fiber_status { https://github.com/ruby/ruby/blob/trunk/cont.c#L160 #define FIBER_TERMINATED_P(fib) ((fib)->status == FIBER_TERMINATED) #define FIBER_RUNNABLE_P(fib) (FIBER_CREATED_P(fib) || FIBER_SUSPENDED_P(fib)) -#if FIBER_USE_NATIVE && !defined(_WIN32) +#if FIBER_USE_NATIVE && !defined(FIBER_USE_COROUTINE) && !defined(_WIN32) static inline int fiber_context_create(ucontext_t *context, void (*func)(), void *arg, void *ptr, size_t size) { @@ -181,7 +202,11 @@ struct rb_fiber_struct { https://github.com/ruby/ruby/blob/trunk/cont.c#L202 unsigned int transferred : 1; #if FIBER_USE_NATIVE -#ifdef _WIN32 +#if defined(FIBER_USE_COROUTINE) + coroutine_context context; + void *ss_sp; + size_t ss_size; +#elif defined(_WIN32) void *fib_handle; #else ucontext_t context; @@ -382,8 +407,16 @@ cont_free(void *ptr) https://github.com/ruby/ruby/blob/trunk/cont.c#L407 else { /* fiber */ const rb_fiber_t *fib = (rb_fiber_t*)cont; -#ifdef _WIN32 - if (!fiber_is_root_p(fib)) { +#if defined(FIBER_USE_COROUTINE) + coroutine_destroy(&fib->context); + if (fib->ss_sp != NULL) { + if (fiber_is_root_p(fib)) { + rb_bug("Illegal root fiber parameter"); + } + munmap((void*)fib->ss_sp, fib->ss_size); + } +#elif defined(_WIN32) + if (!fiber_is_root_p(fib)) { /* don't delete root fiber handle */ if (fib->fib_handle) { DeleteFiber(fib->fib_handle); @@ -799,9 +832,7 @@ fiber_entry(void *arg) https://github.com/ruby/ruby/blob/trunk/cont.c#L832 } #else /* _WIN32 */ -NORETURN(static void fiber_entry(void *arg)); -static void -fiber_entry(void *arg) +COROUTINE fiber_entry(coroutine_context * from, coroutine_context * to) { rb_fiber_start(); } @@ -862,7 +893,17 @@ fiber_initialize_m (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/