ruby-changes:53668
From: samuel <ko1@a...>
Date: Wed, 21 Nov 2018 01:17:38 +0900 (JST)
Subject: [ruby-changes:53668] samuel:r65868 (trunk): Initial support for x64-mingw32
samuel 2018-11-20 19:17:44 +0900 (Tue, 20 Nov 2018) New Revision: 65868 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=65868 Log: Initial support for x64-mingw32 Modified files: trunk/configure.ac trunk/coroutine/win32/Context.asm trunk/coroutine/win32/Context.h trunk/coroutine/win64/Context.asm trunk/coroutine/win64/Context.h Index: configure.ac =================================================================== --- configure.ac (revision 65867) +++ configure.ac (revision 65868) @@ -2325,8 +2325,14 @@ AS_IF([test "${universal_binary-no}" = y https://github.com/ruby/ruby/blob/trunk/configure.ac#L2325 AC_DEFINE_UNQUOTED(STACK_GROW_DIRECTION, $dir) ]) -AC_MSG_CHECKING(native fiber implementation) +AC_MSG_CHECKING(native coroutine implementation for $target_cpu-$target_os) AS_CASE(["$target_cpu-$target_os"], + [x*64-darwin], [ + AC_MSG_RESULT(amd64) + COROUTINE_H=coroutine/amd64/Context.h + AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"]) + AC_LIBOBJ([coroutine/amd64/Context]) + ], [x*64-linux], [ AC_MSG_RESULT(amd64) COROUTINE_H=coroutine/amd64/Context.h @@ -2339,6 +2345,12 @@ AS_CASE(["$target_cpu-$target_os"], https://github.com/ruby/ruby/blob/trunk/configure.ac#L2345 AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"]) AC_LIBOBJ([coroutine/x86/Context]) ] + [x64-mingw32], [ + AC_MSG_RESULT(x86) + COROUTINE_H=coroutine/win64/Context.h + AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"]) + AC_LIBOBJ([coroutine/win64/Context]) + ] [*], [ AC_MSG_RESULT(no) ] Index: coroutine/win32/Context.h =================================================================== --- coroutine/win32/Context.h (revision 65867) +++ coroutine/win32/Context.h (revision 65868) @@ -24,7 +24,7 @@ struct coroutine_context https://github.com/ruby/ruby/blob/trunk/coroutine/win32/Context.h#L24 void **stack_pointer; }; -typedef COROUTINE(* coroutine_start)(coroutine_context *from, coroutine_context *self); +typedef void(__fastcall * coroutine_start)(coroutine_context *from, coroutine_context *self); static inline void coroutine_initialize( coroutine_context *context, @@ -40,12 +40,12 @@ static inline void coroutine_initialize( https://github.com/ruby/ruby/blob/trunk/coroutine/win32/Context.h#L40 return; } + *--context->stack_pointer = (void*)start; + /* Windows Thread Information Block */ *--context->stack_pointer = 0; /* fs:[0] */ - *--context->stack_pointer = stack_pointer + stack_size; /* fs:[4] */ - *--context->stack_pointer = (void*)stack_pointer; /* fs:[8] */ - - *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)stack_pointer; /* fs:[4] */ + *--context->stack_pointer = (void*)((char *)stack_pointer - stack_size); /* fs:[8] */ context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); Index: coroutine/win32/Context.asm =================================================================== --- coroutine/win32/Context.asm (revision 65867) +++ coroutine/win32/Context.asm (revision 65868) @@ -9,6 +9,8 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/win32/Context.asm#L9 .code +assume fs:nothing + ; Using fastcall is a big win (and it's the same has how x64 works). ; In coroutine transfer, the arguments are passed in ecx and edx. We don't need ; to touch these in order to pass them to the destination coroutine. Index: coroutine/win64/Context.h =================================================================== --- coroutine/win64/Context.h (revision 65867) +++ coroutine/win64/Context.h (revision 65868) @@ -17,13 +17,16 @@ extern "C" { https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.h#L17 #define COROUTINE __declspec(noreturn) void const size_t COROUTINE_REGISTERS = 8; +const size_t COROUTINE_XMM_REGISTERS = 1+10*2; struct coroutine_context { void **stack_pointer; }; -typedef COROUTINE(* coroutine_start)(coroutine_context *from, coroutine_context *self); +typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self); + +void coroutine_trampoline(); static inline void coroutine_initialize( coroutine_context *context, @@ -31,7 +34,8 @@ static inline void coroutine_initialize( https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.h#L34 void *stack_pointer, size_t stack_size ) { - context->stack_pointer = (void**)stack_pointer; + /* Force 16-byte alignment */ + context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~0xF); if (!start) { assert(!context->stack_pointer); @@ -39,16 +43,22 @@ static inline void coroutine_initialize( https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.h#L43 return; } - /* Windows Thread Information Block */ - *--context->stack_pointer = 0; /* gs:[0x00] */ - *--context->stack_pointer = stack_pointer + stack_size; /* gs:[0x08] */ - *--context->stack_pointer = (void*)stack_pointer; /* gs:[0x10] */ - + /* Win64 ABI requires space for arguments */ + context->stack_pointer -= 4; + /* Return address */ + *--context->stack_pointer = 0; *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)coroutine_trampoline; + + /* Windows Thread Information Block */ + /* *--context->stack_pointer = 0; */ /* gs:[0x00] is not used */ + *--context->stack_pointer = (void*)stack_pointer; /* gs:[0x08] */ + *--context->stack_pointer = (void*)((char *)stack_pointer - stack_size); /* gs:[0x10] */ context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + memset(context->stack_pointer - COROUTINE_XMM_REGISTERS, 0, sizeof(void*) * COROUTINE_XMM_REGISTERS); } coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target); Index: coroutine/win64/Context.asm =================================================================== --- coroutine/win64/Context.asm (revision 65867) +++ coroutine/win64/Context.asm (revision 65868) @@ -9,9 +9,8 @@ https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.asm#L9 coroutine_transfer proc ; Save the thread information block: - push gs:[0x00] - push gs:[0x08] - push gs:[0x10] + push qword ptr gs:[8] + push qword ptr gs:[16] ; Save caller registers: push rbp @@ -23,12 +22,34 @@ coroutine_transfer proc https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.asm#L22 push r14 push r15 + movaps [rsp - 24], xmm6 + movaps [rsp - 40], xmm7 + movaps [rsp - 56], xmm8 + movaps [rsp - 72], xmm9 + movaps [rsp - 88], xmm10 + movaps [rsp - 104], xmm11 + movaps [rsp - 120], xmm12 + movaps [rsp - 136], xmm13 + movaps [rsp - 152], xmm14 + movaps [rsp - 168], xmm15 + ; Save caller stack pointer: mov [rcx], rsp ; Restore callee stack pointer: mov rsp, [rdx] + movaps xmm15, [rsp - 168] + movaps xmm14, [rsp - 152] + movaps xmm13, [rsp - 136] + movaps xmm12, [rsp - 120] + movaps xmm11, [rsp - 104] + movaps xmm10, [rsp - 88] + movaps xmm9, [rsp - 72] + movaps xmm8, [rsp - 56] + movaps xmm7, [rsp - 40] + movaps xmm6, [rsp - 24] + ; Restore callee stack: pop r15 pop r14 @@ -40,9 +61,8 @@ coroutine_transfer proc https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.asm#L61 pop rbp ; Restore the thread information block: - pop gs:[0x10] - pop gs:[0x08] - pop gs:[0x00] + pop qword ptr gs:[16] + pop qword ptr gs:[8] ; Put the first argument into the return value: mov rax, rcx @@ -51,4 +71,9 @@ coroutine_transfer proc https://github.com/ruby/ruby/blob/trunk/coroutine/win64/Context.asm#L71 ret coroutine_transfer endp +coroutine_trampoline proc + ; Do not remove this. This forces 16-byte alignment when entering the coroutine. + ret +coroutine_trampoline endp + end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/