ruby-changes:62568
From: AGSaidi <ko1@a...>
Date: Fri, 14 Aug 2020 02:16:12 +0900 (JST)
Subject: [ruby-changes:62568] 511b55bcef (master): Enable arm64 optimizations that exist for power/x86 (#3393)
https://git.ruby-lang.org/ruby.git/commit/?id=511b55bcef From 511b55bcefc81c036294dc9a544d14bd342acd3b Mon Sep 17 00:00:00 2001 From: AGSaidi <AGSaidi@u...> Date: Thu, 13 Aug 2020 12:15:54 -0500 Subject: Enable arm64 optimizations that exist for power/x86 (#3393) * Enable unaligned accesses on arm64 64-bit Arm platforms support unaligned accesses. Running the string benchmarks this change improves performance by an average of 1.04x, min .96x, max 1.21x, median 1.01x * arm64 enable gc optimizations Similar to x86 and powerpc optimizations. | |compare-ruby|built-ruby| |:------|-----------:|---------:| |hash1 | 0.225| 0.237| | | -| 1.05x| |hash2 | 0.110| 0.110| | | 1.00x| -| * vm_exec.c: improve performance for arm64 | |compare-ruby|built-ruby| |:------------------------------|-----------:|---------:| |vm_array | 26.501M| 27.959M| | | -| 1.06x| |vm_attr_ivar | 21.606M| 31.429M| | | -| 1.45x| |vm_attr_ivar_set | 21.178M| 26.113M| | | -| 1.23x| |vm_backtrace | 6.621| 6.668| | | -| 1.01x| |vm_bigarray | 26.205M| 29.958M| | | -| 1.14x| |vm_bighash | 504.155k| 479.306k| | | 1.05x| -| |vm_block | 16.692M| 21.315M| | | -| 1.28x| |block_handler_type_iseq | 5.083| 7.004| | | -| 1.38x| diff --git a/gc.c b/gc.c index 22972df..788f06f 100644 --- a/gc.c +++ b/gc.c @@ -1115,6 +1115,19 @@ tick(void) https://github.com/ruby/ruby/blob/trunk/gc.c#L1115 return val; } +#elif defined(__aarch64__) && defined(__GNUC__) +typedef unsigned long tick_t; +#define PRItick "lu" + +static __inline__ tick_t +tick(void) +{ + unsigned long val; + __asm__ __volatile__ ("mrs %0, cntvct_el0", : "=r" (val)); + return val; +} + + #elif defined(_WIN32) && defined(_MSC_VER) #include <intrin.h> typedef unsigned __int64 tick_t; diff --git a/gc.h b/gc.h index 6568079..47a4ca1 100644 --- a/gc.h +++ b/gc.h @@ -8,6 +8,8 @@ https://github.com/ruby/ruby/blob/trunk/gc.h#L8 #define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("movl\t%%esp, %0" : "=r" (*(p))) #elif defined(__powerpc64__) && defined(__GNUC__) #define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mr\t%0, %%r1" : "=r" (*(p))) +#elif defined(__aarch64__) && defined(__GNUC__) +#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mov\t%0, sp" : "=r" (*(p))) #else NOINLINE(void rb_gc_set_stack_end(VALUE **stack_end_p)); #define SET_MACHINE_STACK_END(p) rb_gc_set_stack_end(p) diff --git a/include/ruby/internal/config.h b/include/ruby/internal/config.h index 49f673e..0193275 100644 --- a/include/ruby/internal/config.h +++ b/include/ruby/internal/config.h @@ -103,6 +103,8 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/config.h#L103 # define UNALIGNED_WORD_ACCESS 1 #elif defined(__powerpc64__) # define UNALIGNED_WORD_ACCESS 1 +#elif defined(__aarch64__) +# define UNALIGNED_WORD_ACCESS 1 #elif defined(__mc68020__) # define UNALIGNED_WORD_ACCESS 1 #else diff --git a/regint.h b/regint.h index a2f5bbb..0740429 100644 --- a/regint.h +++ b/regint.h @@ -52,7 +52,7 @@ https://github.com/ruby/ruby/blob/trunk/regint.h#L52 #ifndef UNALIGNED_WORD_ACCESS # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ - defined(__powerpc64__) || \ + defined(__powerpc64__) || defined(__aarch64__) || \ defined(__mc68020__) # define UNALIGNED_WORD_ACCESS 1 # else diff --git a/siphash.c b/siphash.c index 153d2c6..ddf8ee2 100644 --- a/siphash.c +++ b/siphash.c @@ -30,7 +30,7 @@ https://github.com/ruby/ruby/blob/trunk/siphash.c#L30 #ifndef UNALIGNED_WORD_ACCESS # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ - defined(__powerpc64__) || \ + defined(__powerpc64__) || defined(__aarch64__) || \ defined(__mc68020__) # define UNALIGNED_WORD_ACCESS 1 # endif diff --git a/st.c b/st.c index c11535e..8be466b 100644 --- a/st.c +++ b/st.c @@ -1662,7 +1662,7 @@ st_values_check(st_table *tab, st_data_t *values, st_index_t size, https://github.com/ruby/ruby/blob/trunk/st.c#L1662 #ifndef UNALIGNED_WORD_ACCESS # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ - defined(__powerpc64__) || \ + defined(__powerpc64__) || defined(__aarch64__) || \ defined(__mc68020__) # define UNALIGNED_WORD_ACCESS 1 # endif diff --git a/vm_exec.c b/vm_exec.c index ce2e053..7aa56f6 100644 --- a/vm_exec.c +++ b/vm_exec.c @@ -57,6 +57,9 @@ static void vm_insns_counter_count_insn(int insn) {} https://github.com/ruby/ruby/blob/trunk/vm_exec.c#L57 #elif defined(__GNUC__) && defined(__powerpc64__) #define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("r" reg) +#elif defined(__GNUC__) && defined(__aarch64__) +#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("x" reg) + #else #define DECL_SC_REG(type, r, reg) register type reg_##r #endif @@ -93,6 +96,11 @@ vm_exec_core(rb_execution_context_t *ec, VALUE initial) https://github.com/ruby/ruby/blob/trunk/vm_exec.c#L96 DECL_SC_REG(rb_control_frame_t *, cfp, "15"); #define USE_MACHINE_REGS 1 +#elif defined(__GNUC__) && defined(__aarch64__) + DECL_SC_REG(const VALUE *, pc, "19"); + DECL_SC_REG(rb_control_frame_t *, cfp, "20"); +#define USE_MACHINE_REGS 1 + #else register rb_control_frame_t *reg_cfp; const VALUE *reg_pc; -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/