ruby-changes:42820
From: naruse <ko1@a...>
Date: Tue, 3 May 2016 21:17:54 +0900 (JST)
Subject: [ruby-changes:42820] naruse:r54894 (trunk): * string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount
naruse 2016-05-03 22:14:30 +0900 (Tue, 03 May 2016) New Revision: 54894 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=54894 Log: * string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount only if it can use SSE 4.2 POPCNT whose latency is 3 cycle. * internal.h (rb_popcount64): use __builtin_popcountll because now it is in fast path. Modified files: trunk/ChangeLog trunk/configure.in trunk/internal.h trunk/string.c Index: ChangeLog =================================================================== --- ChangeLog (revision 54893) +++ ChangeLog (revision 54894) @@ -1,3 +1,11 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Tue May 3 22:10:09 2016 NARUSE, Yui <naruse@r...> + + * string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount + only if it can use SSE 4.2 POPCNT whose latency is 3 cycle. + + * internal.h (rb_popcount64): use __builtin_popcountll because now + it is in fast path. + Tue May 3 14:19:18 2016 Nobuyoshi Nakada <nobu@r...> * parse.y (new_if_gen): set newline flag to NODE_IF to trace all Index: internal.h =================================================================== --- internal.h (revision 54893) +++ internal.h (revision 54894) @@ -271,21 +271,38 @@ nlz_intptr(uintptr_t x) { https://github.com/ruby/ruby/blob/trunk/internal.h#L271 static inline int rb_popcount32(uint32_t x) { +#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNT + return __builtin_popcount(x); +#else x = (x & 0x55555555) + (x >> 1 & 0x55555555); x = (x & 0x33333333) + (x >> 2 & 0x33333333); x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f); x = (x & 0x001f001f) + (x >> 8 & 0x001f001f); return (x & 0x0000003f) + (x >>16 & 0x0000003f); +#endif } static inline int rb_popcount64(uint64_t x) { +#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNT + return __builtin_popcountll(x); +#else x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555); x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333); x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707); x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f); x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f); return (x & 0x7f) + (x >>32 & 0x7f); +#endif +} + +static inline int +rb_popcount_intptr(uintptr_t x) { +#if SIZEOF_VOIDP == 8 + return rb_popcount64(x); +#elif SIZEOF_VOIDP == 4 + return rb_popcount32(x); +#endif } static inline int Index: configure.in =================================================================== --- configure.in (revision 54893) +++ configure.in (revision 54894) @@ -2429,6 +2429,8 @@ fi]) https://github.com/ruby/ruby/blob/trunk/configure.in#L2429 RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap16, [__builtin_bswap16(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap32, [__builtin_bswap32(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap64, [__builtin_bswap64(0)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_popcount, [__builtin_popcount(0)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_popcountll, [__builtin_popcountll(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)]) Index: string.c =================================================================== --- string.c (revision 54893) +++ string.c (revision 54894) @@ -1476,17 +1476,21 @@ count_utf8_lead_bytes_with_word(const ui https://github.com/ruby/ruby/blob/trunk/string.c#L1476 uintptr_t d = *s; /* Transform so that bit0 indicates whether we have a UTF-8 leading byte or not. */ - d |= ~(d>>1); - d >>= 6; + d = (d>>6) | (~d>>7); d &= NONASCII_MASK >> 7; /* Gather all bytes. */ +#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__) + /* use only if it can use POPCNT */ + return rb_popcount_intptr(d); +#else d += (d>>8); d += (d>>16); -#if SIZEOF_VOIDP == 8 +# if SIZEOF_VOIDP == 8 d += (d>>32); -#endif +# endif return (d&0xF); +#endif } #endif -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/