ruby-changes:42780
From: naruse <ko1@a...>
Date: Sat, 30 Apr 2016 23:42:26 +0900 (JST)
Subject: [ruby-changes:42780] naruse:r54854 (trunk): * string.c (search_nonascii): unroll and use ntz
naruse 2016-05-01 00:39:02 +0900 (Sun, 01 May 2016) New Revision: 54854 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=54854 Log: * string.c (search_nonascii): unroll and use ntz * configure.in (__builtin_ctz): check. * configure.in (__builtin_ctzll): check. * internal.h (rb_popcount32): defined for ntz_int32. It could use __builtin_popcount, but this function is not used in GCC environments because ntz_int32 uses __builtin_ctz there. If another function starts using it, using __builtin_popcount should be reconsidered. * internal.h (rb_popcount64): ditto. * internal.h (ntz_int32): defined for ntz_intptr. * internal.h (ntz_int64): defined for ntz_intptr. * internal.h (ntz_intptr): defined as ntz for uintptr_t. Modified files: trunk/ChangeLog trunk/configure.in trunk/internal.h trunk/string.c Index: ChangeLog =================================================================== --- ChangeLog (revision 54853) +++ ChangeLog (revision 54854) @@ -1,3 +1,25 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Sun May 1 00:03:30 2016 NARUSE, Yui <naruse@r...> + + * configure.in (__builtin_ctz): check. + + * configure.in (__builtin_ctzll): check. + + * internal.h (rb_popcount32): defined for ntz_int32. + it can use __builtin_popcount but this function is not used on + GCC environment because it uses __builtin_ctz. + When another function uses this, using __builtin_popcount + should be re-considered. + + * internal.h (rb_popcount64): ditto. + + * internal.h (ntz_int32): defined for ntz_intptr. + + * internal.h (ntz_int64): defined for ntz_intptr. + + * internal.h (ntz_intptr): defined as ntz for uintptr_t. + + * string.c (search_nonascii): unroll and use ntz. + Sat Apr 30 21:54:13 2016 Tanaka Akira <akr@f...> * numeric.c (Init_Numeric): Gather Fixnum method definitions. 
Index: internal.h =================================================================== --- internal.h (revision 54853) +++ internal.h (revision 54854) @@ -260,6 +260,52 @@ nlz_int128(uint128_t x) https://github.com/ruby/ruby/blob/trunk/internal.h#L260 } #endif +static inline int +rb_popcount32(uint32_t x) { + x = (x & 0x55555555) + (x >> 1 & 0x55555555); + x = (x & 0x33333333) + (x >> 2 & 0x33333333); + x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f); + x = (x & 0x001f001f) + (x >> 8 & 0x001f001f); + return (x & 0x0000003f) + (x >>16 & 0x0000003f); +} + +static inline int +rb_popcount64(uint64_t x) { + x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555); + x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333); + x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707); + x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f); + x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f); + return (x & 0x7f) + (x >>32 & 0x7f); +} + +static inline int +ntz_int32(uint32_t x) { +#ifdef HAVE_BUILTIN___BUILTIN_CTZ + return __builtin_ctz(x); +#else + return rb_popcount32((~x) & (x-1)); +#endif +} + +static inline int +ntz_int64(uint64_t x) { +#ifdef HAVE_BUILTIN___BUILTIN_CTZLL + return __builtin_ctzll(x); +#else + return rb_popcount64((~x) & (x-1)); +#endif +} + +static inline int +ntz_intptr(uintptr_t x) { +#if SIZEOF_VOIDP == 8 + return ntz_int64(x); +#elif SIZEOF_VOIDP == 4 + return ntz_int32(x); +#endif +} + #if HAVE_LONG_LONG && SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG # define DLONG LONG_LONG # define DL2NUM(x) LL2NUM(x) Index: string.c =================================================================== --- string.c (revision 54853) +++ string.c (revision 54854) @@ -427,32 +427,46 @@ search_nonascii(const char *p, const cha https://github.com/ruby/ruby/blob/trunk/string.c#L427 #elif SIZEOF_VOIDP == 4 # define NONASCII_MASK 0x80808080UL #endif -#ifdef NONASCII_MASK - if ((int)SIZEOF_VOIDP * 2 < e - p) { - const uintptr_t *s, *t; - const uintptr_t 
lowbits = SIZEOF_VOIDP - 1; - s = (const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits)); - while (p < (const char *)s) { - if (!ISASCII(*p)) - return p; - p++; - } - t = (const uintptr_t*)(~lowbits & (uintptr_t)e); - while (s < t) { - if (*s & NONASCII_MASK) { - t = s; - break; - } - s++; - } - p = (const char *)t; + +#if !UNALIGNED_WORD_ACCESS + if (e - p > SIZEOF_VOIDP) { + switch (8 - (uintptr_t)p % 8) { +#if SIZEOF_VOIDP > 4 + case 7: if (*p&0x80) return p; p++; + case 6: if (*p&0x80) return p; p++; + case 5: if (*p&0x80) return p; p++; + case 4: if (*p&0x80) return p; p++; +#endif + case 3: if (*p&0x80) return p; p++; + case 2: if (*p&0x80) return p; p++; + case 1: if (*p&0x80) return p; p++; + } } #endif - while (p < e) { - if (!ISASCII(*p)) - return p; - p++; + + { + const uintptr_t *s = (const uintptr_t *)p; + const uintptr_t *t = (const uintptr_t *)(e - (SIZEOF_VOIDP-1)); + for (;s < t; s++) { + if (*s & NONASCII_MASK) { + return (const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3); + } + } + p = (const char *)s; } + + switch ((e - p) % SIZEOF_VOIDP) { +#if SIZEOF_VOIDP > 4 + case 7: if (*p&0x80) return p; p++; + case 6: if (*p&0x80) return p; p++; + case 5: if (*p&0x80) return p; p++; + case 4: if (*p&0x80) return p; p++; +#endif + case 3: if (*p&0x80) return p; p++; + case 2: if (*p&0x80) return p; p++; + case 1: if (*p&0x80) return p; + } + return NULL; } Index: configure.in =================================================================== --- configure.in (revision 54853) +++ configure.in (revision 54854) @@ -2432,6 +2432,8 @@ RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap6 https://github.com/ruby/ruby/blob/trunk/configure.in#L2432 RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_ctz, [__builtin_ctz(0)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_ctzll, [__builtin_ctzll(0)]) 
RUBY_CHECK_BUILTIN_FUNC(__builtin_choose_expr, [ [int x[__extension__(__builtin_choose_expr(1, 1, -1))]]; [int y[__extension__(__builtin_choose_expr(0, -1, 1))]]; -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/