[前][次][番号順一覧][スレッド一覧]

ruby-changes:42780

From: naruse <ko1@a...>
Date: Sat, 30 Apr 2016 23:42:26 +0900 (JST)
Subject: [ruby-changes:42780] naruse:r54854 (trunk): * string.c (search_nonascii): unroll and use ntz

naruse	2016-05-01 00:39:02 +0900 (Sun, 01 May 2016)

  New Revision: 54854

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=54854

  Log:
    * string.c (search_nonascii): unroll and use ntz
    
    * configure.in (__builtin_ctz): check.
    
    * configure.in (__builtin_ctzll): check.
    
    * internal.h (rb_popcount32): defined for ntz_int32.
      It could use __builtin_popcount, but this function is not used in
      GCC environments because ntz_int32 uses __builtin_ctz there.
      If another function starts using it, using __builtin_popcount
      should be reconsidered.
    
    * internal.h (rb_popcount64): ditto.
    
    * internal.h (ntz_int32): defined for ntz_intptr.
    
    * internal.h (ntz_int64): defined for ntz_intptr.
    
    * internal.h (ntz_intptr): defined as ntz for uintptr_t.

  Modified files:
    trunk/ChangeLog
    trunk/configure.in
    trunk/internal.h
    trunk/string.c
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 54853)
+++ ChangeLog	(revision 54854)
@@ -1,3 +1,25 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun May  1 00:03:30 2016  NARUSE, Yui  <naruse@r...>
+
+	* configure.in (__builtin_ctz): check.
+
+	* configure.in (__builtin_ctzll): check.
+
+	* internal.h (rb_popcount32): defined for ntz_int32.
+	  it can use __builtin_popcount but this function is not used on
+	  GCC environment because it uses __builtin_ctz.
+	  When another function uses this, using __builtin_popcount
+	  should be re-considered.
+
+	* internal.h (rb_popcount64): ditto.
+
+	* internal.h (ntz_int32): defined for ntz_intptr.
+
+	* internal.h (ntz_int64): defined for ntz_intptr.
+
+	* internal.h (ntz_intptr): defined as ntz for uintptr_t.
+
+	* string.c (search_nonascii): unroll and use ntz.
+
 Sat Apr 30 21:54:13 2016  Tanaka Akira  <akr@f...>
 
 	* numeric.c (Init_Numeric): Gather Fixnum method definitions.
Index: internal.h
===================================================================
--- internal.h	(revision 54853)
+++ internal.h	(revision 54854)
@@ -260,6 +260,52 @@ nlz_int128(uint128_t x) https://github.com/ruby/ruby/blob/trunk/internal.h#L260
 }
 #endif
 
+static inline int /* rb_popcount32: returns the number of 1 bits in x by summing adjacent bit fields of doubling width */
+rb_popcount32(uint32_t x) {
+    x = (x & 0x55555555) + (x >> 1 & 0x55555555); /* every 2-bit field now holds the count of its two bits (0..2) */
+    x = (x & 0x33333333) + (x >> 2 & 0x33333333); /* every 4-bit field: 0..4 */
+    x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f); /* every byte: 0..8 */
+    x = (x & 0x001f001f) + (x >> 8 & 0x001f001f); /* every 16-bit half: 0..16; addends are at most 8, so the 5-bit mask loses nothing */
+    return (x & 0x0000003f) + (x >>16 & 0x0000003f); /* total 0..32; addends are at most 16, within the 6-bit mask */
+}
+
+static inline int /* rb_popcount64: returns the number of 1 bits in x by summing adjacent bit fields of doubling width */
+rb_popcount64(uint64_t x) {
+    x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555); /* every 2-bit field now holds the count of its two bits (0..2) */
+    x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333); /* every 4-bit field: 0..4 */
+    x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707); /* every byte: 0..8; nibble counts are at most 4, so the 3-bit mask 0x07 is enough (not a typo for 0x0f) */
+    x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f); /* every 16-bit field: 0..16; addends are at most 8, within the 5-bit mask */
+    x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f); /* every 32-bit half: 0..32; addends are at most 16, within the 6-bit mask */
+    return (x & 0x7f) + (x >>32 & 0x7f); /* total 0..64; addends are at most 32, within the 7-bit mask */
+}
+
+static inline int /* ntz_int32: returns the number of trailing zero bits of x */
+ntz_int32(uint32_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_CTZ
+    return __builtin_ctz(x); /* NOTE(review): GCC documents __builtin_ctz(0) as undefined, so the two branches differ for x == 0 */
+#else
+    return rb_popcount32((~x) & (x-1)); /* (~x)&(x-1) has 1 bits exactly where x has trailing zeros; yields 32 when x == 0 */
+#endif
+}
+
+static inline int /* ntz_int64: returns the number of trailing zero bits of x */
+ntz_int64(uint64_t x) {
+#ifdef HAVE_BUILTIN___BUILTIN_CTZLL
+    return __builtin_ctzll(x); /* NOTE(review): GCC documents __builtin_ctzll(0) as undefined, so the two branches differ for x == 0 */
+#else
+    return rb_popcount64((~x) & (x-1)); /* (~x)&(x-1) has 1 bits exactly where x has trailing zeros; yields 64 when x == 0 */
+#endif
+}
+
+static inline int /* ntz_intptr: trailing zero count of a uintptr_t, dispatched at compile time on SIZEOF_VOIDP */
+ntz_intptr(uintptr_t x) {
+#if SIZEOF_VOIDP == 8
+    return ntz_int64(x); /* 8-byte pointers: count over all 64 bits */
+#elif SIZEOF_VOIDP == 4
+    return ntz_int32(x); /* 4-byte pointers: count over 32 bits */
+#endif /* NOTE(review): there is no #else branch, so with any other SIZEOF_VOIDP this function returns no value */
+}
+
 #if HAVE_LONG_LONG && SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG
 # define DLONG LONG_LONG
 # define DL2NUM(x) LL2NUM(x)
Index: string.c
===================================================================
--- string.c	(revision 54853)
+++ string.c	(revision 54854)
@@ -427,32 +427,46 @@ search_nonascii(const char *p, const cha https://github.com/ruby/ruby/blob/trunk/string.c#L427
 #elif SIZEOF_VOIDP == 4
 # define NONASCII_MASK 0x80808080UL
 #endif
-#ifdef NONASCII_MASK
-    if ((int)SIZEOF_VOIDP * 2 < e - p) {
-        const uintptr_t *s, *t;
-        const uintptr_t lowbits = SIZEOF_VOIDP - 1;
-        s = (const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
-        while (p < (const char *)s) {
-            if (!ISASCII(*p))
-                return p;
-            p++;
-        }
-        t = (const uintptr_t*)(~lowbits & (uintptr_t)e);
-        while (s < t) {
-            if (*s & NONASCII_MASK) {
-                t = s;
-                break;
-            }
-            s++;
-        }
-        p = (const char *)t;
+
+#if !UNALIGNED_WORD_ACCESS
+    if (e - p > SIZEOF_VOIDP) {
+	switch (8 - (uintptr_t)p % 8) {
+#if SIZEOF_VOIDP > 4
+	  case 7: if (*p&0x80) return p; p++;
+	  case 6: if (*p&0x80) return p; p++;
+	  case 5: if (*p&0x80) return p; p++;
+	  case 4: if (*p&0x80) return p; p++;
+#endif
+	  case 3: if (*p&0x80) return p; p++;
+	  case 2: if (*p&0x80) return p; p++;
+	  case 1: if (*p&0x80) return p; p++;
+	}
     }
 #endif
-    while (p < e) {
-        if (!ISASCII(*p))
-            return p;
-        p++;
+
+    {
+	const uintptr_t *s = (const uintptr_t *)p;
+	const uintptr_t *t = (const uintptr_t *)(e - (SIZEOF_VOIDP-1));
+	for (;s < t; s++) {
+	    if (*s & NONASCII_MASK) {
+		return (const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
+	    }
+	}
+	p = (const char *)s;
     }
+
+    switch ((e - p) % SIZEOF_VOIDP) {
+#if SIZEOF_VOIDP > 4
+      case 7: if (*p&0x80) return p; p++;
+      case 6: if (*p&0x80) return p; p++;
+      case 5: if (*p&0x80) return p; p++;
+      case 4: if (*p&0x80) return p; p++;
+#endif
+      case 3: if (*p&0x80) return p; p++;
+      case 2: if (*p&0x80) return p; p++;
+      case 1: if (*p&0x80) return p;
+    }
+
     return NULL;
 }
 
Index: configure.in
===================================================================
--- configure.in	(revision 54853)
+++ configure.in	(revision 54854)
@@ -2432,6 +2432,8 @@ RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap6 https://github.com/ruby/ruby/blob/trunk/configure.in#L2432
 RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_ctz, [__builtin_ctz(0)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_ctzll, [__builtin_ctzll(0)])
 RUBY_CHECK_BUILTIN_FUNC(__builtin_choose_expr, [
     [int x[__extension__(__builtin_choose_expr(1, 1, -1))]];
     [int y[__extension__(__builtin_choose_expr(0, -1, 1))]];

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]