[前][次][番号順一覧][スレッド一覧]

ruby-changes:4302

From: ko1@a...
Date: Tue, 18 Mar 2008 04:04:48 +0900 (JST)
Subject: [ruby-changes:4302] naruse - Ruby:r15792 (trunk): * re.c (rb_memsearch_ss): simple shift search.

naruse	2008-03-18 04:04:29 +0900 (Tue, 18 Mar 2008)

  New Revision: 15792

  Modified files:
    trunk/ChangeLog
    trunk/common.mk
    trunk/include/ruby/encoding.h
    trunk/include/ruby/intern.h
    trunk/re.c
    trunk/string.c
    trunk/version.h

  Log:
    * re.c (rb_memsearch_ss): simple shift search.
    
    * re.c (rb_memsearch_qs): quick search.
    
    * re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string.
    
    * re.c (rb_memsearch_qs_utf8_hash): hash functions for above.
    
    * re.c (rb_memsearch): use above functions.
    
    * string.c (rb_str_index): give enc to rb_memsearch.
    
    * include/ruby/intern.h (rb_memsearch): move to encoding.h.
    
    * include/ruby/encoding.h (rb_memsearch): move from intern.h.
    
    * common.mk (PREP): add dependency.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/version.h?r1=15792&r2=15791&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15792&r2=15791&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15792&r2=15791&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=15792&r2=15791&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=15792&r2=15791&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/intern.h?r1=15792&r2=15791&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/common.mk?r1=15792&r2=15791&diff_format=u

Index: include/ruby/intern.h
===================================================================
--- include/ruby/intern.h	(revision 15791)
+++ include/ruby/intern.h	(revision 15792)
@@ -466,7 +466,6 @@
 /* re.c */
 #define rb_memcmp memcmp
 int rb_memcicmp(const void*,const void*,long);
-long rb_memsearch(const void*,long,const void*,long);
 VALUE rb_reg_nth_defined(int, VALUE);
 VALUE rb_reg_nth_match(int, VALUE);
 VALUE rb_reg_last_match(VALUE);
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 15791)
+++ include/ruby/encoding.h	(revision 15792)
@@ -176,5 +176,6 @@
 VALUE rb_enc_default_external(void);
 void rb_enc_set_default_external(VALUE encoding);
 VALUE rb_locale_charmap(VALUE klass);
+long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
 
 #endif /* RUBY_ENCODING_H */
Index: re.c
===================================================================
--- re.c	(revision 15791)
+++ re.c	(revision 15792)
@@ -95,43 +95,147 @@
     return memcmp(p1, p2, len);
 }
 
-long
-rb_memsearch(const void *x0, long m, const void *y0, long n)
+static inline long
+rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
 {
-    const unsigned char *x = x0, *y = y0;
-    const unsigned char *s, *e;
-    long i;
-    int d;
-    unsigned long hx, hy;
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+#ifndef VALUE_MAX
+# if SIZEOF_VALUE == 8
+#  define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
+# elif SIZEOF_VALUE == 4
+#  define VALUE_MAX 0xFFFFFFFFUL
+# endif
+#endif
+    VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
 
-#define KR_REHASH(a, b, h) (((h) << 1) - (((unsigned long)(a))<<d) + (b))
+    if (m > SIZEOF_VALUE)
+	rb_bug("!!too long pattern string!!");
 
-    if (m > n) return -1;
-    s = y; e = s + n - m;
+    	/* Prepare hash value */
+    for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
+	hx <<= CHAR_BIT;
+	hy <<= CHAR_BIT;
+	hx |= *x;
+	hy |= *y;
+    }
+    /* Searching */
+    while (hx != hy) {
+	if (y == ye)
+	    return -1;
+	hy <<= CHAR_BIT;
+	hy |= *y;
+	hy &= mask;
+	y++;
+    }
+    return y - ys - m;
+}
 
+static inline long
+rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+    VALUE i, qstable[256];
+
     /* Preprocessing */
-    /* computes d = 2^(m-1) with
-       the left-shift operator */
-    d = sizeof(hx) * CHAR_BIT - 1;
-    if (d > m) d = m;
+    for (i = 0; i < 256; ++i)
+	qstable[i] = m + 1;
+    for (; x < xe; ++x)
+	qstable[*x] = xe - x;
+    /* Searching */
+    for (; y < ye; y += *(qstable + y[m])) {
+	if (*xs == *y && memcmp(xs, y, m) == 0)
+	    return y - ys;
+    }
+    return -1;
+}
 
-    if (n == m) {
-	return memcmp(x, s, m) == 0 ? 0 : -1;
+static inline unsigned int
+rb_memsearch_qs_utf8_hash(const unsigned char *x)
+{
+    register const unsigned int mix = 8353;
+    register unsigned int h = *x;
+    if (h < 0xC0) {
+	return h + 256;
     }
-    /* Prepare hash value */
-    for (hy = hx = i = 0; i < d; ++i) {
-	hx = KR_REHASH(0, x[i], hx);
-	hy = KR_REHASH(0, s[i], hy);
+    else if (h < 0xE0) {
+	h *= mix;
+	h += x[1];
     }
+    else if (h < 0xF0) {
+	h *= mix;
+	h += x[1];
+	h *= mix;
+	h += x[2];
+    }
+    else if (h < 0xF5) {
+	h *= mix;
+	h += x[1];
+	h *= mix;
+	h += x[2];
+	h *= mix;
+	h += x[3];
+    }
+    else {
+	return h + 256;
+    }
+    return (unsigned char)h;
+}
+
+static inline long
+rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+    VALUE i, qstable[512];
+
+    /* Preprocessing */
+    for (i = 0; i < 512; ++i) {
+	qstable[i] = m + 1;
+    }
+    for (; x < xe; ++x) {
+	qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
+    }
     /* Searching */
-    while (hx != hy || memcmp(x, s, m)) {
-	if (s >= e) return -1;
-	hy = KR_REHASH(*s, *(s+d), hy);
-	s++;
+    for (; y < ye; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
+	if (*xs == *y && memcmp(xs, y, m) == 0)
+	    return y - ys;
     }
-    return s-y;
+    return -1;
 }
 
+long
+rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
+{
+    const unsigned char *x = x0, *y = y0;
+
+    if (m > n) return -1;
+    else if (m == n) {
+	return memcmp(x0, y0, m) == 0 ? 0 : -1;
+    }
+    else if (m < 1) {
+	return 0;
+    }
+    else if (m == 1) {
+	const unsigned char *ys = y, *ye = ys + n;
+	for (; y < ye; ++y) {
+	    if (*x == *y)
+		return y - ys;
+	}
+	return -1;
+    }
+    else if (m <= SIZEOF_VALUE) {
+	return rb_memsearch_ss(x0, m, y0, n);
+    }
+    else if (enc == rb_utf8_encoding()){
+	return rb_memsearch_qs_utf8(x0, m, y0, n);
+    }
+    else {
+	return rb_memsearch_qs(x0, m, y0, n);
+    }
+}
+
 #define REG_LITERAL FL_USER5
 #define REG_ENCODING_NONE FL_USER6
 
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15791)
+++ ChangeLog	(revision 15792)
@@ -1,3 +1,23 @@
+Tue Mar 18 04:00:27 2008  NARUSE, Yui  <naruse@r...>
+
+	* re.c (rb_memsearch_ss): simple shift search.
+
+	* re.c (rb_memsearch_qs): quick search.
+
+	* re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string.
+
+	* re.c (rb_memsearch_qs_utf8_hash): hash functions for above.
+
+	* re.c (rb_memsearch): use above functions.
+
+	* string.c (rb_str_index): give enc to rb_memsearch.
+
+	* include/ruby/intern.h (rb_memsearch): move to encoding.h.
+
+	* include/ruby/encoding.h (rb_memsearch): move from intern.h.
+
+	* common.mk (PREP): add dependency.
+
 Mon Mar 17 22:23:54 2008  Yusuke Endoh  <mame@t...>
 
 	* array.c (rb_ary_take, rb_ary_take_while, rb_ary_drop,
Index: string.c
===================================================================
--- string.c	(revision 15791)
+++ string.c	(revision 15792)
@@ -2088,7 +2088,7 @@
     len = RSTRING_LEN(str) - offset;
     for (;;) {
 	char *t;
-	pos = rb_memsearch(sptr, slen, s, len);
+	pos = rb_memsearch(sptr, slen, s, len, enc);
 	if (pos < 0) return pos;
 	t = rb_enc_right_char_head(s, s+pos, enc);
 	if (t == s + pos) break;
Index: common.mk
===================================================================
--- common.mk	(revision 15791)
+++ common.mk	(revision 15792)
@@ -110,6 +110,8 @@
 	@$(MINIRUBY) $(srcdir)/ext/extmk.rb --make="$(MAKE)" $(EXTMK_ARGS)
 prog: $(PROGRAM) $(WPROGRAM)
 
+$(PREP): $(MKFILES)
+
 miniruby$(EXEEXT): config.status $(NORMALMAINOBJ) $(MINIOBJS) $(COMMONOBJS) $(DMYEXT) $(ARCHFILE)
 
 GORUBY = go$(RUBY_INSTALL_NAME)
Index: version.h
===================================================================
--- version.h	(revision 15791)
+++ version.h	(revision 15792)
@@ -1,7 +1,7 @@
 #define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2008-03-17"
+#define RUBY_RELEASE_DATE "2008-03-18"
 #define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20080317
+#define RUBY_RELEASE_CODE 20080318
 #define RUBY_PATCHLEVEL 0
 
 #define RUBY_VERSION_MAJOR 1
@@ -9,7 +9,7 @@
 #define RUBY_VERSION_TEENY 0
 #define RUBY_RELEASE_YEAR 2008
 #define RUBY_RELEASE_MONTH 3
-#define RUBY_RELEASE_DAY 17
+#define RUBY_RELEASE_DAY 18
 
 #ifdef RUBY_EXTERN
 RUBY_EXTERN const char ruby_version[];

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]