ruby-changes:4302
From: ko1@a...
Date: Tue, 18 Mar 2008 04:04:48 +0900 (JST)
Subject: [ruby-changes:4302] naruse - Ruby:r15792 (trunk): * re.c (rb_memsearch_ss): simple shift search.
naruse 2008-03-18 04:04:29 +0900 (Tue, 18 Mar 2008)
New Revision: 15792
Modified files:
trunk/ChangeLog
trunk/common.mk
trunk/include/ruby/encoding.h
trunk/include/ruby/intern.h
trunk/re.c
trunk/string.c
trunk/version.h
Log:
* re.c (rb_memsearch_ss): simple shift search.
* re.c (rb_memsearch_qs): quick search.
* re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string.
* re.c (rb_memsearch_qs_utf8_hash): hash functions for above.
* re.c (rb_memsearch): use above functions.
* string.c (rb_str_index): give enc to rb_memsearch.
* include/ruby/intern.h (rb_memsearch): move to encoding.h.
* include/ruby/encoding.h (rb_memsearch): move from intern.h.
* common.mk (PREP): add dependency.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/version.h?r1=15792&r2=15791&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15792&r2=15791&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15792&r2=15791&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=15792&r2=15791&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=15792&r2=15791&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/intern.h?r1=15792&r2=15791&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/common.mk?r1=15792&r2=15791&diff_format=u
Index: include/ruby/intern.h
===================================================================
--- include/ruby/intern.h (revision 15791)
+++ include/ruby/intern.h (revision 15792)
@@ -466,7 +466,6 @@
/* re.c */
#define rb_memcmp memcmp
int rb_memcicmp(const void*,const void*,long);
-long rb_memsearch(const void*,long,const void*,long);
VALUE rb_reg_nth_defined(int, VALUE);
VALUE rb_reg_nth_match(int, VALUE);
VALUE rb_reg_last_match(VALUE);
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h (revision 15791)
+++ include/ruby/encoding.h (revision 15792)
@@ -176,5 +176,6 @@
VALUE rb_enc_default_external(void);
void rb_enc_set_default_external(VALUE encoding);
VALUE rb_locale_charmap(VALUE klass);
+long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
#endif /* RUBY_ENCODING_H */
Index: re.c
===================================================================
--- re.c (revision 15791)
+++ re.c (revision 15792)
@@ -95,43 +95,147 @@
return memcmp(p1, p2, len);
}
-long
-rb_memsearch(const void *x0, long m, const void *y0, long n)
+static inline long
+rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
- const unsigned char *x = x0, *y = y0;
- const unsigned char *s, *e;
- long i;
- int d;
- unsigned long hx, hy;
+ const unsigned char *x = xs, *xe = xs + m;
+ const unsigned char *y = ys, *ye = ys + n;
+#ifndef VALUE_MAX
+# if SIZEOF_VALUE == 8
+# define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
+# elif SIZEOF_VALUE == 4
+# define VALUE_MAX 0xFFFFFFFFUL
+# endif
+#endif
+ VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
-#define KR_REHASH(a, b, h) (((h) << 1) - (((unsigned long)(a))<<d) + (b))
+ if (m > SIZEOF_VALUE)
+ rb_bug("!!too long pattern string!!");
- if (m > n) return -1;
- s = y; e = s + n - m;
+ /* Prepare hash value */
+ for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
+ hx <<= CHAR_BIT;
+ hy <<= CHAR_BIT;
+ hx |= *x;
+ hy |= *y;
+ }
+ /* Searching */
+ while (hx != hy) {
+ if (y == ye)
+ return -1;
+ hy <<= CHAR_BIT;
+ hy |= *y;
+ hy &= mask;
+ y++;
+ }
+ return y - ys - m;
+}
+static inline long
+rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+ const unsigned char *x = xs, *xe = xs + m;
+ const unsigned char *y = ys, *ye = ys + n;
+ VALUE i, qstable[256];
+
/* Preprocessing */
- /* computes d = 2^(m-1) with
- the left-shift operator */
- d = sizeof(hx) * CHAR_BIT - 1;
- if (d > m) d = m;
+ for (i = 0; i < 256; ++i)
+ qstable[i] = m + 1;
+ for (; x < xe; ++x)
+ qstable[*x] = xe - x;
+ /* Searching */
+ for (; y < ye; y += *(qstable + y[m])) {
+ if (*xs == *y && memcmp(xs, y, m) == 0)
+ return y - ys;
+ }
+ return -1;
+}
- if (n == m) {
- return memcmp(x, s, m) == 0 ? 0 : -1;
+static inline unsigned int
+rb_memsearch_qs_utf8_hash(const unsigned char *x)
+{
+ register const unsigned int mix = 8353;
+ register unsigned int h = *x;
+ if (h < 0xC0) {
+ return h + 256;
}
- /* Prepare hash value */
- for (hy = hx = i = 0; i < d; ++i) {
- hx = KR_REHASH(0, x[i], hx);
- hy = KR_REHASH(0, s[i], hy);
+ else if (h < 0xE0) {
+ h *= mix;
+ h += x[1];
}
+ else if (h < 0xF0) {
+ h *= mix;
+ h += x[1];
+ h *= mix;
+ h += x[2];
+ }
+ else if (h < 0xF5) {
+ h *= mix;
+ h += x[1];
+ h *= mix;
+ h += x[2];
+ h *= mix;
+ h += x[3];
+ }
+ else {
+ return h + 256;
+ }
+ return (unsigned char)h;
+}
+
+static inline long
+rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+ const unsigned char *x = xs, *xe = xs + m;
+ const unsigned char *y = ys, *ye = ys + n;
+ VALUE i, qstable[512];
+
+ /* Preprocessing */
+ for (i = 0; i < 512; ++i) {
+ qstable[i] = m + 1;
+ }
+ for (; x < xe; ++x) {
+ qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
+ }
/* Searching */
- while (hx != hy || memcmp(x, s, m)) {
- if (s >= e) return -1;
- hy = KR_REHASH(*s, *(s+d), hy);
- s++;
+ for (; y < ye; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
+ if (*xs == *y && memcmp(xs, y, m) == 0)
+ return y - ys;
}
- return s-y;
+ return -1;
}
+long
+rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
+{
+ const unsigned char *x = x0, *y = y0;
+
+ if (m > n) return -1;
+ else if (m == n) {
+ return memcmp(x0, y0, m) == 0 ? 0 : -1;
+ }
+ else if (m < 1) {
+ return 0;
+ }
+ else if (m == 1) {
+ const unsigned char *ys = y, *ye = ys + n;
+ for (; y < ye; ++y) {
+ if (*x == *y)
+ return y - ys;
+ }
+ return -1;
+ }
+ else if (m <= SIZEOF_VALUE) {
+ return rb_memsearch_ss(x0, m, y0, n);
+ }
+ else if (enc == rb_utf8_encoding()){
+ return rb_memsearch_qs_utf8(x0, m, y0, n);
+ }
+ else {
+ return rb_memsearch_qs(x0, m, y0, n);
+ }
+}
+
#define REG_LITERAL FL_USER5
#define REG_ENCODING_NONE FL_USER6
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15791)
+++ ChangeLog (revision 15792)
@@ -1,3 +1,23 @@
+Tue Mar 18 04:00:27 2008 NARUSE, Yui <naruse@r...>
+
+ * re.c (rb_memsearch_ss): simple shift search.
+
+ * re.c (rb_memsearch_qs): quick search.
+
+ * re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string.
+
+ * re.c (rb_memsearch_qs_utf8_hash): hash functions for above.
+
+ * re.c (rb_memsearch): use above functions.
+
+ * string.c (rb_str_index): give enc to rb_memsearch.
+
+ * include/ruby/intern.h (rb_memsearch): move to encoding.h.
+
+ * include/ruby/encoding.h (rb_memsearch): move from intern.h.
+
+ * common.mk (PREP): add dependency.
+
Mon Mar 17 22:23:54 2008 Yusuke Endoh <mame@t...>
* array.c (rb_ary_take, rb_ary_take_while, rb_ary_drop,
Index: string.c
===================================================================
--- string.c (revision 15791)
+++ string.c (revision 15792)
@@ -2088,7 +2088,7 @@
len = RSTRING_LEN(str) - offset;
for (;;) {
char *t;
- pos = rb_memsearch(sptr, slen, s, len);
+ pos = rb_memsearch(sptr, slen, s, len, enc);
if (pos < 0) return pos;
t = rb_enc_right_char_head(s, s+pos, enc);
if (t == s + pos) break;
Index: common.mk
===================================================================
--- common.mk (revision 15791)
+++ common.mk (revision 15792)
@@ -110,6 +110,8 @@
@$(MINIRUBY) $(srcdir)/ext/extmk.rb --make="$(MAKE)" $(EXTMK_ARGS)
prog: $(PROGRAM) $(WPROGRAM)
+$(PREP): $(MKFILES)
+
miniruby$(EXEEXT): config.status $(NORMALMAINOBJ) $(MINIOBJS) $(COMMONOBJS) $(DMYEXT) $(ARCHFILE)
GORUBY = go$(RUBY_INSTALL_NAME)
Index: version.h
===================================================================
--- version.h (revision 15791)
+++ version.h (revision 15792)
@@ -1,7 +1,7 @@
#define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2008-03-17"
+#define RUBY_RELEASE_DATE "2008-03-18"
#define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20080317
+#define RUBY_RELEASE_CODE 20080318
#define RUBY_PATCHLEVEL 0
#define RUBY_VERSION_MAJOR 1
@@ -9,7 +9,7 @@
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2008
#define RUBY_RELEASE_MONTH 3
-#define RUBY_RELEASE_DAY 17
+#define RUBY_RELEASE_DAY 18
#ifdef RUBY_EXTERN
RUBY_EXTERN const char ruby_version[];
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/