ruby-changes:4302
From: ko1@a...
Date: Tue, 18 Mar 2008 04:04:48 +0900 (JST)
Subject: [ruby-changes:4302] naruse - Ruby:r15792 (trunk): * re.c (rb_memsearch_ss): simple shift search.
naruse 2008-03-18 04:04:29 +0900 (Tue, 18 Mar 2008) New Revision: 15792 Modified files: trunk/ChangeLog trunk/common.mk trunk/include/ruby/encoding.h trunk/include/ruby/intern.h trunk/re.c trunk/string.c trunk/version.h Log: * re.c (rb_memsearch_ss): simple shift search. * re.c (rb_memsearch_qs): quick search. * re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string. * re.c (rb_memsearch_qs_utf8_hash): hash functions for above. * re.c (rb_memsearch): use above functions. * string.c (rb_str_index): give enc to rb_memsearch. * include/ruby/intern.h (rb_memsearch): move to encoding.h. * include/ruby/encoding.h (rb_memsearch): move from intern.h. * common.mk (PREP): add dependency. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/version.h?r1=15792&r2=15791&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15792&r2=15791&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15792&r2=15791&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=15792&r2=15791&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=15792&r2=15791&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/intern.h?r1=15792&r2=15791&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/common.mk?r1=15792&r2=15791&diff_format=u Index: include/ruby/intern.h =================================================================== --- include/ruby/intern.h (revision 15791) +++ include/ruby/intern.h (revision 15792) @@ -466,7 +466,6 @@ /* re.c */ #define rb_memcmp memcmp int rb_memcicmp(const void*,const void*,long); -long rb_memsearch(const void*,long,const void*,long); VALUE rb_reg_nth_defined(int, VALUE); VALUE rb_reg_nth_match(int, VALUE); VALUE rb_reg_last_match(VALUE); Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 15791) +++ include/ruby/encoding.h (revision 15792) @@ -176,5 +176,6 @@ VALUE rb_enc_default_external(void); void rb_enc_set_default_external(VALUE encoding); VALUE rb_locale_charmap(VALUE klass); +long rb_memsearch(const void*,long,const void*,long,rb_encoding*); #endif /* RUBY_ENCODING_H */ Index: re.c =================================================================== --- re.c (revision 15791) +++ re.c (revision 15792) @@ -95,43 +95,147 @@ return memcmp(p1, p2, len); } -long -rb_memsearch(const void *x0, long m, const void *y0, long n) +static inline long +rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n) { - const unsigned char *x = x0, *y = y0; - const unsigned char *s, *e; - long i; - int d; - unsigned long hx, hy; + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys, *ye = ys + n; +#ifndef VALUE_MAX +# if SIZEOF_VALUE == 8 +# define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL +# elif SIZEOF_VALUE == 4 +# define VALUE_MAX 0xFFFFFFFFUL +# endif +#endif + VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT); -#define KR_REHASH(a, b, h) (((h) << 1) - (((unsigned long)(a))<<d) + (b)) + if (m > SIZEOF_VALUE) + rb_bug("!!too long pattern string!!"); - if (m > n) return -1; - s = y; e = s + n - m; + /* Prepare hash value */ + for (hx = *x++, hy = *y++; x < xe; ++x, ++y) { + hx <<= CHAR_BIT; + hy <<= CHAR_BIT; + hx |= *x; + hy |= *y; + } + /* Searching */ + while (hx != hy) { + if (y == ye) + return -1; + hy <<= CHAR_BIT; + hy |= *y; + hy &= mask; + y++; + } + return y - ys - m; +} +static inline long +rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys, *ye = ys + n; + VALUE i, qstable[256]; + /* Preprocessing */ - /* computes d = 2^(m-1) with - the left-shift operator */ - d = sizeof(hx) * CHAR_BIT - 1; - if (d > m) d = m; + for (i = 0; i < 256; ++i) + qstable[i] = m + 1; + for (; x < xe; ++x) + qstable[*x] = xe - x; + /* Searching */ + for (; y < ye; y += *(qstable + y[m])) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return y - ys; + } + return -1; +} - if (n == m) { - return memcmp(x, s, m) == 0 ? 0 : -1; +static inline unsigned int +rb_memsearch_qs_utf8_hash(const unsigned char *x) +{ + register const unsigned int mix = 8353; + register unsigned int h = *x; + if (h < 0xC0) { + return h + 256; } - /* Prepare hash value */ - for (hy = hx = i = 0; i < d; ++i) { - hx = KR_REHASH(0, x[i], hx); - hy = KR_REHASH(0, s[i], hy); + else if (h < 0xE0) { + h *= mix; + h += x[1]; } + else if (h < 0xF0) { + h *= mix; + h += x[1]; + h *= mix; + h += x[2]; + } + else if (h < 0xF5) { + h *= mix; + h += x[1]; + h *= mix; + h += x[2]; + h *= mix; + h += x[3]; + } + else { + return h + 256; + } + return (unsigned char)h; +} + +static inline long +rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys, *ye = ys + n; + VALUE i, qstable[512]; + + /* Preprocessing */ + for (i = 0; i < 512; ++i) { + qstable[i] = m + 1; + } + for (; x < xe; ++x) { + qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x; + } /* Searching */ - while (hx != hy || memcmp(x, s, m)) { - if (s >= e) return -1; - hy = KR_REHASH(*s, *(s+d), hy); - s++; + for (; y < ye; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return y - ys; } - return s-y; + return -1; } +long +rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) +{ + const unsigned char *x = x0, *y = y0; + + if (m > n) return -1; + else if (m == n) { + return memcmp(x0, y0, m) == 0 ? 0 : -1; + } + else if (m < 1) { + return 0; + } + else if (m == 1) { + const unsigned char *ys = y, *ye = ys + n; + for (; y < ye; ++y) { + if (*x == *y) + return y - ys; + } + return -1; + } + else if (m <= SIZEOF_VALUE) { + return rb_memsearch_ss(x0, m, y0, n); + } + else if (enc == rb_utf8_encoding()){ + return rb_memsearch_qs_utf8(x0, m, y0, n); + } + else { + return rb_memsearch_qs(x0, m, y0, n); + } +} + #define REG_LITERAL FL_USER5 #define REG_ENCODING_NONE FL_USER6 Index: ChangeLog =================================================================== --- ChangeLog (revision 15791) +++ ChangeLog (revision 15792) @@ -1,3 +1,23 @@ +Tue Mar 18 04:00:27 2008 NARUSE, Yui <naruse@r...> + + * re.c (rb_memsearch_ss): simple shift search. + + * re.c (rb_memsearch_qs): quick search. + + * re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string. + + * re.c (rb_memsearch_qs_utf8_hash): hash functions for above. + + * re.c (rb_memsearch): use above functions. + + * string.c (rb_str_index): give enc to rb_memsearch. + + * include/ruby/intern.h (rb_memsearch): move to encoding.h. + + * include/ruby/encoding.h (rb_memsearch): move from intern.h. + + * common.mk (PREP): add dependency. + Mon Mar 17 22:23:54 2008 Yusuke Endoh <mame@t...> * array.c (rb_ary_take, rb_ary_take_while, rb_ary_drop, Index: string.c =================================================================== --- string.c (revision 15791) +++ string.c (revision 15792) @@ -2088,7 +2088,7 @@ len = RSTRING_LEN(str) - offset; for (;;) { char *t; - pos = rb_memsearch(sptr, slen, s, len); + pos = rb_memsearch(sptr, slen, s, len, enc); if (pos < 0) return pos; t = rb_enc_right_char_head(s, s+pos, enc); if (t == s + pos) break; Index: common.mk =================================================================== --- common.mk (revision 15791) +++ common.mk (revision 15792) @@ -110,6 +110,8 @@ @$(MINIRUBY) $(srcdir)/ext/extmk.rb --make="$(MAKE)" $(EXTMK_ARGS) prog: $(PROGRAM) $(WPROGRAM) +$(PREP): $(MKFILES) + miniruby$(EXEEXT): config.status $(NORMALMAINOBJ) $(MINIOBJS) $(COMMONOBJS) $(DMYEXT) $(ARCHFILE) GORUBY = go$(RUBY_INSTALL_NAME) Index: version.h =================================================================== --- version.h (revision 15791) +++ version.h (revision 15792) @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2008-03-17" +#define RUBY_RELEASE_DATE "2008-03-18" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20080317 +#define RUBY_RELEASE_CODE 20080318 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2008 #define RUBY_RELEASE_MONTH 3 -#define RUBY_RELEASE_DAY 17 +#define RUBY_RELEASE_DAY 18 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[]; -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/