[前][次][番号順一覧][スレッド一覧]

ruby-changes:14127

From: yugui <ko1@a...>
Date: Fri, 27 Nov 2009 11:54:29 +0900 (JST)
Subject: [ruby-changes:14127] Ruby:r25941 (ruby_1_9_1): merges r24544 from trunk into ruby_1_9_1.

yugui	2009-11-27 11:54:10 +0900 (Fri, 27 Nov 2009)

  New Revision: 25941

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=25941

  Log:
    merges r24544 from trunk into ruby_1_9_1.
    --
    \d, \s and \w are now non-Unicode (ASCII-only) classes. [ruby-dev:39026]
    
    * include/ruby/oniguruma.h
      (ONIGENC_CTYPE_SPECIAL_MASK): added.
      (ONIGENC_CTYPE_D): ditto.
      (ONIGENC_CTYPE_S): ditto.
      (ONIGENC_CTYPE_W): ditto.
    
    * regparse.c: \d, \s and \w are now non-Unicode (ASCII-only) classes.
      [ruby-dev:39026]
      (fetch_token_in_cc): use ONIGENC_CTYPE_[DSW] for \d/\s/\w.
      (fetch_token): ditto.
      (add_ctype_to_cc): add routines for ONIGENC_CTYPE_[DSW].
      (parse_exp): ditto.
    
    * test/ruby/test_regexp.rb (TestRegexp#test_char_class):
      add tests for above.

  Modified files:
    branches/ruby_1_9_1/ChangeLog
    branches/ruby_1_9_1/include/ruby/oniguruma.h
    branches/ruby_1_9_1/regparse.c
    branches/ruby_1_9_1/test/ruby/test_regexp.rb
    branches/ruby_1_9_1/version.h

Index: ruby_1_9_1/regparse.c
===================================================================
--- ruby_1_9_1/regparse.c	(revision 25940)
+++ ruby_1_9_1/regparse.c	(revision 25941)
@@ -2946,32 +2946,32 @@
     switch (c) {
     case 'w':
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_W;
       tok->u.prop.not   = 0;
       break;
     case 'W':
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_W;
       tok->u.prop.not   = 1;
       break;
     case 'd':
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_D;
       tok->u.prop.not   = 0;
       break;
     case 'D':
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_D;
       tok->u.prop.not   = 1;
       break;
     case 's':
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_S;
       tok->u.prop.not   = 0;
       break;
     case 'S':
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_S;
       tok->u.prop.not   = 1;
       break;
     case 'h':
@@ -3233,14 +3233,14 @@
     case 'w':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_W;
       tok->u.prop.not   = 0;
       break;
 
     case 'W':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_W;
       tok->u.prop.not   = 1;
       break;
 
@@ -3273,28 +3273,28 @@
     case 's':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_S;
       tok->u.prop.not   = 0;
       break;
 
     case 'S':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_S;
       tok->u.prop.not   = 1;
       break;
 
     case 'd':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_D;
       tok->u.prop.not   = 0;
       break;
 
     case 'D':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_D;
       tok->u.prop.not   = 1;
       break;
 
@@ -3835,6 +3835,28 @@
   OnigCodePoint sb_out;
   OnigEncoding enc = env->enc;
 
+  switch (ctype) {
+  case ONIGENC_CTYPE_D:
+  case ONIGENC_CTYPE_S:
+  case ONIGENC_CTYPE_W:
+    ctype ^= ONIGENC_CTYPE_SPECIAL_MASK;
+    if (not != 0) {
+      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+	if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
+	  BITSET_SET_BIT(cc->bs, c);
+      }
+      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+    }
+    else {
+      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+	if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
+	  BITSET_SET_BIT(cc->bs, c);
+      }
+    }
+    return 0;
+    break;
+  }
+
   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
   if (r == 0) {
     return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
@@ -5186,6 +5208,19 @@
   case TK_CHAR_TYPE:
     {
       switch (tok->u.prop.ctype) {
+      case ONIGENC_CTYPE_D:
+      case ONIGENC_CTYPE_S:
+      case ONIGENC_CTYPE_W:
+	{
+	    CClassNode* cc;
+	    *np = node_new_cclass();
+	    CHECK_NULL_RETURN_MEMERR(*np);
+	    cc = NCCLASS(*np);
+	    add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+	    if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
+	}
+	break;
+
       case ONIGENC_CTYPE_WORD:
 	*np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
 	CHECK_NULL_RETURN_MEMERR(*np);
Index: ruby_1_9_1/include/ruby/oniguruma.h
===================================================================
--- ruby_1_9_1/include/ruby/oniguruma.h	(revision 25940)
+++ ruby_1_9_1/include/ruby/oniguruma.h	(revision 25941)
@@ -200,6 +200,14 @@
 #define ONIGENC_CTYPE_ALNUM    13  /* alpha || digit */
 #define ONIGENC_CTYPE_ASCII    14
 #define ONIGENC_MAX_STD_CTYPE  ONIGENC_CTYPE_ASCII
+#define ONIGENC_CTYPE_SPECIAL_MASK        128
+#define ONIGENC_CTYPE_S            /* [\t\n\v\f\r\s] */ \
+    ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_SPACE
+#define ONIGENC_CTYPE_D            /* [0-9] */ \
+    ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_DIGIT
+#define ONIGENC_CTYPE_W            /* [0-9A-Za-z_] */ \
+    ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_WORD
+#define ONIGENC_CTYPE_SPECIAL_P(ctype) ((ctype) & ONIGENC_CTYPE_SPECIAL_MASK)
 
 
 #define onig_enc_len(enc,p,e)                ONIGENC_MBC_ENC_LEN(enc, p, e)
Index: ruby_1_9_1/ChangeLog
===================================================================
--- ruby_1_9_1/ChangeLog	(revision 25940)
+++ ruby_1_9_1/ChangeLog	(revision 25941)
@@ -1,3 +1,21 @@
+Sun Aug 16 00:30:33 2009  NARUSE, Yui  <naruse@r...>
+
+	* include/ruby/oniguruma.h
+	  (ONIGENC_CTYPE_SPECIAL_MASK): added.
+	  (ONIGENC_CTYPE_D): ditto.
+	  (ONIGENC_CTYPE_S): ditto.
+	  (ONIGENC_CTYPE_W): ditto.
+
+	* regparse.c: \d, \s and \w are now non Unicode class.
+	  [ruby-dev:39026]
+	  (fetch_token_in_cc): use ONIGENC_CTYPE_[DSW] for \d/\s/\w.
+	  (fetch_token): ditto.
+	  (add_ctype_to_cc): add routines for ONIGENC_CTYPE_[DSW].
+	  (parse_exp): ditto.
+
+	* test/ruby/test_regexp.rb (TestRegexp#test_char_class):
+	  add tests for above.
+
 Fri Sep 18 16:15:04 2009  Nobuyoshi Nakada  <nobu@r...>
 
 	* compile.c (iseq_compile_each), parse.y (stmt, arg): arg_concat()
Index: ruby_1_9_1/version.h
===================================================================
--- ruby_1_9_1/version.h	(revision 25940)
+++ ruby_1_9_1/version.h	(revision 25941)
@@ -1,5 +1,5 @@
 #define RUBY_VERSION "1.9.1"
-#define RUBY_PATCHLEVEL 342
+#define RUBY_PATCHLEVEL 343
 #define RUBY_VERSION_MAJOR 1
 #define RUBY_VERSION_MINOR 9
 #define RUBY_VERSION_TEENY 1
Index: ruby_1_9_1/test/ruby/test_regexp.rb
===================================================================
--- ruby_1_9_1/test/ruby/test_regexp.rb	(revision 25940)
+++ ruby_1_9_1/test/ruby/test_regexp.rb	(revision 25941)
@@ -659,6 +659,13 @@
     check(/\A[[^b-c]&&[^e]&&a-f]\z/, %w(a d f), %w(b c e g 0))
     check(/\A[\n\r\t]\z/, ["\n", "\r", "\t"])
     failcheck('[9-1]')
+
+    assert_match(/\A\d+\z/, "0123456789")
+    assert_no_match(/\d/, "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19")
+    assert_match(/\A\w+\z/, "09azAZ_")
+    assert_no_match(/\w/, "\uff10\uff19\uff41\uff5a\uff21\uff3a")
+    assert_match(/\A\s+\z/, "\r\n\v\f\r\s")
+    assert_no_match(/\s/, "\u0085")
   end
 
   def test_posix_bracket

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]