[前][次][番号順一覧][スレッド一覧]

ruby-changes:35516

From: naruse <ko1@a...>
Date: Tue, 16 Sep 2014 01:19:03 +0900 (JST)
Subject: [ruby-changes:35516] naruse:r47598 (trunk): * reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544

naruse	2014-09-16 01:18:41 +0900 (Tue, 16 Sep 2014)

  New Revision: 47598

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=47598

  Log:
    * reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544

  Modified files:
    trunk/ChangeLog
    trunk/enc/big5.c
    trunk/enc/euc_jp.c
    trunk/enc/iso_8859_1.c
    trunk/enc/iso_8859_10.c
    trunk/enc/iso_8859_13.c
    trunk/enc/iso_8859_14.c
    trunk/enc/iso_8859_15.c
    trunk/enc/iso_8859_16.c
    trunk/enc/iso_8859_2.c
    trunk/enc/iso_8859_3.c
    trunk/enc/iso_8859_4.c
    trunk/enc/iso_8859_5.c
    trunk/enc/iso_8859_7.c
    trunk/enc/iso_8859_9.c
    trunk/enc/koi8_r.c
    trunk/enc/koi8_u.c
    trunk/enc/shift_jis.c
    trunk/enc/unicode.c
    trunk/enc/us_ascii.c
    trunk/enc/utf_16_32.h
    trunk/enc/utf_16be.c
    trunk/enc/utf_16le.c
    trunk/enc/utf_8.c
    trunk/enc/windows_1251.c
    trunk/include/ruby/oniguruma.h
    trunk/regcomp.c
    trunk/regenc.c
    trunk/regenc.h
    trunk/regexec.c
    trunk/regint.h
    trunk/regparse.c
    trunk/regparse.h
Index: regparse.c
===================================================================
--- regparse.c	(revision 47597)
+++ regparse.c	(revision 47598)
@@ -4153,17 +4153,15 @@ add_ctype_to_cc_by_range(CClassNode* cc, https://github.com/ruby/ruby/blob/trunk/regparse.c#L4153
 }
 
 static int
-add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
+add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
 {
-  int maxcode, ascii_range;
+  int maxcode;
   int c, r;
   const OnigCodePoint *ranges;
   OnigCodePoint sb_out;
   OnigEncoding enc = env->enc;
   OnigOptionType option = env->option;
 
-  ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0);
-
   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
   if (r == 0) {
     if (ascii_range) {
@@ -4280,31 +4278,32 @@ add_ctype_to_cc(CClassNode* cc, int ctyp https://github.com/ruby/ruby/blob/trunk/regparse.c#L4278
 }
 
 static int
-parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
+parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
+		    UChar** src, UChar* end, ScanEnv* env)
 {
 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
 #define POSIX_BRACKET_NAME_MIN_LEN         4
 
   static const PosixBracketEntryType PBS[] = {
-    { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
-    { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
-    { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
-    { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
-    { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
-    { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
-    { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
-    { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
-    { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
-    { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
-    { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
-    { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
-    { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
-    { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
-    { (UChar* )NULL,     -1, 0 }
+    POSIX_BRACKET_ENTRY_INIT("alnum",  ONIGENC_CTYPE_ALNUM),
+    POSIX_BRACKET_ENTRY_INIT("alpha",  ONIGENC_CTYPE_ALPHA),
+    POSIX_BRACKET_ENTRY_INIT("blank",  ONIGENC_CTYPE_BLANK),
+    POSIX_BRACKET_ENTRY_INIT("cntrl",  ONIGENC_CTYPE_CNTRL),
+    POSIX_BRACKET_ENTRY_INIT("digit",  ONIGENC_CTYPE_DIGIT),
+    POSIX_BRACKET_ENTRY_INIT("graph",  ONIGENC_CTYPE_GRAPH),
+    POSIX_BRACKET_ENTRY_INIT("lower",  ONIGENC_CTYPE_LOWER),
+    POSIX_BRACKET_ENTRY_INIT("print",  ONIGENC_CTYPE_PRINT),
+    POSIX_BRACKET_ENTRY_INIT("punct",  ONIGENC_CTYPE_PUNCT),
+    POSIX_BRACKET_ENTRY_INIT("space",  ONIGENC_CTYPE_SPACE),
+    POSIX_BRACKET_ENTRY_INIT("upper",  ONIGENC_CTYPE_UPPER),
+    POSIX_BRACKET_ENTRY_INIT("xdigit", ONIGENC_CTYPE_XDIGIT),
+    POSIX_BRACKET_ENTRY_INIT("ascii",  ONIGENC_CTYPE_ASCII),
+    POSIX_BRACKET_ENTRY_INIT("word",   ONIGENC_CTYPE_WORD),
   };
 
   const PosixBracketEntryType *pb;
   int not, i, r;
+  int ascii_range;
   OnigCodePoint c;
   OnigEncoding enc = env->enc;
   UChar *p = *src;
@@ -4319,17 +4318,25 @@ parse_posix_bracket(CClassNode* cc, UCha https://github.com/ruby/ruby/blob/trunk/regparse.c#L4318
   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
     goto not_posix_bracket;
 
-  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+  ascii_range = IS_ASCII_RANGE(env->option) &&
+		  ! IS_POSIX_BRACKET_ALL_RANGE(env->option);
+  for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
       p = (UChar* )onigenc_step(enc, p, end, pb->len);
       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
 
-      r = add_ctype_to_cc(cc, pb->ctype, not,
-	    IS_POSIX_BRACKET_ALL_RANGE(env->option),
-	    env);
+      r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
       if (r != 0) return r;
 
+      if (IS_NOT_NULL(asc_cc)) {
+	if (pb->ctype != ONIGENC_CTYPE_WORD &&
+	    pb->ctype != ONIGENC_CTYPE_ASCII &&
+	    !ascii_range)
+	  r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
+	if (r != 0) return r;
+      }
+
       PINC_S; PINC_S;
       *src = p;
       return 0;
@@ -4386,6 +4393,8 @@ fetch_char_property_to_ctype(UChar** src https://github.com/ruby/ruby/blob/trunk/regparse.c#L4393
   return r;
 }
 
+static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
+
 static int
 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
 		    ScanEnv* env)
@@ -4399,11 +4408,15 @@ parse_char_property(Node** np, OnigToken https://github.com/ruby/ruby/blob/trunk/regparse.c#L4408
   *np = node_new_cclass();
   CHECK_NULL_RETURN_MEMERR(*np);
   cc = NCCLASS(*np);
-  r = add_ctype_to_cc(cc, ctype, 0, 1, env);
+  r = add_ctype_to_cc(cc, ctype, 0, 0, env);
   if (r != 0) return r;
   if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
 
-  return 0;
+  if (IS_IGNORECASE(env->option)) {
+    if (ctype != ONIGENC_CTYPE_ASCII)
+      r = cclass_case_fold(np, cc, cc, env);
+  }
+  return r;
 }
 
 
@@ -4421,7 +4434,8 @@ enum CCVALTYPE { https://github.com/ruby/ruby/blob/trunk/regparse.c#L4434
 };
 
 static int
-next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
+next_state_class(CClassNode* cc, CClassNode* asc_cc,
+	         OnigCodePoint* vs, enum CCVALTYPE* type,
 		 enum CCSTATE* state, ScanEnv* env)
 {
   int r;
@@ -4430,11 +4444,18 @@ next_state_class(CClassNode* cc, OnigCod https://github.com/ruby/ruby/blob/trunk/regparse.c#L4444
     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
 
   if (*state == CCS_VALUE && *type != CCV_CLASS) {
-    if (*type == CCV_SB)
+    if (*type == CCV_SB) {
       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
+      if (IS_NOT_NULL(asc_cc))
+	BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
+    }
     else if (*type == CCV_CODE_POINT) {
       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
       if (r < 0) return r;
+      if (IS_NOT_NULL(asc_cc)) {
+	r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
+	if (r < 0) return r;
+      }
     }
   }
 
@@ -4444,7 +4465,8 @@ next_state_class(CClassNode* cc, OnigCod https://github.com/ruby/ruby/blob/trunk/regparse.c#L4465
 }
 
 static int
-next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
+next_state_val(CClassNode* cc, CClassNode* asc_cc,
+	       OnigCodePoint *vs, OnigCodePoint v,
 	       int* vs_israw, int v_israw,
 	       enum CCVALTYPE intype, enum CCVALTYPE* type,
 	       enum CCSTATE* state, ScanEnv* env)
@@ -4453,11 +4475,18 @@ next_state_val(CClassNode* cc, OnigCodeP https://github.com/ruby/ruby/blob/trunk/regparse.c#L4475
 
   switch (*state) {
   case CCS_VALUE:
-    if (*type == CCV_SB)
+    if (*type == CCV_SB) {
       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
+      if (IS_NOT_NULL(asc_cc))
+	BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
+    }
     else if (*type == CCV_CODE_POINT) {
       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
       if (r < 0) return r;
+      if (IS_NOT_NULL(asc_cc)) {
+	r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
+	if (r < 0) return r;
+      }
     }
     break;
 
@@ -4474,10 +4503,16 @@ next_state_val(CClassNode* cc, OnigCodeP https://github.com/ruby/ruby/blob/trunk/regparse.c#L4503
 	    return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
 	}
 	bitset_set_range(env, cc->bs, (int )*vs, (int )v);
+	if (IS_NOT_NULL(asc_cc))
+	  bitset_set_range(env, asc_cc->bs, (int )*vs, (int )v);
       }
       else {
 	r = add_code_range(&(cc->mbuf), env, *vs, v);
 	if (r < 0) return r;
+	if (IS_NOT_NULL(asc_cc)) {
+	  r = add_code_range0(&(asc_cc->mbuf), env, *vs, v, 0);
+	  if (r < 0) return r;
+	}
       }
     }
     else {
@@ -4493,6 +4528,11 @@ next_state_val(CClassNode* cc, OnigCodeP https://github.com/ruby/ruby/blob/trunk/regparse.c#L4528
 	bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
 	r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
 	if (r < 0) return r;
+	if (IS_NOT_NULL(asc_cc)) {
+	  bitset_set_range(env, asc_cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
+	  r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*vs, v, 0);
+	  if (r < 0) return r;
+	}
 #if 0
       }
       else
@@ -4542,22 +4582,24 @@ code_exist_check(OnigCodePoint c, UChar* https://github.com/ruby/ruby/blob/trunk/regparse.c#L4582
 }
 
 static int
-parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
+parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
 		 ScanEnv* env)
 {
   int r, neg, len, fetched, and_start;
   OnigCodePoint v, vs;
   UChar *p;
   Node* node;
+  Node* asc_node;
   CClassNode *cc, *prev_cc;
-  CClassNode work_cc;
+  CClassNode *asc_cc, *asc_prev_cc;
+  CClassNode work_cc, asc_work_cc;
 
   enum CCSTATE state;
   enum CCVALTYPE val_type, in_type;
   int val_israw, in_israw;
 
-  prev_cc = (CClassNode* )NULL;
-  *np = NULL_NODE;
+  prev_cc = asc_prev_cc = (CClassNode* )NULL;
+  *np = *asc_np = NULL_NODE;
   r = fetch_token_in_cc(tok, src, end, env);
   if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
     neg = 1;
@@ -4581,6 +4623,16 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4623
   CHECK_NULL_RETURN_MEMERR(node);
   cc = NCCLASS(node);
 
+  if (IS_IGNORECASE(env->option)) {
+    *asc_np = asc_node = node_new_cclass();
+    CHECK_NULL_RETURN_MEMERR(asc_node);
+    asc_cc = NCCLASS(asc_node);
+  }
+  else {
+    asc_node = NULL_NODE;
+    asc_cc = NULL;
+  }
+
   and_start = 0;
   state = CCS_START;
   p = *src;
@@ -4671,13 +4723,13 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4723
       }
       in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
     val_entry2:
-      r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
+      r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
 			 &state, env);
       if (r != 0) goto err;
       break;
 
     case TK_POSIX_BRACKET_OPEN:
-      r = parse_posix_bracket(cc, &p, end, env);
+      r = parse_posix_bracket(cc, asc_cc, &p, end, env);
       if (r < 0) goto err;
       if (r == 1) {  /* is not POSIX bracket */
 	CC_ESC_WARN(env, (UChar* )"[");
@@ -4690,11 +4742,18 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4742
       break;
 
     case TK_CHAR_TYPE:
-      r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env);
+      r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
+			  IS_ASCII_RANGE(env->option), env);
       if (r != 0) return r;
+      if (IS_NOT_NULL(asc_cc)) {
+	if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
+	  r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
+			      IS_ASCII_RANGE(env->option), env);
+	if (r != 0) return r;
+      }
 
     next_class:
-      r = next_state_class(cc, &vs, &val_type, &state, env);
+      r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
       if (r != 0) goto err;
       break;
 
@@ -4704,8 +4763,13 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4763
 
 	ctype = fetch_char_property_to_ctype(&p, end, env);
 	if (ctype < 0) return ctype;
-	r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env);
+	r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
 	if (r != 0) return r;
+	if (IS_NOT_NULL(asc_cc)) {
+	  if (ctype != ONIGENC_CTYPE_ASCII)
+	    r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
+	  if (r != 0) return r;
+	}
 	goto next_class;
       }
       break;
@@ -4766,15 +4830,20 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4830
 
     case TK_CC_CC_OPEN: /* [ */
       {
-	Node *anode;
+	Node *anode, *aasc_node;
 	CClassNode* acc;
 
-	r = parse_char_class(&anode, tok, &p, end, env);
+	r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
 	if (r == 0) {
 	  acc = NCCLASS(anode);
 	  r = or_cclass(cc, acc, env);
 	}
+	if (r == 0 && IS_NOT_NULL(aasc_node)) {
+	  acc = NCCLASS(aasc_node);
+	  r = or_cclass(asc_cc, acc, env);
+	}
 	onig_node_free(anode);
+	onig_node_free(aasc_node);
 	if (r != 0) goto err;
       }
       break;
@@ -4782,7 +4851,7 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4851
     case TK_CC_AND: /* && */
       {
 	if (state == CCS_VALUE) {
-	  r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+	  r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
 			     &val_type, &state, env);
 	  if (r != 0) goto err;
 	}
@@ -4794,12 +4863,23 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4863
 	  r = and_cclass(prev_cc, cc, env);
 	  if (r != 0) goto err;
 	  bbuf_free(cc->mbuf);
+	  if (IS_NOT_NULL(asc_cc)) {
+	    r = and_cclass(asc_prev_cc, asc_cc, env);
+	    if (r != 0) goto err;
+	    bbuf_free(asc_cc->mbuf);
+	  }
 	}
 	else {
 	  prev_cc = cc;
 	  cc = &work_cc;
+	  if (IS_NOT_NULL(asc_cc)) {
+	    asc_prev_cc = asc_cc;
+	    asc_cc = &asc_work_cc;
+	  }
 	}
 	initialize_cclass(cc);
+	if (IS_NOT_NULL(asc_cc))
+	  initialize_cclass(asc_cc);
       }
       break;
 
@@ -4822,7 +4902,7 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4902
   }
 
   if (state == CCS_VALUE) {
-    r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+    r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
 		       &val_type, &state, env);
     if (r != 0) goto err;
   }
@@ -4832,12 +4912,24 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4912
     if (r != 0) goto err;
     bbuf_free(cc->mbuf);
     cc = prev_cc;
+    if (IS_NOT_NULL(asc_cc)) {
+      r = and_cclass(asc_prev_cc, asc_cc, env);
+      if (r != 0) goto err;
+      bbuf_free(asc_cc->mbuf);
+      asc_cc = asc_prev_cc;
+    }
   }
 
-  if (neg != 0)
+  if (neg != 0) {
     NCCLASS_SET_NOT(cc);
-  else
+    if (IS_NOT_NULL(asc_cc))
+      NCCLASS_SET_NOT(asc_cc);
+  }
+  else {
     NCCLASS_CLEAR_NOT(cc);
+    if (IS_NOT_NULL(asc_cc))
+      NCCLASS_CLEAR_NOT(asc_cc);
+  }
   if (IS_NCCLASS_NOT(cc) &&
       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
     int is_empty;
@@ -4865,6 +4957,8 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4957
  err:
   if (cc != NCCLASS(*np))
     bbuf_free(cc->mbuf);
+  if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
+    bbuf_free(asc_cc->mbuf);
   return r;
 }
 
@@ -5489,6 +5583,7 @@ clear_not_flag_cclass(CClassNode* cc, On https://github.com/ruby/ruby/blob/trunk/regparse.c#L5583
 typedef struct {
   ScanEnv*    env;
   CClassNode* cc;
+  CClassNode* asc_cc;
   Node*       alt_root;
   Node**      ptail;
 } IApplyCaseFoldArg;
@@ -5500,37 +5595,57 @@ i_apply_case_fold(OnigCodePoint from, On https://github.com/ruby/ruby/blob/trunk/regparse.c#L5595
   IApplyCaseFoldArg* iarg;
   ScanEnv* env;
   CClassNode* cc;
+  CClassNode* asc_cc;
   BitSetRef bs;
+  int add_flag;
 
   iarg = (IApplyCaseFoldArg* )arg;
   env = iarg->env;
   cc  = iarg->cc;
+  asc_cc = iarg->asc_cc;
   bs = cc->bs;
 
+  if (IS_NULL(asc_cc)) {
+    add_flag = 0;
+  }
+  else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
+    add_flag = 1;
+  }
+  else {
+    add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
+    if (IS_NCCLASS_NOT(asc_cc))
+      add_flag = !add_flag;
+  }
+
   if (to_len == 1) {
     int is_in = onig_is_code_in_cc(env->enc, from, cc);
 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
 	(is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
-      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
-	add_code_range0(&(cc->mbuf), env, *to, *to, 0);
-      }
-      else {
-	BITSET_SET_BIT(bs, *to);
+      if (add_flag) {
+	if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+	  add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+	}
+	else {
+	  BITSET_SET_BIT(bs, *to);
+	}
       }
     }
 #else
     if (is_in != 0) {
-      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
-	if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
-	add_code_range0(&(cc->mbuf), env, *to, *to, 0);
-      }
-      else {
-	if (IS_NCCLASS_NOT(cc)) {
-	  BITSET_CLEAR_BIT(bs, *to);
+      if (add_flag) {
+	if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+	  if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
+	  add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+	}
+	else {
+	  if (IS_NCCLASS_NOT(cc)) {
+	    BITSET_CLEAR_BIT(bs, *to);
+	  }
+	  else {
+	    BITSET_SET_BIT(bs, *to);
+	  }
 	}
-	else
-	  BITSET_SET_BIT(bs, *to);
       }
     }
 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
@@ -5574,6 +5689,35 @@ i_apply_case_fold(OnigCodePoint from, On https://github.com/ruby/ruby/blob/trunk/regparse.c#L5689
 }
 
 static int
+cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
+{
+  int r;
+  IApplyCaseFoldArg iarg;
+
+  iarg.env         = env;
+  iarg.cc          = cc;
+  iarg.asc_cc      = asc_cc;
+  iarg.alt_root    = NULL_NODE;
+  iarg.ptail       = &(iarg.alt_root);
+
+  r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
+				  i_apply_case_fold, &iarg);
+  if (r != 0) {
+    onig_node_free(iarg.alt_root);
+    return r;
+  }
+  if (IS_NOT_NULL(iarg.alt_root)) {
+    Node* work = onig_node_new_alt(*np, iarg.alt_root);
+    if (IS_NULL(work)) {
+      onig_node_free(iarg.alt_root);
+      return ONIGERR_MEMORY;
+    }
+    *np = work;
+  }
+  return r;
+}
+
+static int
 node_linebreak(Node** np, ScanEnv* env)
 {
   /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
@@ -5658,7 +5802,7 @@ node_extended_grapheme_cluster(Node** np https://github.com/ruby/ruby/blob/trunk/regparse.c#L5802
       np1 = node_new_cclass();
       if (IS_NULL(np1)) goto err;
       cc1 = NCCLASS(np1);
-      r = add_ctype_to_cc(cc1, ctype, 0, 1, env);
+      r = add_ctype_to_cc(cc1, ctype, 0, 0, env);
       if (r != 0) goto err;
       NCCLASS_SET_NOT(cc1);
 
@@ -5666,7 +5810,7 @@ node_extended_grapheme_cluster(Node** np https://github.com/ruby/ruby/blob/trunk/regparse.c#L5810
       np2 = node_new_cclass();
       if (IS_NULL(np2)) goto err;
       cc2 = NCCLASS(np2);
-      r = add_ctype_to_cc(cc2, ctype, 0, 1, env);
+      r = add_ctype_to_cc(cc2, ctype, 0, 0, env);
       if (r != 0) goto err;
 
       qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
@@ -6013,7 +6157,8 @@ parse_exp(Node** np, OnigToken* tok, int https://github.com/ruby/ruby/blob/trunk/regparse.c#L6157
             *np = node_new_cclass();
             CHECK_NULL_RETURN_MEMERR(*np);
             cc = NCCLASS(*np);
-            r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env);
+            r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
+			    IS_ASCII_RANGE(env->option), env);
 	    if (r != 0) return r;
             if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
 #ifdef USE_SHARED_CCLASS_TABLE
@@ -6036,15 +6181,20 @@ parse_exp(Node** np, OnigToken* tok, int https://github.com/ruby/ruby/blob/trunk/regparse.c#L6181
 
   case TK_CC_OPEN:
     {
+      Node *asc_node;
       CClassNode* cc;
       OnigCodePoint code;
 
-      r = parse_char_class(np, tok, src, end, env);
-      if (r != 0) return r;
+      r = parse_char_class(np, &asc_node, tok, src, end, env);
+      if (r != 0) {
+	onig_node_free(asc_node);
+	return r;
+      }
 
       cc = NCCLASS(*np);
       if (is_onechar_cclass(cc, &code)) {
 	onig_node_free(*np);
+	onig_node_free(asc_node);
 	*np = node_new_empty();
 	CHECK_NULL_RETURN_MEMERR(*np);
 	r = node_str_cat_codepoint(*np, env->enc, code);
@@ -6052,28 +6202,13 @@ parse_exp(Node** np, OnigToken* tok, int https://github.com/ruby/ruby/blob/trunk/regparse.c# (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]