[前][次][番号順一覧][スレッド一覧]

ruby-changes:2252

From: ko1@a...
Date: 19 Oct 2007 16:41:46 +0900
Subject: [ruby-changes:2252] nobu - Ruby:r13743 (trunk): * parse.y (parser_regx_options, reg_compile_gen): relaxed encoding

nobu	2007-10-19 16:41:03 +0900 (Fri, 19 Oct 2007)

  New Revision: 13743

  Modified files:
    trunk/ChangeLog
    trunk/parse.y
    trunk/re.c

  Log:
    * parse.y (parser_regx_options, reg_compile_gen): relaxed encoding
      matching rule.
    
    * re.c (rb_reg_initialize): always set encoding of Regexp.
    
    * re.c (rb_reg_initialize_str): fix encoding for non 7bit-clean
      strings.
    
    * re.c (rb_reg_initialize_m): use ascii encoding for 'n' option.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=13743&r2=13742
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13743&r2=13742
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=13743&r2=13742

Index: re.c
===================================================================
--- re.c	(revision 13742)
+++ re.c	(revision 13743)
@@ -136,8 +136,11 @@
 
 #define KCODE_FIXED FL_USER4
 
-#define ARG_REG_OPTION_MASK   0x0f
-#define ARG_KCODE_NONE	      0x10
+#define ARG_REG_OPTION_MASK \
+    (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
+#define ARG_ENCODING_FIXED    16
+
+#define ARG_KCODE_NONE	      0
 #define ARG_KCODE_EUC 	      1
 #define ARG_KCODE_SJIS	      2
 #define ARG_KCODE_UTF8	      3
@@ -157,9 +160,6 @@
       case 'm':
 	val = ONIG_OPTION_MULTILINE;
 	break;
-      case 'n':
-	val = ARG_KCODE_NONE;
-	break;
       default:
 	val = 0;
 	break;
@@ -184,19 +184,24 @@
     *option = 0;
 
     switch (c) {
+      case 'n':
+	*kcode = ARG_KCODE_NONE;
+	break;
       case 'e':
 	*kcode = ARG_KCODE_EUC;
-	return 1;
+	break;
       case 's':
 	*kcode = ARG_KCODE_SJIS;
-	return 1;
+	break;
       case 'u':
 	*kcode = ARG_KCODE_UTF8;
-	return 1;
+	break;
       default:
-	*kcode  = 0;
+	*kcode = -1;
 	return (*option = char_to_option(c));
     }
+    *option = ARG_ENCODING_FIXED;
+    return 1;
 }
 
 static void
@@ -1227,14 +1232,10 @@
     re->ptr = 0;
     re->str = 0;
 
-    if (options & ARG_KCODE_NONE) {
-	rb_enc_associate_index((VALUE)re, 0);
-	enc = rb_enc_from_index(0);
+    rb_enc_associate((VALUE)re, enc);
+    if (options & ARG_ENCODING_FIXED) {
 	re->basic.flags |= KCODE_FIXED;
     }
-    else {
-	rb_enc_associate((VALUE)re, enc);
-    }
     re->ptr = make_regexp(s, len, enc, options & ARG_REG_OPTION_MASK, err);
     if (!re->ptr) return -1;
     re->str = ALLOC_N(char, len+1);
@@ -1247,6 +1248,9 @@
 static int
 rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err)
 {
+    if (rb_enc_str_coderange(str) != ENC_CODERANGE_SINGLE) {
+	options |= ARG_ENCODING_FIXED;
+    }
     return rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str),
 			     options, err);
 }
@@ -1573,21 +1577,21 @@
     onig_errmsg_buffer err;
     int flags = 0;
     VALUE str;
+    rb_encoding *enc;
+    const char *ptr;
+    long len;
 
     if (argc == 0 || argc > 3) {
 	rb_raise(rb_eArgError, "wrong number of arguments");
     }
     if (TYPE(argv[0]) == T_REGEXP) {
 	VALUE re = argv[0];
-	const char *ptr;
-	long len;
-	rb_encoding *enc;
 
 	if (argc > 1) {
 	    rb_warn("flags ignored");
 	}
 	rb_reg_check(re);
-	flags = RREGEXP(argv[0])->ptr->options & ARG_REG_OPTION_MASK;
+	flags = rb_reg_options(re);
 	ptr = RREGEXP(re)->str;
 	len = RREGEXP(re)->len;
 	enc = rb_enc_get(re);
@@ -1601,18 +1605,22 @@
 	    if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]);
 	    else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE;
 	}
+	enc = 0;
 	if (argc == 3 && !NIL_P(argv[2])) {
 	    char *kcode = StringValuePtr(argv[2]);
 	    if (kcode[0] == 'n' || kcode[1] == 'N') {
-		flags |= ARG_KCODE_NONE;
+		enc = rb_enc_from_index(0);
+		flags |= ARG_ENCODING_FIXED;
 	    }
 	    else {
 		rb_warning("encoding option is obsolete - %s", kcode);
 	    }
 	}
 	str = argv[0];
-	StringValueCStr(str);
-	if (rb_reg_initialize_str(self, str, flags, err)) {
+	ptr = StringValueCStr(str);
+	if (enc
+	    ? rb_reg_initialize(self, ptr, RSTRING_LEN(str), enc, flags, err)
+	    : rb_reg_initialize_str(self, str, flags, err)) {
 	    rb_reg_raise_str(str, flags, err);
 	}
     }
@@ -1731,8 +1739,8 @@
     int options;
 
     rb_reg_check(re);
-    options = RREGEXP(re)->ptr->options &
-	(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND);
+    options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK;
+    if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
     return options;
 }
 
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 13742)
+++ ChangeLog	(revision 13743)
@@ -1,3 +1,15 @@
+Fri Oct 19 16:41:00 2007  Nobuyoshi Nakada  <nobu@r...>
+
+	* parse.y (parser_regx_options, reg_compile_gen): relaxened encoding
+	  matching rule.
+
+	* re.c (rb_reg_initialize): always set encoding of Regexp.
+
+	* re.c (rb_reg_initialize_str): fix enconding for non 7bit-clean
+	  strings.
+
+	* re.c (rb_reg_initialize_m): use ascii encoding for 'n' option.
+
 Fri Oct 19 11:09:56 2007  Nobuyoshi Nakada  <nobu@r...>
 
 	* ruby.c (process_options): set primary encoding from the parser
Index: parse.y
===================================================================
--- parse.y	(revision 13742)
+++ parse.y	(revision 13743)
@@ -261,7 +261,7 @@
 };
 
 #define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
-#define STR_NEW0() rb_enc_str_new(0,0,rb_enc_from_index(0))
+#define STR_NEW0() rb_str_new(0,0)
 #define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
 #define STR_NEW3(p,n,m) parser_str_new((p),(n),STR_ENC(!ENC_SINGLE(m)),(m))
 #define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))
@@ -443,6 +443,10 @@
 #define lvar_defined(id) lvar_defined_gen(parser, id)
 
 #define RE_OPTION_ONCE (1<<16)
+#define RE_OPTION_ENCODING_SHIFT 8
+#define RE_OPTION_ENCODING(e) (((e)&0xff)<<RE_OPTION_ENCODING_SHIFT)
+#define RE_OPTION_ENCODING_IDX(o) (((o)>>RE_OPTION_ENCODING_SHIFT)&0xff)
+#define RE_OPTION_MASK  0xff
 
 #define NODE_STRTERM NODE_ZARRAY	/* nothing to gc */
 #define NODE_HEREDOC NODE_ARRAY 	/* 1, 3 to gc */
@@ -3639,14 +3643,14 @@
 			int options = $3;
 			NODE *node = $2;
 			if (!node) {
-			    node = NEW_LIT(reg_compile(0, options & ~RE_OPTION_ONCE));
+			    node = NEW_LIT(reg_compile(STR_NEW0(), options));
 			}
 			else switch (nd_type(node)) {
 			  case NODE_STR:
 			    {
 				VALUE src = node->nd_lit;
 				nd_set_type(node, NODE_LIT);
-				node->nd_lit = reg_compile(src, options&~RE_OPTION_ONCE);
+				node->nd_lit = reg_compile(src, options);
 			    }
 			    break;
 			  default:
@@ -3658,7 +3662,7 @@
 			    else {
 				nd_set_type(node, NODE_DREGX);
 			    }
-			    node->nd_cflag = options & ~RE_OPTION_ONCE;
+			    node->nd_cflag = options & RE_OPTION_MASK;
 			    break;
 			}
 			$$ = node;
@@ -5110,11 +5114,12 @@
     return 0;
 }
 
+extern int rb_char_to_option_kcode(int c, int *option, int *kcode);
+
 static int
 parser_regx_options(struct parser_params *parser)
 {
-    extern int rb_char_to_option_kcode(int c, int *option, int *kcode);
-
+    int kcode = 0;
     int options = 0;
     int c, opt, kc;
 
@@ -5125,11 +5130,7 @@
         }
         else if (rb_char_to_option_kcode(c, &opt, &kc)) {
             options |= opt;
-            if (kc != 0 && rb_enc_from_index(kc) != parser->enc) {
-		compile_error(PARSER_ARG
-			      "regexp encoding option '%c' mismatch to %s",
-			      c, rb_enc_name(parser->enc));
-	    }
+	    if (kc >= 0) kcode = c;
         }
         else {
 	    tokadd(c);
@@ -5141,7 +5142,7 @@
 	compile_error(PARSER_ARG "unknown regexp option%s - %s",
 		      toklen() > 1 ? "s" : "", tok());
     }
-    return options;
+    return options | RE_OPTION_ENCODING(kcode);
 }
 
 #define STR_FUNC_ESCAPE 0x01
@@ -8212,8 +8213,21 @@
 static VALUE
 reg_compile_gen(struct parser_params* parser, VALUE str, int options)
 {
-    VALUE re = rb_reg_compile(str, (options) & ~RE_OPTION_ONCE);
+    VALUE re;
+    int c = RE_OPTION_ENCODING_IDX(options);
 
+    if (c) {
+	int opt, idx;
+	rb_char_to_option_kcode(c, &opt, &idx);
+	if (idx != ENCODING_GET(str) && ENCODING_GET(str) &&
+	    rb_enc_str_coderange(str) != ENC_CODERANGE_SINGLE) {
+	    compile_error(PARSER_ARG
+			  "regexp encoding option '%c' differs from source encoding '%s'",
+			  c, rb_enc_name(rb_enc_get(str)));
+	}
+	ENCODING_SET(str, idx);
+    }
+    re = rb_reg_compile(str, options & RE_OPTION_MASK);
     if (NIL_P(re)) {
 	RB_GC_GUARD(re) = rb_obj_as_string(rb_errinfo());
 	compile_error(PARSER_ARG "%s", RSTRING_PTR(re));

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]