[前][次][番号順一覧][スレッド一覧]

ruby-changes:2115

From: ko1@a...
Date: 3 Oct 2007 16:59:15 +0900
Subject: [ruby-changes:2115] nobu - Ruby:r13606 (trunk): * parse.y (parser_str_new, parser_yylex, rb_intern3): set code-range

nobu	2007-10-03 16:58:50 +0900 (Wed, 03 Oct 2007)

  New Revision: 13606

  Modified files:
    trunk/ChangeLog
    trunk/parse.y

  Log:
    * parse.y (parser_str_new, parser_yylex, rb_intern3): set code-range
      bits.
    
    * parse.y (parser_tokadd_string): check code-range.
    
    * parse.y (parser_parse_string, parser_here_document): ditto.
    
    * parse.y (parser_set_encode): check if valid encoding.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=13606&r2=13605
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13606&r2=13605

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 13605)
+++ ChangeLog	(revision 13606)
@@ -1,3 +1,14 @@
+Wed Oct  3 16:58:48 2007  Nobuyoshi Nakada  <nobu@r...>
+
+	* parse.y (parser_str_new, parser_yylex, rb_intern3): set code-range
+	  bits.
+
+	* parse.y (parser_tokadd_string): check code-range.
+
+	* parse.y (parser_parse_string, parser_here_document): ditto.
+
+	* parse.y (parser_set_encode): check if valid encoding.
+
 Wed Oct  3 15:43:15 2007  Yukihiro Matsumoto  <matz@r...>
 
 	* variable.c (rb_cvar_set): check whether class variable is
Index: parse.y
===================================================================
--- parse.y	(revision 13605)
+++ parse.y	(revision 13606)
@@ -263,8 +263,10 @@
 #define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
 #define STR_NEW0() rb_enc_str_new(0,0,rb_enc_from_index(0))
 #define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
-#define STR_NEW3(p,n,m) rb_enc_str_new((p),(n), STR_ENC(m))
+#define STR_NEW3(p,n,m) parser_str_new((p),(n),STR_ENC(m),(m))
 #define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))
+#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_SINGLE)
+#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb))
 
 #ifdef YYMALLOC
 void *rb_parser_malloc(struct parser_params *, size_t);
@@ -4664,7 +4666,7 @@
     if (!compile_for_eval && rb_safe_level() == 0) {
 	ruby_debug_lines = ruby_suppress_tracing(debug_lines, (VALUE)f);
 	if (ruby_debug_lines && line > 1) {
-	    VALUE str = STR_NEW0();
+	    VALUE str = rb_str_new(0, 0);
 	    n = line - 1;
 	    do {
 		rb_ary_push(ruby_debug_lines, str);
@@ -4808,6 +4810,14 @@
 }
 #endif  /* !RIPPER */
 
+static VALUE
+parser_str_new(const char *p, long n, rb_encoding *enc, int coderange)
+{
+    VALUE str = rb_enc_str_new(p, n, enc);
+    ENC_CODERANGE_SET(str, coderange);
+    return str;
+}
+
 static inline int
 parser_nextc(struct parser_params *parser)
 {
@@ -5204,12 +5214,20 @@
 		    pushback(c);
 		    if (tokadd_escape(term) < 0)
 			return -1;
+		    if (mb) {
+			*mb = ENC_CODERANGE_UNKNOWN;
+			mb = 0;
+		    }
 		    continue;
 		}
 		else if (func & STR_FUNC_EXPAND) {
 		    pushback(c);
 		    if (func & STR_FUNC_ESCAPE) tokadd('\\');
 		    c = read_escape();
+		    if (mb) {
+			*mb = ENC_CODERANGE_UNKNOWN;
+			mb = 0;
+		    }
 		}
 		else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
 		    /* ignore backslashed spaces in %w */
@@ -5221,7 +5239,7 @@
 	}
 	else if (parser_ismbchar()) {
 	    tokadd_mbchar(c);
-	    if (mb) *mb = 1;
+	    if (mb) *mb = ENC_CODERANGE_MULTI;
 	    continue;
 	}
 	else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
@@ -5247,7 +5265,7 @@
     int func = quote->nd_func;
     int term = nd_term(quote);
     int paren = nd_paren(quote);
-    int c, space = 0, mb = 0;
+    int c, space = 0, mb = ENC_CODERANGE_SINGLE;
 
     if (func == -1) return tSTRING_END;
     c = nextc();
@@ -5458,7 +5476,7 @@
 	} while (!whole_match_p(eos, len, indent));
     }
     else {
-	int mb = 0;
+	int mb = ENC_CODERANGE_SINGLE, *mbp = &mb;
 	newtok();
 	if (c == '#') {
 	    switch (c = nextc()) {
@@ -5473,12 +5491,13 @@
 	}
 	do {
 	    pushback(c);
-	    if ((c = tokadd_string(func, '\n', 0, NULL, &mb)) == -1) goto error;
+	    if ((c = tokadd_string(func, '\n', 0, NULL, mbp)) == -1) goto error;
 	    if (c != '\n') {
-                set_yylval_str(STR_NEW3(tok(), toklen(), mb));
+		set_yylval_str(STR_NEW3(tok(), toklen(), mb));
 		return tSTRING_CONTENT;
 	    }
 	    tokadd(nextc());
+	    if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;
 	    if ((c = nextc()) == -1) goto error;
 	} while (!whole_match_p(eos, len, indent));
 	str = STR_NEW3(tok(), toklen(), mb);
@@ -5520,7 +5539,11 @@
 static void
 parser_set_encode(struct parser_params *parser, const char *name)
 {
-    parser->enc = rb_enc_find(name);
+    int idx = rb_enc_find_index(name);
+    if (idx < 0) {
+	rb_raise(rb_eArgError, "unknown encoding name: %s", name);
+    }
+    parser->enc = rb_enc_from_index(idx);
 }
 
 #ifndef RIPPER
@@ -6706,10 +6729,6 @@
 	  gvar:
 	    tokfix();
             set_yylval_id(rb_intern(tok()));
-	    if (!is_global_id(yylval_id())) {
-	    	compile_error(PARSER_ARG "invalid global variable `%s'", rb_id2name(yylval.id));
-		return 0;
-	    }
 	    return tGVAR;
 
 	  case '&':		/* $&: last match */
@@ -6893,7 +6912,7 @@
 		if (peek(':') && !(lex_p + 1 < lex_pend && lex_p[1] == ':')) {
 		    lex_state = EXPR_BEG;
 		    nextc();
-		    set_yylval_id(rb_intern3(tok(), toklen(), STR_ENC(mb)));
+		    set_yylval_id(TOK_INTERN(!ENC_SINGLE(mb)));
 		    return tLABEL;
 		}
 	    }
@@ -6912,7 +6931,7 @@
 	    }
 	}
         {
-            ID ident = rb_intern3(tok(), toklen(), STR_ENC(mb));
+            ID ident = TOK_INTERN(!ENC_SINGLE(mb));
 
             set_yylval_id(ident);
             if (last_state != EXPR_DOT && is_local_id(ident) && lvar_defined(ident)) {
@@ -8346,6 +8365,9 @@
 static int
 is_special_global_name(const char *m, const char *e, rb_encoding *enc)
 {
+    int mb = 0;
+
+    if (m >= e) return 0;
     switch (*m) {
       case '~': case '*': case '$': case '?': case '!': case '@':
       case '/': case '\\': case ';': case ',': case '.': case '=':
@@ -8356,13 +8378,19 @@
 	break;
       case '-':
 	++m;
-	if (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
+	if (is_identchar(m, e, enc)) {
+	    if (!ISASCII(*m)) mb = 1;
+	    m += rb_enc_mbclen(m, e, enc);
+	}
 	break;
       default:
 	if (!rb_enc_isdigit(*m, enc)) return 0;
-	do ++m; while (rb_enc_isdigit(*m, enc));
+	do {
+	    if (!ISASCII(*m)) mb = 1;
+	    ++m;
+	} while (rb_enc_isdigit(*m, enc));
     }
-    return !*m;
+    return m == e ? mb + 1 : 0;
 }
 
 int
@@ -8454,6 +8482,7 @@
     VALUE str;
     ID id;
     int last;
+    int mb;
     struct RString fake_str;
     fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE;
     fake_str.basic.klass = rb_cString;
@@ -8471,7 +8500,10 @@
     switch (*m) {
       case '$':
 	id |= ID_GLOBAL;
-	if (is_special_global_name(++m, e, enc)) goto new_id;
+	if ((mb = is_special_global_name(++m, e, enc)) != 0) {
+	    if (!--mb) enc = rb_enc_from_index(0);
+	    goto new_id;
+	}
 	break;
       case '@':
 	if (m[1] == '@') {
@@ -8500,8 +8532,9 @@
 
 	if (m[last] == '=') {
 	    /* attribute assignment */
-	    id = rb_intern2(name, last);
+	    id = rb_intern3(name, last, enc);
 	    if (id > tLAST_TOKEN && !is_attrset_id(id)) {
+		enc = rb_enc_get(rb_id2str(id));
 		id = rb_id_attrset(id);
 		goto id_register;
 	    }
@@ -8515,12 +8548,23 @@
 	}
 	break;
     }
+    mb = 0;
     if (!rb_enc_isdigit(*m, enc)) {
 	while (m <= name + last && is_identchar(m, e, enc)) {
+	    if (!ISASCII(*m)) mb = 1;
 	    m += rb_enc_mbclen(m, e, enc);
 	}
     }
     if (m - name < len) id = ID_JUNK;
+    if (enc != rb_enc_from_index(0)) {
+	if (!mb) {
+	    for (; m <= name + len; ++m) {
+		if (!ISASCII(*m)) goto mbstr;
+	    }
+	}
+	enc = rb_enc_from_index(0);
+      mbstr:;
+    }
   new_id:
     id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
   id_register:

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]