ruby-changes:2115
From: ko1@a...
Date: 3 Oct 2007 16:59:15 +0900
Subject: [ruby-changes:2115] nobu - Ruby:r13606 (trunk): * parse.y (parser_str_new, parser_yylex, rb_intern3): set code-range
nobu 2007-10-03 16:58:50 +0900 (Wed, 03 Oct 2007)
New Revision: 13606
Modified files:
trunk/ChangeLog
trunk/parse.y
Log:
* parse.y (parser_str_new, parser_yylex, rb_intern3): set code-range
bits.
* parse.y (parser_tokadd_string): check code-range.
* parse.y (parser_parse_string, parser_here_document): ditto.
* parse.y (parser_set_encode): check if valid encoding.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=13606&r2=13605
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13606&r2=13605
Index: ChangeLog
===================================================================
--- ChangeLog (revision 13605)
+++ ChangeLog (revision 13606)
@@ -1,3 +1,14 @@
+Wed Oct 3 16:58:48 2007 Nobuyoshi Nakada <nobu@r...>
+
+ * parse.y (parser_str_new, parser_yylex, rb_intern3): set code-range
+ bits.
+
+ * parse.y (parser_tokadd_string): check code-range.
+
+ * parse.y (parser_parse_string, parser_here_document): ditto.
+
+ * parse.y (parser_set_encode): check if valid encoding.
+
Wed Oct 3 15:43:15 2007 Yukihiro Matsumoto <matz@r...>
* variable.c (rb_cvar_set): check whether class variable is
Index: parse.y
===================================================================
--- parse.y (revision 13605)
+++ parse.y (revision 13606)
@@ -263,8 +263,10 @@
#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
#define STR_NEW0() rb_enc_str_new(0,0,rb_enc_from_index(0))
#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
-#define STR_NEW3(p,n,m) rb_enc_str_new((p),(n), STR_ENC(m))
+#define STR_NEW3(p,n,m) parser_str_new((p),(n),STR_ENC(m),(m))
#define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))
+#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_SINGLE)
+#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb))
#ifdef YYMALLOC
void *rb_parser_malloc(struct parser_params *, size_t);
@@ -4664,7 +4666,7 @@
if (!compile_for_eval && rb_safe_level() == 0) {
ruby_debug_lines = ruby_suppress_tracing(debug_lines, (VALUE)f);
if (ruby_debug_lines && line > 1) {
- VALUE str = STR_NEW0();
+ VALUE str = rb_str_new(0, 0);
n = line - 1;
do {
rb_ary_push(ruby_debug_lines, str);
@@ -4808,6 +4810,14 @@
}
#endif /* !RIPPER */
+static VALUE
+parser_str_new(const char *p, long n, rb_encoding *enc, int coderange)
+{
+ VALUE str = rb_enc_str_new(p, n, enc);
+ ENC_CODERANGE_SET(str, coderange);
+ return str;
+}
+
static inline int
parser_nextc(struct parser_params *parser)
{
@@ -5204,12 +5214,20 @@
pushback(c);
if (tokadd_escape(term) < 0)
return -1;
+ if (mb) {
+ *mb = ENC_CODERANGE_UNKNOWN;
+ mb = 0;
+ }
continue;
}
else if (func & STR_FUNC_EXPAND) {
pushback(c);
if (func & STR_FUNC_ESCAPE) tokadd('\\');
c = read_escape();
+ if (mb) {
+ *mb = ENC_CODERANGE_UNKNOWN;
+ mb = 0;
+ }
}
else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
/* ignore backslashed spaces in %w */
@@ -5221,7 +5239,7 @@
}
else if (parser_ismbchar()) {
tokadd_mbchar(c);
- if (mb) *mb = 1;
+ if (mb) *mb = ENC_CODERANGE_MULTI;
continue;
}
else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
@@ -5247,7 +5265,7 @@
int func = quote->nd_func;
int term = nd_term(quote);
int paren = nd_paren(quote);
- int c, space = 0, mb = 0;
+ int c, space = 0, mb = ENC_CODERANGE_SINGLE;
if (func == -1) return tSTRING_END;
c = nextc();
@@ -5458,7 +5476,7 @@
} while (!whole_match_p(eos, len, indent));
}
else {
- int mb = 0;
+ int mb = ENC_CODERANGE_SINGLE, *mbp = &mb;
newtok();
if (c == '#') {
switch (c = nextc()) {
@@ -5473,12 +5491,13 @@
}
do {
pushback(c);
- if ((c = tokadd_string(func, '\n', 0, NULL, &mb)) == -1) goto error;
+ if ((c = tokadd_string(func, '\n', 0, NULL, mbp)) == -1) goto error;
if (c != '\n') {
- set_yylval_str(STR_NEW3(tok(), toklen(), mb));
+ set_yylval_str(STR_NEW3(tok(), toklen(), mb));
return tSTRING_CONTENT;
}
tokadd(nextc());
+ if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;
if ((c = nextc()) == -1) goto error;
} while (!whole_match_p(eos, len, indent));
str = STR_NEW3(tok(), toklen(), mb);
@@ -5520,7 +5539,11 @@
static void
parser_set_encode(struct parser_params *parser, const char *name)
{
- parser->enc = rb_enc_find(name);
+ int idx = rb_enc_find_index(name);
+ if (idx < 0) {
+ rb_raise(rb_eArgError, "unknown encoding name: %s", name);
+ }
+ parser->enc = rb_enc_from_index(idx);
}
#ifndef RIPPER
@@ -6706,10 +6729,6 @@
gvar:
tokfix();
set_yylval_id(rb_intern(tok()));
- if (!is_global_id(yylval_id())) {
- compile_error(PARSER_ARG "invalid global variable `%s'", rb_id2name(yylval.id));
- return 0;
- }
return tGVAR;
case '&': /* $&: last match */
@@ -6893,7 +6912,7 @@
if (peek(':') && !(lex_p + 1 < lex_pend && lex_p[1] == ':')) {
lex_state = EXPR_BEG;
nextc();
- set_yylval_id(rb_intern3(tok(), toklen(), STR_ENC(mb)));
+ set_yylval_id(TOK_INTERN(!ENC_SINGLE(mb)));
return tLABEL;
}
}
@@ -6912,7 +6931,7 @@
}
}
{
- ID ident = rb_intern3(tok(), toklen(), STR_ENC(mb));
+ ID ident = TOK_INTERN(!ENC_SINGLE(mb));
set_yylval_id(ident);
if (last_state != EXPR_DOT && is_local_id(ident) && lvar_defined(ident)) {
@@ -8346,6 +8365,9 @@
static int
is_special_global_name(const char *m, const char *e, rb_encoding *enc)
{
+ int mb = 0;
+
+ if (m >= e) return 0;
switch (*m) {
case '~': case '*': case '$': case '?': case '!': case '@':
case '/': case '\\': case ';': case ',': case '.': case '=':
@@ -8356,13 +8378,19 @@
break;
case '-':
++m;
- if (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
+ if (is_identchar(m, e, enc)) {
+ if (!ISASCII(*m)) mb = 1;
+ m += rb_enc_mbclen(m, e, enc);
+ }
break;
default:
if (!rb_enc_isdigit(*m, enc)) return 0;
- do ++m; while (rb_enc_isdigit(*m, enc));
+ do {
+ if (!ISASCII(*m)) mb = 1;
+ ++m;
+ } while (rb_enc_isdigit(*m, enc));
}
- return !*m;
+ return m == e ? mb + 1 : 0;
}
int
@@ -8454,6 +8482,7 @@
VALUE str;
ID id;
int last;
+ int mb;
struct RString fake_str;
fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE;
fake_str.basic.klass = rb_cString;
@@ -8471,7 +8500,10 @@
switch (*m) {
case '$':
id |= ID_GLOBAL;
- if (is_special_global_name(++m, e, enc)) goto new_id;
+ if ((mb = is_special_global_name(++m, e, enc)) != 0) {
+ if (!--mb) enc = rb_enc_from_index(0);
+ goto new_id;
+ }
break;
case '@':
if (m[1] == '@') {
@@ -8500,8 +8532,9 @@
if (m[last] == '=') {
/* attribute assignment */
- id = rb_intern2(name, last);
+ id = rb_intern3(name, last, enc);
if (id > tLAST_TOKEN && !is_attrset_id(id)) {
+ enc = rb_enc_get(rb_id2str(id));
id = rb_id_attrset(id);
goto id_register;
}
@@ -8515,12 +8548,23 @@
}
break;
}
+ mb = 0;
if (!rb_enc_isdigit(*m, enc)) {
while (m <= name + last && is_identchar(m, e, enc)) {
+ if (!ISASCII(*m)) mb = 1;
m += rb_enc_mbclen(m, e, enc);
}
}
if (m - name < len) id = ID_JUNK;
+ if (enc != rb_enc_from_index(0)) {
+ if (!mb) {
+ for (; m <= name + len; ++m) {
+ if (!ISASCII(*m)) goto mbstr;
+ }
+ }
+ enc = rb_enc_from_index(0);
+ mbstr:;
+ }
new_id:
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_register:
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml