ruby-changes:2806
From: ko1@a...
Date: 18 Dec 2007 20:26:53 +0900
Subject: [ruby-changes:2806] akr - Ruby:r14297 (trunk): * parse.y (arg tMATCH arg): call reg_named_capture_assign_gen if regexp
akr 2007-12-18 20:26:24 +0900 (Tue, 18 Dec 2007) New Revision: 14297 Modified files: trunk/ChangeLog trunk/parse.y trunk/re.c trunk/test/ruby/test_regexp.rb Log: * parse.y (arg tMATCH arg): call reg_named_capture_assign_gen if regexp literal is used. (reg_named_capture_assign_gen): assign the result of named capture into local variables. [ruby-dev:32588] * re.c: document the assignment by named captures. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=14297&r2=14296 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14297&r2=14296 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_regexp.rb?r1=14297&r2=14296 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=14297&r2=14296 Index: re.c =================================================================== --- re.c (revision 14296) +++ re.c (revision 14297) @@ -2164,6 +2164,37 @@ * * /at/ =~ "input data" #=> 7 * /ax/ =~ "input data" #=> nil + * + * If <code>=~</code> is used with a regexp literal with named captures, + * captured strings (or nil) are assigned to local variables named by + * the capture names. + * + * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y " + * p lhs #=> "x" + * p rhs #=> "y" + * + * If it is not matched, nil is assigned for the variables. + * + * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = " + * p lhs #=> nil + * p rhs #=> nil + * + * This assignment is implemented in the Ruby parser. + * So a regexp literal is required for the assignment. + * The assignment does not occur if the regexp is not a literal. + * + * re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ + * re =~ " x = " + * p lhs # undefined local variable + * p rhs # undefined local variable + * + * A regexp interpolation, <code>#{}</code>, also disables + * the assignment. 
+ * + * rhs_pat = /(?<rhs>\w+)/ + * /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y" + * p lhs # undefined local variable + * */ VALUE Index: ChangeLog =================================================================== --- ChangeLog (revision 14296) +++ ChangeLog (revision 14297) @@ -1,3 +1,13 @@ +Tue Dec 18 20:22:44 2007 Tanaka Akira <akr@f...> + + * parse.y (arg tMATCH arg): call reg_named_capture_assign_gen if regexp + literal is used. + (reg_named_capture_assign_gen): assign the result of named capture + into local variables. + [ruby-dev:32588] + + * re.c: document the assignment by named captures. + Tue Dec 18 18:09:15 2007 Yukihiro Matsumoto <matz@r...> * string.c (rb_str_splice): propagate encoding. Index: parse.y =================================================================== --- parse.y (revision 14296) +++ parse.y (revision 14297) @@ -21,6 +21,7 @@ #include "ruby/node.h" #include "ruby/st.h" #include "ruby/encoding.h" +#include "id.h" #include "regenc.h" #include <stdio.h> #include <errno.h> @@ -442,6 +443,8 @@ #define reg_fragment_setenc(str,options) reg_fragment_setenc_gen(parser, str, options) static void reg_fragment_check_gen(struct parser_params*, VALUE, int); #define reg_fragment_check(str,options) reg_fragment_check_gen(parser, str, options) +static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); +#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,regexp,match) #else #define remove_begin(node) (node) #endif /* !RIPPER */ @@ -2127,6 +2130,9 @@ { /*%%%*/ $$ = match_op($1, $3); + if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) { + $$ = reg_named_capture_assign($1->nd_lit, $$); + } /*% $$ = dispatch3(binary, $1, ripper_intern("=~"), $3); %*/ @@ -8474,6 +8480,82 @@ } } +typedef struct { + struct parser_params* parser; + rb_encoding *enc; + NODE *succ_block; + NODE *fail_block; + int num; +} reg_named_capture_assign_t; + +static int 
+reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end, + int back_num, int *back_refs, OnigRegex regex, void *arg0) +{ + reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0; + struct parser_params* parser = arg->parser; + + arg->num++; + + if (arg->succ_block == 0) { + arg->succ_block = NEW_BEGIN(0); + arg->fail_block = NEW_BEGIN(0); + } + + ID var = rb_intern3((const char *)name, name_end-name, arg->enc); + if (!is_local_id(var)) { + compile_error(PARSER_ARG "named capture with a non local variable - %s", + rb_id2name(var)); + return ST_CONTINUE; + } + if (dvar_defined(var) || local_id(var)) { + rb_warningS("named capture conflicts a local variable - %s", + rb_id2name(var)); + } + arg->succ_block = block_append(arg->succ_block, + newline_node(node_assign(assignable(var,0), + NEW_CALL( + gettable(rb_intern("$~")), + idAREF, + NEW_LIST(NEW_LIT(ID2SYM(var)))) + ))); + arg->fail_block = block_append(arg->fail_block, + newline_node(node_assign(assignable(var,0), NEW_LIT(Qnil)))); + return ST_CONTINUE; +} + +static NODE * +reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match) +{ + reg_named_capture_assign_t arg; + + arg.parser = parser; + arg.enc = rb_enc_get(regexp); + arg.succ_block = 0; + arg.fail_block = 0; + arg.num = 0; + onig_foreach_name(RREGEXP(regexp)->ptr, reg_named_capture_assign_iter, (void*)&arg); + + if (arg.num == 0) + return match; + + return + block_append( + newline_node(match), + NEW_IF(gettable(rb_intern("$~")), + block_append( + newline_node(arg.succ_block), + newline_node( + NEW_CALL( + gettable(rb_intern("$~")), + rb_intern("begin"), + NEW_LIST(NEW_LIT(INT2FIX(0)))))), + block_append( + newline_node(arg.fail_block), + newline_node( + NEW_LIT(Qnil))))); +} + static VALUE reg_compile_gen(struct parser_params* parser, VALUE str, int options) { Index: test/ruby/test_regexp.rb =================================================================== --- test/ruby/test_regexp.rb 
(revision 14296) +++ test/ruby/test_regexp.rb (revision 14297) @@ -95,6 +95,13 @@ assert_equal({}, /(.)(.)/.named_captures) end + def test_assign_named_capture + assert_equal("a", eval('/(?<foo>.)/ =~ "a"; foo')) + assert_equal("a", eval('foo = 1; /(?<foo>.)/ =~ "a"; foo')) + assert_equal("a", eval('1.times {|foo| /(?<foo>.)/ =~ "a"; break foo }')) + assert_raise(SyntaxError) { eval('/(?<Foo>.)/ =~ "a"') } + end + def test_match_regexp r = /./ m = r.match("a") -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml