ruby-changes:22614
From: naruse <ko1@a...>
Date: Fri, 17 Feb 2012 16:42:51 +0900 (JST)
Subject: [ruby-changes:22614] naruse:r34663 (trunk): * Merge Onigmo-5.13.1. [Feature #5820]
naruse 2012-02-17 16:42:23 +0900 (Fri, 17 Feb 2012)

New Revision: 34663

http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=34663

Log:
* Merge Onigmo-5.13.1. [ruby-dev:45057] [Feature #5820]
  https://github.com/k-takata/Onigmo
  cp reg{comp,enc,error,exec,parse,syntax}.c reg{enc,int,parse}.h
  cp oniguruma.h
  cp tool/enc-unicode.rb
  cp -r enc/

Added files:
trunk/enc/mktable.c
trunk/enc/unicode/casefold.h
trunk/enc/windows_31j.c

Modified files:
trunk/ChangeLog
trunk/NEWS
trunk/enc/ascii.c
trunk/enc/big5.c
trunk/enc/cp949.c
trunk/enc/emacs_mule.c
trunk/enc/euc_jp.c
trunk/enc/euc_kr.c
trunk/enc/euc_tw.c
trunk/enc/gb18030.c
trunk/enc/gbk.c
trunk/enc/iso_8859_1.c
trunk/enc/iso_8859_10.c
trunk/enc/iso_8859_11.c
trunk/enc/iso_8859_13.c
trunk/enc/iso_8859_14.c
trunk/enc/iso_8859_15.c
trunk/enc/iso_8859_16.c
trunk/enc/iso_8859_2.c
trunk/enc/iso_8859_3.c
trunk/enc/iso_8859_4.c
trunk/enc/iso_8859_5.c
trunk/enc/iso_8859_6.c
trunk/enc/iso_8859_7.c
trunk/enc/iso_8859_8.c
trunk/enc/iso_8859_9.c
trunk/enc/koi8_r.c
trunk/enc/koi8_u.c
trunk/enc/shift_jis.c
trunk/enc/unicode/name2ctype.h
trunk/enc/unicode.c
trunk/enc/us_ascii.c
trunk/enc/utf_16be.c
trunk/enc/utf_16le.c
trunk/enc/utf_32be.c
trunk/enc/utf_32le.c
trunk/enc/utf_8.c
trunk/enc/windows_1251.c
trunk/include/ruby/oniguruma.h
trunk/regcomp.c
trunk/regenc.c
trunk/regenc.h
trunk/regerror.c
trunk/regexec.c
trunk/regint.h
trunk/regparse.c
trunk/regparse.h
trunk/regsyntax.c
trunk/test/ruby/enc/test_euc_jp.rb
trunk/test/ruby/enc/test_shift_jis.rb
trunk/tool/enc-unicode.rb

Index: regparse.c =================================================================== --- regparse.c (revision 34662) +++ regparse.c (revision 34663) @@ -1,9 +1,9 @@ -/* -*- mode:c; c-file-style:"gnu" -*- */ /********************************************************************** - regparse.c - Oniguruma (regular expression library) + regparse.c - Onigmo (Oniguruma-mod) (regular expression library) 
**********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,7 +50,11 @@ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | - ONIG_SYN_OP2_ESC_H_XDIGIT ) + ONIG_SYN_OP2_ESC_H_XDIGIT | + ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER | + ONIG_SYN_OP2_QMARK_LPAREN_CONDITION | + ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK | + ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | @@ -60,7 +64,8 @@ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | ONIG_SYN_WARN_CC_DUP | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) - , ONIG_OPTION_NONE + , ( ONIG_OPTION_ASCII_RANGE | ONIG_OPTION_POSIX_BRACKET_ALL_RANGE | + ONIG_OPTION_WORD_BOUND_ALL_RANGE ) , { (OnigCodePoint )'\\' /* esc */ @@ -133,7 +138,7 @@ (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ - add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) + add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT) #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ if (! 
ONIGENC_IS_SINGLEBYTE(enc)) {\ @@ -677,7 +682,7 @@ } static NameEntry* -name_find(regex_t* reg, UChar* name, UChar* name_end) +name_find(regex_t* reg, const UChar* name, const UChar* name_end) { int i, len; NameEntry* e; @@ -781,10 +786,12 @@ } else if (t->num == t->alloc) { int i; + NameEntry* p; alloc = t->alloc * 2; - t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); - CHECK_NULL_RETURN_MEMERR(t->e); + p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + t->e = p; t->alloc = alloc; clear: @@ -826,9 +833,11 @@ } else { if (e->back_num > e->back_alloc) { + int* p; alloc = e->back_alloc * 2; - e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); - CHECK_NULL_RETURN_MEMERR(e->back_refs); + p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + e->back_refs = p; e->back_alloc = alloc; } e->back_refs[e->back_num - 1] = backref; @@ -1209,7 +1218,7 @@ } static Node* -node_new_ctype(int type, int not) +node_new_ctype(int type, int not, int ascii_range) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1217,6 +1226,7 @@ SET_NTYPE(node, NT_CTYPE); NCTYPE(node)->ctype = type; NCTYPE(node)->not = not; + NCTYPE(node)->ascii_range = ascii_range; return node; } @@ -1288,6 +1298,7 @@ NANCHOR(node)->type = type; NANCHOR(node)->target = NULL; NANCHOR(node)->char_len = -1; + NANCHOR(node)->ascii_range = 0; return node; } @@ -1455,7 +1466,7 @@ CHECK_NULL_RETURN_MEMERR(p); NSTR(node)->s = p; - NSTR(node)->capa = (int)capa; + NSTR(node)->capa = (int )capa; } } else { @@ -1483,6 +1494,15 @@ return onig_node_str_cat(node, s, s + 1); } +static int +node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c) +{ + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int num = ONIGENC_CODE_TO_MBC(enc, c, buf); + if (num < 0) return num; + return onig_node_str_cat(node, buf, buf + num); +} + extern void onig_node_conv_to_str_node(Node* node, int flag) { @@ -1535,7 +1555,8 @@ 
node_new_str_raw(UChar* s, UChar* end) { Node* node = node_new_str(s, end); - NSTRING_SET_RAW(node); + if (IS_NOT_NULL(node)) + NSTRING_SET_RAW(node); return node; } @@ -1564,7 +1585,7 @@ p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end); if (p && p > sn->s) { /* can be splitted. */ n = node_new_str(p, sn->end); - if ((sn->flag & NSTR_RAW) != 0) + if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0) NSTRING_SET_RAW(n); sn->end = (UChar* )p; } @@ -1627,14 +1648,16 @@ } static int -scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) +scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen, + int maxlen, OnigEncoding enc) { OnigCodePoint c; unsigned int num, val; + int restlen; UChar* p = *src; PFETCH_READY; + restlen = maxlen - minlen; num = 0; while (!PEND && maxlen-- != 0) { PFETCH(c); @@ -1650,6 +1673,8 @@ break; } } + if (maxlen > restlen) + return -2; /* not enough digits */ *src = p; return num; } @@ -1734,17 +1759,19 @@ data = (OnigCodePoint* )(bbuf->p); data++; - for (low = 0, bound = n; low < bound; ) { + bound = (from == 0) ? 0 : n; + for (low = 0; low < bound; ) { x = (low + bound) >> 1; - if (from > data[x*2 + 1]) + if (from - 1 > data[x*2 + 1]) low = x + 1; else bound = x; } - for (high = low, bound = n; high < bound; ) { + high = (to == ONIG_LAST_CODE_POINT) ? 
n : low; + for (bound = n; high < bound; ) { x = (high + bound) >> 1; - if (to >= data[x*2] - 1) + if (to + 1 >= data[x*2]) high = x + 1; else bound = x; @@ -1762,13 +1789,15 @@ to = data[(high - 1)*2 + 1]; } - if (inc_n != 0 && (OnigCodePoint )high < n) { + if (inc_n != 0) { int from_pos = SIZE_CODE_POINT * (1 + high * 2); int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); - int size = (n - high) * 2 * SIZE_CODE_POINT; if (inc_n > 0) { - BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + if ((OnigCodePoint )high < n) { + int size = (n - high) * 2 * SIZE_CODE_POINT; + BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + } } else { BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); @@ -1836,11 +1865,11 @@ r = add_code_range_to_buf(pbuf, env, pre, from - 1); if (r != 0) return r; } - if (to == ~((OnigCodePoint )0)) break; + if (to == ONIG_LAST_CODE_POINT) break; pre = to + 1; } - if (to < ~((OnigCodePoint )0)) { - r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0)); + if (to < ONIG_LAST_CODE_POINT) { + r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT); } return r; } @@ -2053,8 +2082,8 @@ } } if (r != 0) { - bbuf_free(pbuf); - return r; + bbuf_free(pbuf); + return r; } dest->mbuf = pbuf; @@ -2111,8 +2140,8 @@ } } if (r != 0) { - bbuf_free(pbuf); - return r; + bbuf_free(pbuf); + return r; } dest->mbuf = pbuf; @@ -2151,7 +2180,9 @@ return c; } -#if 0 /* no invalid quantifier */ +#ifdef USE_NO_INVALID_QUANTIFIER +#define is_invalid_quantifier_target(node) 0 +#else static int is_invalid_quantifier_target(Node* node) { @@ -2183,8 +2214,6 @@ } return 0; } -#else -#define is_invalid_quantifier_target(node) 0 #endif /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ @@ -2303,6 +2332,9 @@ TK_CC_OPEN, TK_QUOTE_OPEN, TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ + TK_LINEBREAK, + TK_EXTENDED_GRAPHEME_CLUSTER, + TK_KEEP, /* in cc */ TK_CC_CLOSE, TK_CC_RANGE, @@ -2320,9 +2352,11 @@ UChar* s; int c; OnigCodePoint code; - int anchor; - int subtype; struct { + int subtype; + int 
ascii_range; + } anchor; + struct { int lower; int upper; int greedy; @@ -2342,6 +2376,7 @@ UChar* name; UChar* name_end; int gnum; + int rel; } call; struct { int ctype; @@ -2523,6 +2558,8 @@ switch (start) { case '<': return (OnigCodePoint )'>'; break; case '\'': return (OnigCodePoint )'\''; break; + case '(': return (OnigCodePoint )')'; break; + case '{': return (OnigCodePoint )'}'; break; default: break; } @@ -2648,7 +2685,7 @@ #endif /* USE_BACKREF_WITH_LEVEL */ /* - def: 0 -> define name (don't allow number name) + ref: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) */ static int @@ -2883,8 +2920,8 @@ { if (onig_warn == onig_null_warn) return ; - if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { - onig_syntax_warn(env, "regular expression has '%s' without escape", c); + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { + onig_syntax_warn(env, "regular expression has '%s' without escape", c); } } @@ -2893,9 +2930,9 @@ { if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ; - if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) && - !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) { - (env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP; + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP) && + !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) { + env->warnings_flag |= ONIG_SYN_WARN_CC_DUP; onig_syntax_warn(env, "character class has duplicated range"); } } @@ -3014,32 +3051,32 @@ switch (c) { case 'w': tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; tok->u.prop.not = 0; break; case 'W': tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; tok->u.prop.not = 1; break; case 'd': tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; tok->u.prop.not = 0; break; case 'D': tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = 
ONIGENC_CTYPE_D; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; tok->u.prop.not = 1; break; case 's': tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; tok->u.prop.not = 0; break; case 'S': tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; tok->u.prop.not = 1; break; case 'h': @@ -3074,7 +3111,7 @@ } } else { - onig_syntax_warn(env, "invalid Unicode Property \\%c", c); + onig_syntax_warn(env, "invalid Unicode Property \\%c", c); } break; @@ -3084,7 +3121,7 @@ prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { c2 = PPEEK; @@ -3104,7 +3141,7 @@ } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ @@ -3120,8 +3157,9 @@ prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); + if (num < -1) return ONIGERR_TOO_SHORT_DIGITS; + else if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } @@ -3195,7 +3233,83 @@ return tok->type; } +#ifdef USE_NAMED_GROUP static int +fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src, + UChar* end, ScanEnv* env) +{ + int r, num; + OnigEncoding enc = env->enc; + const OnigSyntaxType* syn = env->syntax; + UChar* prev; + UChar* p = *src; + UChar* name_end; + int* backs; + int back_num; + + prev = p; + +#ifdef USE_BACKREF_WITH_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. */ + r = fetch_name_with_level(c, &p, end, &name_end, + env, &back_num, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else + r = fetch_name(&p, end, &name_end, env, &back_num, 1); +#endif + if (r < 0) return r; + + if (back_num != 0) { + if (back_num < 0) { + back_num = BACKREF_REL_TO_ABS(back_num, env); + if (back_num <= 0) + return ONIGERR_INVALID_BACKREF; + } + + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (back_num > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + return ONIGERR_INVALID_BACKREF; + } + tok->type = TK_BACKREF; + tok->u.backref.by_name = 0; + tok->u.backref.num = 1; + tok->u.backref.ref1 = back_num; + } + else { + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } + } + *src = p; + return 0; +} +#endif + +static int fetch_token(OnigToken* tok, UChar** src, UChar* end, 
ScanEnv* env) { int r, num; @@ -3304,68 +3418,74 @@ case 'w': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; tok->u.prop.not = 0; break; case 'W': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; tok->u.prop.not = 1; break; case 'b': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BOUND; + tok->u.anchor.subtype = ANCHOR_WORD_BOUND; + tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option) + && ! IS_WORD_BOUND_ALL_RANGE(env->option); break; case 'B': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NOT_WORD_BOUND; + tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND; + tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option) + && ! IS_WORD_BOUND_ALL_RANGE(env->option); break; #ifdef USE_WORD_BEGIN_END case '<': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BEGIN; + tok->u.anchor.subtype = ANCHOR_WORD_BEGIN; + tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option); break; case '>': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_END; + tok->u.anchor.subtype = ANCHOR_WORD_END; + tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option); break; #endif case 's': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; tok->u.prop.not = 0; break; case 'S': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; tok->u.prop.not = 1; break; case 'd': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; tok->u.prop.not = 0; break; case 'D': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; tok->u.prop.not = 1; break; @@ -3387,26 +3507,26 @@ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; begin_buf: tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_BUF; + tok->u.anchor.subtype = ANCHOR_BEGIN_BUF; break; case 'Z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_SEMI_END_BUF; + tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF; break; case 'z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; end_buf: tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_END_BUF; + tok->u.anchor.subtype = ANCHOR_END_BUF; break; case 'G': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_POSITION; + tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION; break; case '`': @@ -3425,7 +3545,7 @@ prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) @@ -3443,7 +3563,7 @@ } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ @@ -3459,8 +3579,9 @@ prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); + if (num < -1) return ONIGERR_TOO_SHORT_DIGITS; + else if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } @@ -3527,98 +3648,69 @@ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { PFETCH(c); if (c == '<' || c == '\'') { - UChar* name_end; - int* backs; - int back_num; - - prev = p; - -#ifdef USE_BACKREF_WITH_LEVEL - name_end = NULL_UCHARP; /* no need. escape gcc warning. */ - r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, - env, &back_num, &tok->u.backref.level); - if (r == 1) tok->u.backref.exist_level = 1; - else tok->u.backref.exist_level = 0; -#else - r = fetch_name(&p, end, &name_end, env, &back_num, 1); -#endif + r = fetch_named_backref_token(c, tok, &p, end, env); if (r < 0) return r; - - if (back_num != 0) { - if (back_nu (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/