ruby-changes:27495
From: naruse <ko1@a...>
Date: Sat, 2 Mar 2013 01:36:50 +0900 (JST)
Subject: [ruby-changes:27495] naruse:r39547 (trunk): * Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f
naruse 2013-03-02 01:36:37 +0900 (Sat, 02 Mar 2013) New Revision: 39547 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=39547 Log: * Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f v5.13.3 [Bug#7972] [Bug#7974] Modified files: trunk/ChangeLog trunk/enc/shift_jis.c trunk/include/ruby/oniguruma.h trunk/regcomp.c trunk/regexec.c trunk/regint.h trunk/regparse.c trunk/tool/enc-unicode.rb Index: regparse.c =================================================================== --- regparse.c (revision 39546) +++ regparse.c (revision 39547) @@ -3,7 +3,7 @@ https://github.com/ruby/ruby/blob/trunk/regparse.c#L3 **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp> + * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -156,7 +156,7 @@ bbuf_clone(BBuf** rto, BBuf* from) https://github.com/ruby/ruby/blob/trunk/regparse.c#L156 #define BITSET_IS_EMPTY(bs,empty) do {\ int i;\ empty = 1;\ - for (i = 0; i < (int )BITSET_SIZE; i++) {\ + for (i = 0; i < BITSET_SIZE; i++) {\ if ((bs)[i] != 0) {\ empty = 0; break;\ }\ @@ -185,35 +185,35 @@ static void https://github.com/ruby/ruby/blob/trunk/regparse.c#L185 bitset_invert(BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } + for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } } static void bitset_invert_to(BitSetRef from, BitSetRef to) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } + for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); } } static void bitset_and(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } + for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; } } static void bitset_or(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } + for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; } } static void bitset_copy(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } + for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; } } extern int @@ -425,9 +425,6 @@ typedef struct { https://github.com/ruby/ruby/blob/trunk/regparse.c#L425 typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ -#define NAMEBUF_SIZE 24 -#define NAMEBUF_SIZE_1 25 - #ifdef ONIG_DEBUG static int i_print_name_entry(UChar* key, NameEntry* e, void* arg) @@ -589,7 +586,7 @@ onig_number_of_names(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regparse.c#L586 NameTable* t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) - return (int)t->num_entries; + return (int )t->num_entries; else return 0; } @@ -2627,7 +2624,7 @@ fetch_name_with_level(OnigCodePoint star https://github.com/ruby/ruby/blob/trunk/regparse.c#L2624 name_end = p; PFETCH(c); if (c == end_code || c == ')' || c == '+' || c == '-') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; break; } @@ -2752,7 +2749,7 @@ fetch_name(OnigCodePoint start_code, UCh https://github.com/ruby/ruby/blob/trunk/regparse.c#L2749 name_end = p; PFETCH(c); if (c == end_code || c == ')') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; break; } @@ -4124,24 +4121,36 @@ add_ctype_to_cc(CClassNode* cc, int ctyp https://github.com/ruby/ruby/blob/trunk/regparse.c#L4121 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { - r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); - if ((r == 0) && ascii_range) { - if (not != 0) { - r = add_code_range_to_buf0(&(cc->mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); - } - else { - CClassNode ccascii; - initialize_cclass(&ccascii); - if (ONIGENC_MBC_MINLEN(env->enc) > 1) { - add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F); + if (ascii_range) { + CClassNode ccwork; + initialize_cclass(&ccwork); + r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out, + ranges); + if (r == 0) { + if (not) { + r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); } else { - bitset_set_range(env, ccascii.bs, 0x00, 0x7F); + CClassNode ccascii; + initialize_cclass(&ccascii); + if (ONIGENC_MBC_MINLEN(env->enc) > 1) { + add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F); + } + else { + bitset_set_range(env, ccascii.bs, 0x00, 0x7F); + } + r = and_cclass(&ccwork, &ccascii, env); + if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); + } + if (r == 0) { + r = or_cclass(cc, &ccwork, env); } - r = and_cclass(cc, &ccascii, env); - if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); + if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf); } } + else { + r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); + } return r; } else if (r != ONIG_NO_SUPPORT_CONFIG) { @@ -4562,7 +4571,7 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4571 UChar* psave = p; int i, base = tok->base; - buf[0] = tok->u.c; + buf[0] = (UChar )tok->u.c; for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { r = fetch_token_in_cc(tok, &p, end, env); if (r < 0) goto err; @@ -4570,7 +4579,7 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4579 fetched = 1; break; } - buf[i] = tok->u.c; + buf[i] = (UChar )tok->u.c; } if (i < ONIGENC_MBC_MINLEN(env->enc)) { @@ -4706,7 +4715,7 @@ parse_char_class(Node** np, OnigToken* t https://github.com/ruby/ruby/blob/trunk/regparse.c#L4715 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ + goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; goto err; @@ -5684,7 +5693,7 @@ countbits(unsigned int bits) https://github.com/ruby/ruby/blob/trunk/regparse.c#L5693 static int is_onechar_cclass(CClassNode* cc, OnigCodePoint* code) { - const OnigCodePoint not_found = (OnigCodePoint)-1; + const OnigCodePoint not_found = ONIG_LAST_CODE_POINT; OnigCodePoint c = not_found; int i; BBuf *bbuf = cc->mbuf; @@ -5710,7 +5719,7 @@ is_onechar_cclass(CClassNode* cc, OnigCo https://github.com/ruby/ruby/blob/trunk/regparse.c#L5719 } /* check bitset */ - for (i = 0; i < (int )BITSET_SIZE; i++) { + for (i = 0; i < BITSET_SIZE; i++) { Bits b1 = cc->bs[i]; if (b1 != 0) { if (((b1 & (b1 - 1)) == 0) && (c == not_found)) { Index: regcomp.c =================================================================== --- regcomp.c (revision 39546) +++ regcomp.c (revision 39547) @@ -3,7 +3,7 @@ https://github.com/ruby/ruby/blob/trunk/regcomp.c#L3 **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp> + * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -118,7 +118,7 @@ static int https://github.com/ruby/ruby/blob/trunk/regcomp.c#L118 bitset_is_empty(BitSetRef bs) { int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { + for (i = 0; i < BITSET_SIZE; i++) { if (bs[i] != 0) return 0; } return 1; @@ -4311,7 +4311,7 @@ typedef struct { https://github.com/ruby/ruby/blob/trunk/regcomp.c#L4311 OptAncInfo anc; int reach_end; - int ignore_case; + int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */ int len; UChar s[OPT_EXACT_MAXLEN]; } OptExactInfo; @@ -4548,7 +4548,7 @@ clear_opt_exact_info(OptExactInfo* ex) https://github.com/ruby/ruby/blob/trunk/regcomp.c#L4548 clear_mml(&ex->mmd); clear_opt_anc_info(&ex->anc); ex->reach_end = 0; - ex->ignore_case = 0; + ex->ignore_case = -1; /* unset */ ex->len = 0; ex->s[0] = '\0'; } @@ -4566,11 +4566,10 @@ concat_opt_exact_info(OptExactInfo* to, https://github.com/ruby/ruby/blob/trunk/regcomp.c#L4566 UChar *p, *end; OptAncInfo tanc; - if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return ; /* avoid */ - - to->ignore_case = 1; - } + if (to->ignore_case < 0) + to->ignore_case = add->ignore_case; + else if (to->ignore_case != add->ignore_case) + return ; /* avoid */ p = add->s; end = p + add->len; @@ -4636,7 +4635,10 @@ alt_merge_opt_exact_info(OptExactInfo* t https://github.com/ruby/ruby/blob/trunk/regcomp.c#L4635 to->reach_end = 0; } to->len = i; - to->ignore_case |= add->ignore_case; + if (to->ignore_case < 0) + to->ignore_case = add->ignore_case; + else if (add->ignore_case >= 0) + to->ignore_case |= add->ignore_case; alt_merge_opt_anc_info(&to->anc, &add->anc); if (! to->reach_end) to->anc.right_anchor = 0; @@ -4666,8 +4668,8 @@ select_opt_exact_info(OnigEncoding enc, https://github.com/ruby/ruby/blob/trunk/regcomp.c#L4668 if (alt->len > 1) v2 += 5; } - if (now->ignore_case == 0) v1 *= 2; - if (alt->ignore_case == 0) v2 *= 2; + if (now->ignore_case <= 0) v1 *= 2; + if (alt->ignore_case <= 0) v2 *= 2; if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) copy_opt_exact_info(now, alt); @@ -4765,7 +4767,7 @@ comp_opt_exact_or_map_info(OptExactInfo* https://github.com/ruby/ruby/blob/trunk/regcomp.c#L4767 if (m->value <= 0) return -1; - ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2); vm = COMP_EM_BASE * 5 * 2 / m->value; return comp_distance_value(&e->mmd, &m->mmd, ve, vm); } @@ -4947,7 +4949,8 @@ optimize_node_left(Node* node, NodeOptIn https://github.com/ruby/ruby/blob/trunk/regcomp.c#L4949 if (! NSTRING_IS_AMBIG(node)) { concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NSTRING_IS_RAW(node), env->enc); + is_raw, env->enc); + opt->exb.ignore_case = 0; if (slen > 0) { add_char_opt_map_info(&opt->map, *(sn->s), env->enc); } @@ -5260,7 +5263,7 @@ set_optimize_exact_info(regex_t* reg, Op https://github.com/ruby/ruby/blob/trunk/regcomp.c#L5263 allow_reverse = ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - if (e->ignore_case) { + if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg, reg->map, &(reg->int_map), 1); Index: include/ruby/oniguruma.h =================================================================== --- include/ruby/oniguruma.h (revision 39546) +++ include/ruby/oniguruma.h (revision 39547) @@ -40,7 +40,7 @@ extern "C" { https://github.com/ruby/ruby/blob/trunk/include/ruby/oniguruma.h#L40 #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 5 #define ONIGURUMA_VERSION_MINOR 13 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_TEENY 3 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES Index: ChangeLog =================================================================== --- ChangeLog (revision 39546) +++ ChangeLog (revision 39547) @@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Sat Mar 2 01:33:17 2013 NARUSE, Yui <naruse@r...> + + * Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f + v5.13.3 [Bug#7972] [Bug#7974] + Fri Mar 1 11:09:06 2013 Eric Hodel <drbrain@s...> * lib/fileutils.rb: Revert r34669 which altered the way Index: enc/shift_jis.c =================================================================== --- enc/shift_jis.c (revision 39546) +++ enc/shift_jis.c (revision 39547) @@ -231,7 +231,7 @@ code_to_mbclen(OnigCodePoint code, OnigE https://github.com/ruby/ruby/blob/trunk/enc/shift_jis.c#L231 } else if (code <= 0xffff) { int low = code & 0xff; - if (low < 0x40 || low == 0x7f || 0xfc < low) + if (! SJIS_ISMB_TRAIL(low)) return ONIGERR_INVALID_CODE_POINT_VALUE; return 2; } Index: regint.h =================================================================== --- regint.h (revision 39546) +++ regint.h (revision 39547) @@ -390,7 +390,7 @@ typedef unsigned int BitStatusType; https://github.com/ruby/ruby/blob/trunk/regint.h#L390 /* bitset */ #define BITS_PER_BYTE 8 #define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) -#define BITS_IN_ROOM ((int)sizeof(Bits) * BITS_PER_BYTE) +#define BITS_IN_ROOM ((int )sizeof(Bits) * BITS_PER_BYTE) #define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) #ifdef PLATFORM_UNALIGNED_WORD_ACCESS @@ -405,11 +405,11 @@ typedef Bits* BitSetRef; https://github.com/ruby/ruby/blob/trunk/regint.h#L405 #define BITSET_CLEAR(bs) do {\ int i;\ - for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ + for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; } \ } while (0) -#define BS_ROOM(bs,pos) (bs)[(int)(pos) / BITS_IN_ROOM] -#define BS_BIT(pos) (1 << ((int)(pos) % BITS_IN_ROOM)) +#define BS_ROOM(bs,pos) (bs)[(int )(pos) / BITS_IN_ROOM] +#define BS_BIT(pos) (1 << ((int )(pos) % BITS_IN_ROOM)) #define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) #define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) @@ -457,7 +457,7 @@ typedef struct _BBuf { https://github.com/ruby/ruby/blob/trunk/regint.h#L457 #define BBUF_WRITE1(buf,pos,byte) do{\ int used = (pos) + 1;\ if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - (buf)->p[(pos)] = (byte);\ + (buf)->p[(pos)] = (UChar )(byte);\ if ((buf)->used < (unsigned int )used) (buf)->used = used;\ } while (0) Index: regexec.c =================================================================== --- regexec.c (revision 39546) +++ regexec.c (revision 39547) @@ -2559,7 +2559,7 @@ match_at(regex_t* reg, const UChar* str, https://github.com/ruby/ruby/blob/trunk/regexec.c#L2559 (int )mem, (intptr_t )s, s); #endif if (isnull == -1) goto fail; - goto null_check_found; + goto null_check_found; } } MOP_OUT; @@ -2585,7 +2585,7 @@ match_at(regex_t* reg, const UChar* str, https://github.com/ruby/ruby/blob/trunk/regexec.c#L2585 (int )mem, (intptr_t )s, s); #endif if (isnull == -1) goto fail; - goto null_check_found; + goto null_check_found; } else { STACK_PUSH_NULL_CHECK_END(mem); Index: tool/enc-unicode.rb =================================================================== --- tool/enc-unicode.rb (revision 39546) +++ tool/enc-unicode.rb (revision 39547) @@ -225,13 +225,20 @@ def parse_block(data) https://github.com/ruby/ruby/blob/trunk/tool/enc-unicode.rb#L225 blocks << constname end +# shim for Ruby 1.8 +unless {}.respond_to?(:key) + class Hash + alias key index + end +end + $const_cache = {} # make_const(property, pairs, name): Prints a 'static const' structure for a # given property, group of paired codepoints, and a human-friendly name for # the group def make_const(prop, data, name) puts "\n/* '#{prop}': #{name} */" - if origprop = $const_cache.index(data) # don't use Hash#key because it is 1.9 feature + if origprop = $const_cache.key(data) puts "#define CR_#{prop} CR_#{origprop}" else $const_cache[prop] = data -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/