ruby-changes:74419
From: TSUYUSATO <ko1@a...>
Date: Wed, 9 Nov 2022 23:22:05 +0900 (JST)
Subject: [ruby-changes:74419] 22294731a8 (master): Refactor field names
https://git.ruby-lang.org/ruby.git/commit/?id=22294731a8 From 22294731a82642ac63ed6708e0015361d05d6677 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune <make.just.on@g...> Date: Tue, 8 Nov 2022 15:05:52 +0900 Subject: Refactor field names --- include/ruby/onigmo.h | 5 +- regexec.c | 303 ++++++++++++++++++++++++-------------------------- regint.h | 23 ++-- 3 files changed, 162 insertions(+), 169 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 703f38f590..40cbedd4df 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -744,8 +744,9 @@ typedef struct { https://github.com/ruby/ruby/blob/trunk/include/ruby/onigmo.h#L744 typedef struct { int lower; int upper; - int base_num; - int inner_num; + /* These fields are for cache optimization. */ + int base_point; + int inner_point; } OnigRepeatRange; typedef void (*OnigWarnFunc)(const char* s); diff --git a/regexec.c b/regexec.c index 0bd4c8a96c..b9f8411faf 100644 --- a/regexec.c +++ b/regexec.c @@ -233,19 +233,17 @@ onig_get_capture_tree(OnigRegion* region) https://github.com/ruby/ruby/blob/trunk/regexec.c#L233 #ifdef USE_CACHE_MATCH_OPT -/* count number of jump-like opcodes for allocation of cache memory. */ -/* return -1 if we cannot optimize the regex matching by using cache. 
*/ -static int count_num_cache_opcode(regex_t* reg, int* table_size) +static int count_num_cache_index(regex_t* reg) { - int num = 0; - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; - MemNumType current_mem = -1; - int current_mem_num = 0; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; OnigEncoding enc = reg->enc; + int num_cache_index = 0; + MemNumType current_repeat = NO_OUTER_REPEAT; + while (p < pend) { switch (*p++) { case OP_FINISH: @@ -298,10 +296,10 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) https://github.com/ruby/ruby/blob/trunk/regexec.c#L296 break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num++; *table_size += 1; break; + num_cache_index++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; num++; *table_size += 1; break; + p++; num_cache_index++; break; case OP_WORD: case OP_NOT_WORD: @@ -334,7 +332,7 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) https://github.com/ruby/ruby/blob/trunk/regexec.c#L332 case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -354,56 +352,45 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) https://github.com/ruby/ruby/blob/trunk/regexec.c#L352 break; case OP_PUSH: p += SIZE_RELADDR; - num++; - *table_size += 1; + num_cache_index++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; num++; *table_size += 1; break; + p += SIZE_RELADDR + 1; num_cache_index++; break; case OP_REPEAT: case OP_REPEAT_NG: - if (current_mem != -1) { + if (current_repeat != NO_OUTER_REPEAT) { // A nested OP_REPEAT is not yet supported. 
- return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; } GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - num++; - *table_size += 1; + num_cache_index++; } - reg->repeat_range[mem].base_num = num; - current_mem = mem; - current_mem_num = num; + current_repeat = mem; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: GET_MEMNUM_INC(mem, p); - //fprintf(stderr, "OP_REPEAT %d\n", mem); - if (mem != current_mem) { + if (mem != current_repeat) { // A lone or invalid OP_REPEAT_INC is found. - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; } { - int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; - repeat_range->inner_num = inner_num; - num -= inner_num; - num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); - //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper); if (repeat_range->lower < repeat_range->upper) { - *table_size += 1; + num_cache_index++; } - current_mem = -1; - current_mem_num = 0; + current_repeat = NO_OUTER_REPEAT; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: Support nested OP_REPEAT. 
- return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -422,21 +409,21 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) https://github.com/ruby/ruby/blob/trunk/regexec.c#L409 case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_CALL: case OP_RETURN: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_CONDITION: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -445,21 +432,22 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) https://github.com/ruby/ruby/blob/trunk/regexec.c#L432 } } - return num; + return num_cache_index; } -static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) +static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) { - UChar* pbegin; - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; - MemNumType current_mem = -1; - int num = 0; - int current_mem_num = 0; + UChar* pbegin; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; OnigEncoding enc = reg->enc; + int num_cache_point = 0; + MemNumType current_repeat = -1; + int current_repeat_base_point = 0; + while (p < pend) { pbegin = p; switch (*p++) { @@ -512,20 +500,20 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) https://github.com/ruby/ruby/blob/trunk/regexec.c#L500 break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + 
cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: p++; - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case OP_WORD: @@ -559,7 +547,7 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) https://github.com/ruby/ruby/blob/trunk/regexec.c#L547 case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return; + return NUM_CACHE_INDEX_FAIL; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -579,59 +567,61 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) https://github.com/ruby/ruby/blob/trunk/regexec.c#L567 break; case OP_PUSH: p += SIZE_RELADDR; - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: p += SIZE_RELADDR + 1; - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case OP_REPEAT: case OP_REPEAT_NG: GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = mem; - num++; - table++; 
+ cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; } - current_mem = mem; - current_mem_num = num; + reg->repeat_range[mem].base_point = num_cache_point; + current_repeat = mem; + current_repeat_base_point = num_cache_point; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(mem, p); + GET_MEMNUM_INC(mem, p); { - int inner_num = num - current_mem_num; + int inner_point = num_cache_point - current_repeat_base_point; OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; if (repeat_range->lower < repeat_range->upper) { - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = mem; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_re (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/