ruby-changes:74413
From: TSUYUSATO <ko1@a...>
Date: Wed, 9 Nov 2022 23:21:58 +0900 (JST)
Subject: [ruby-changes:74413] a1c1fc558a (master): Revert "Refactor field names"
https://git.ruby-lang.org/ruby.git/commit/?id=a1c1fc558a From a1c1fc558a0ee791e91a66cae5c9515679890339 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune <make.just.on@g...> Date: Tue, 8 Nov 2022 15:13:27 +0900 Subject: Revert "Refactor field names" This reverts commit 1e6673d6bbd2adbf555d82c7c0906ceb148ed6ee. --- include/ruby/onigmo.h | 5 +- regexec.c | 303 ++++++++++++++++++++++++++------------------------ regint.h | 23 ++-- 3 files changed, 169 insertions(+), 162 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 40cbedd4df..703f38f590 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -744,9 +744,8 @@ typedef struct { https://github.com/ruby/ruby/blob/trunk/include/ruby/onigmo.h#L744 typedef struct { int lower; int upper; - /* These fields are for cache optimization. */ - int base_point; - int inner_point; + int base_num; + int inner_num; } OnigRepeatRange; typedef void (*OnigWarnFunc)(const char* s); diff --git a/regexec.c b/regexec.c index b9f8411faf..0bd4c8a96c 100644 --- a/regexec.c +++ b/regexec.c @@ -233,17 +233,19 @@ onig_get_capture_tree(OnigRegion* region) https://github.com/ruby/ruby/blob/trunk/regexec.c#L233 #ifdef USE_CACHE_MATCH_OPT -static int count_num_cache_index(regex_t* reg) +/* count number of jump-like opcodes for allocation of cache memory. */ +/* return -1 if we cannot optimize the regex matching by using cache. 
*/ +static int count_num_cache_opcode(regex_t* reg, int* table_size) { - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; + int num = 0; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int current_mem_num = 0; OnigEncoding enc = reg->enc; - int num_cache_index = 0; - MemNumType current_repeat = NO_OUTER_REPEAT; - while (p < pend) { switch (*p++) { case OP_FINISH: @@ -296,10 +298,10 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L298 break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num_cache_index++; break; + num++; *table_size += 1; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; num_cache_index++; break; + p++; num++; *table_size += 1; break; case OP_WORD: case OP_NOT_WORD: @@ -332,7 +334,7 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L334 case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -352,45 +354,56 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L354 break; case OP_PUSH: p += SIZE_RELADDR; - num_cache_index++; + num++; + *table_size += 1; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; num_cache_index++; break; + p += SIZE_RELADDR + 1; num++; *table_size += 1; break; case OP_REPEAT: case OP_REPEAT_NG: - if (current_repeat != NO_OUTER_REPEAT) { + if (current_mem != -1) { // A nested OP_REPEAT is not yet supported. 
- return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; } GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - num_cache_index++; + num++; + *table_size += 1; } - current_repeat = mem; + reg->repeat_range[mem].base_num = num; + current_mem = mem; + current_mem_num = num; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: GET_MEMNUM_INC(mem, p); - if (mem != current_repeat) { + //fprintf(stderr, "OP_REPEAT %d\n", mem); + if (mem != current_mem) { // A lone or invalid OP_REPEAT_INC is found. - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; } { + int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; + repeat_range->inner_num = inner_num; + num -= inner_num; + num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); + //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper); if (repeat_range->lower < repeat_range->upper) { - num_cache_index++; + *table_size += 1; } - current_repeat = NO_OUTER_REPEAT; + current_mem = -1; + current_mem_num = 0; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: Support nested OP_REPEAT. 
- return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -409,21 +422,21 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L422 case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_CALL: case OP_RETURN: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_CONDITION: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -432,22 +445,21 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L445 } } - return num_cache_index; + return num; } -static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) +static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) { - UChar* pbegin; - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; + UChar* pbegin; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int num = 0; + int current_mem_num = 0; OnigEncoding enc = reg->enc; - int num_cache_point = 0; - MemNumType current_repeat = -1; - int current_repeat_base_point = 0; - while (p < pend) { pbegin = p; switch (*p++) { @@ -500,20 +512,20 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) https://github.com/ruby/ruby/blob/trunk/regexec.c#L512 break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr 
= pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: p++; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_WORD: @@ -547,7 +559,7 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) https://github.com/ruby/ruby/blob/trunk/regexec.c#L559 case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_INDEX_FAIL; + return; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -567,61 +579,59 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) https://github.com/ruby/ruby/blob/trunk/regexec.c#L579 break; case OP_PUSH: p += SIZE_RELADDR; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: p += SIZE_RELADDR + 1; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_REPEAT: case OP_REPEAT_NG: GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - 
num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = mem; + num++; + table++; } - reg->repeat_range[mem].base_point = num_cache_point; - current_repeat = mem; - current_repeat_base_point = num_cache_point; + current_mem = mem; + current_mem_num = num; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(mem, p); + GET_MEMNUM_INC(mem, p); { - int inner_point = num_cache_point - current_repeat_base_point; + int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; if (repeat_range->lower < repeat_range->upper) { - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = mem; - cache_index++; + table->addr = pbegin; + table->num = num - current_me (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/