[前][次][番号順一覧][スレッド一覧]

ruby-changes:74413

From: TSUYUSATO <ko1@a...>
Date: Wed, 9 Nov 2022 23:21:58 +0900 (JST)
Subject: [ruby-changes:74413] a1c1fc558a (master): Revert "Refactor field names"

https://git.ruby-lang.org/ruby.git/commit/?id=a1c1fc558a

From a1c1fc558a0ee791e91a66cae5c9515679890339 Mon Sep 17 00:00:00 2001
From: TSUYUSATO Kitsune <make.just.on@g...>
Date: Tue, 8 Nov 2022 15:13:27 +0900
Subject: Revert "Refactor field names"

This reverts commit 1e6673d6bbd2adbf555d82c7c0906ceb148ed6ee.
---
 include/ruby/onigmo.h |   5 +-
 regexec.c             | 303 ++++++++++++++++++++++++++------------------------
 regint.h              |  23 ++--
 3 files changed, 169 insertions(+), 162 deletions(-)

diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
index 40cbedd4df..703f38f590 100644
--- a/include/ruby/onigmo.h
+++ b/include/ruby/onigmo.h
@@ -744,9 +744,8 @@ typedef struct { https://github.com/ruby/ruby/blob/trunk/include/ruby/onigmo.h#L744
 typedef struct {
   int lower;
   int upper;
-  /* These fields are for cache optimization. */
-  int base_point;
-  int inner_point;
+  int base_num;
+  int inner_num;
 } OnigRepeatRange;
 
 typedef void (*OnigWarnFunc)(const char* s);
diff --git a/regexec.c b/regexec.c
index b9f8411faf..0bd4c8a96c 100644
--- a/regexec.c
+++ b/regexec.c
@@ -233,17 +233,19 @@ onig_get_capture_tree(OnigRegion* region) https://github.com/ruby/ruby/blob/trunk/regexec.c#L233
 
 #ifdef USE_CACHE_MATCH_OPT
 
-static int count_num_cache_index(regex_t* reg)
+/* count number of jump-like opcodes for allocation of cache memory. */
+/* return -1 if we cannot optimize the regex matching by using cache. */
+static int count_num_cache_opcode(regex_t* reg, int* table_size)
 {
-  UChar*       p = reg->p;
-  UChar*       pend = p + reg->used;
-  LengthType   len;
-  MemNumType   mem;
+  int num = 0;
+  UChar* p = reg->p;
+  UChar* pend = p + reg->used;
+  LengthType len;
+  MemNumType  mem;
+  MemNumType current_mem = -1;
+  int current_mem_num = 0;
   OnigEncoding enc = reg->enc;
 
-  int        num_cache_index = 0;
-  MemNumType current_repeat = NO_OUTER_REPEAT;
-
   while (p < pend) {
     switch (*p++) {
       case OP_FINISH:
@@ -296,10 +298,10 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L298
 	break;
       case OP_ANYCHAR_STAR:
       case OP_ANYCHAR_ML_STAR:
-	num_cache_index++; break;
+	num++; *table_size += 1; break;
       case OP_ANYCHAR_STAR_PEEK_NEXT:
       case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
-	p++; num_cache_index++; break;
+	p++; num++; *table_size += 1; break;
 
       case OP_WORD:
       case OP_NOT_WORD:
@@ -332,7 +334,7 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L334
       case OP_BACKREF_MULTI:
       case OP_BACKREF_MULTI_IC:
       case OP_BACKREF_WITH_LEVEL:
-	return NUM_CACHE_INDEX_FAIL;
+	return NUM_CACHE_OPCODE_FAIL;
 
       case OP_MEMORY_START:
       case OP_MEMORY_START_PUSH:
@@ -352,45 +354,56 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L354
 	break;
       case OP_PUSH:
         p += SIZE_RELADDR;
-	num_cache_index++;
+	num++;
+	*table_size += 1;
 	break;
       case OP_POP:
 	break;
       case OP_PUSH_OR_JUMP_EXACT1:
       case OP_PUSH_IF_PEEK_NEXT:
-	p += SIZE_RELADDR + 1; num_cache_index++; break;
+	p += SIZE_RELADDR + 1; num++; *table_size += 1; break;
       case OP_REPEAT:
       case OP_REPEAT_NG:
-	if (current_repeat != NO_OUTER_REPEAT) {
+	if (current_mem != -1) {
 	  // A nested OP_REPEAT is not yet supported.
-	  return NUM_CACHE_INDEX_FAIL;
+	  return NUM_CACHE_OPCODE_FAIL;
 	}
 	GET_MEMNUM_INC(mem, p);
 	p += SIZE_RELADDR;
 	if (reg->repeat_range[mem].lower == 0) {
-	  num_cache_index++;
+	  num++;
+	  *table_size += 1;
 	}
-	current_repeat = mem;
+	reg->repeat_range[mem].base_num = num;
+	current_mem = mem;
+	current_mem_num = num;
 	break;
       case OP_REPEAT_INC:
       case OP_REPEAT_INC_NG:
         GET_MEMNUM_INC(mem, p);
-	if (mem != current_repeat) {
+	//fprintf(stderr, "OP_REPEAT %d\n", mem);
+	if (mem != current_mem) {
 	  // A lone or invalid OP_REPEAT_INC is found.
-	  return NUM_CACHE_INDEX_FAIL;
+	  return NUM_CACHE_OPCODE_FAIL;
 	}
 	{
+	  int inner_num = num - current_mem_num;
 	  OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
+	  repeat_range->inner_num = inner_num;
+	  num -= inner_num;
+	  num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
+	  //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper);
 	  if (repeat_range->lower < repeat_range->upper) {
-	    num_cache_index++;
+	    *table_size += 1;
 	  }
-	  current_repeat = NO_OUTER_REPEAT;
+	  current_mem = -1;
+	  current_mem_num = 0;
 	}
 	break;
       case OP_REPEAT_INC_SG:
       case OP_REPEAT_INC_NG_SG:
 	// TODO: Support nested OP_REPEAT.
-	return NUM_CACHE_INDEX_FAIL;
+	return NUM_CACHE_OPCODE_FAIL;
       case OP_NULL_CHECK_START:
       case OP_NULL_CHECK_END:
       case OP_NULL_CHECK_END_MEMST:
@@ -409,21 +422,21 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L422
       case OP_PUSH_ABSENT_POS:
       case OP_ABSENT_END:
       case OP_ABSENT:
-	return NUM_CACHE_INDEX_FAIL;
+	return NUM_CACHE_OPCODE_FAIL;
 
       case OP_CALL:
       case OP_RETURN:
-	return NUM_CACHE_INDEX_FAIL;
+	return NUM_CACHE_OPCODE_FAIL;
 
       case OP_CONDITION:
-	return NUM_CACHE_INDEX_FAIL;
+	return NUM_CACHE_OPCODE_FAIL;
 
       case OP_STATE_CHECK_PUSH:
       case OP_STATE_CHECK_PUSH_OR_JUMP:
       case OP_STATE_CHECK:
       case OP_STATE_CHECK_ANYCHAR_STAR:
       case OP_STATE_CHECK_ANYCHAR_ML_STAR:
-	return NUM_CACHE_INDEX_FAIL;
+	return NUM_CACHE_OPCODE_FAIL;
 
       case OP_SET_OPTION_PUSH:
       case OP_SET_OPTION:
@@ -432,22 +445,21 @@ static int count_num_cache_index(regex_t* reg) https://github.com/ruby/ruby/blob/trunk/regexec.c#L445
     }
   }
 
-  return num_cache_index;
+  return num;
 }
 
-static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index)
+static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
 {
-  UChar*       pbegin;
-  UChar*       p = reg->p;
-  UChar*       pend = p + reg->used;
-  LengthType   len;
-  MemNumType   mem;
+  UChar* pbegin;
+  UChar* p = reg->p;
+  UChar* pend = p + reg->used;
+  LengthType len;
+  MemNumType mem;
+  MemNumType current_mem = -1;
+  int num = 0;
+  int current_mem_num = 0;
   OnigEncoding enc = reg->enc;
 
-  int        num_cache_point = 0;
-  MemNumType current_repeat = -1;
-  int        current_repeat_base_point = 0;
-
   while (p < pend) {
     pbegin = p;
     switch (*p++) {
@@ -500,20 +512,20 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) https://github.com/ruby/ruby/blob/trunk/regexec.c#L512
 	break;
       case OP_ANYCHAR_STAR:
       case OP_ANYCHAR_ML_STAR:
-	cache_index->addr = pbegin;
-	cache_index->point = num_cache_point - current_repeat_base_point;
-	cache_index->outer_repeat = current_repeat;
-	num_cache_point++;
-	cache_index++;
+	table->addr = pbegin;
+	table->num = num - current_mem_num;
+	table->outer_repeat = current_mem;
+	num++;
+	table++;
 	break;
       case OP_ANYCHAR_STAR_PEEK_NEXT:
       case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
 	p++;
-	cache_index->addr = pbegin;
-	cache_index->point = num_cache_point - current_repeat_base_point;
-	cache_index->outer_repeat = current_repeat;
-	num_cache_point++;
-	cache_index++;
+	table->addr = pbegin;
+	table->num = num - current_mem_num;
+	table->outer_repeat = current_mem;
+	num++;
+	table++;
 	break;
 
       case OP_WORD:
@@ -547,7 +559,7 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) https://github.com/ruby/ruby/blob/trunk/regexec.c#L559
       case OP_BACKREF_MULTI:
       case OP_BACKREF_MULTI_IC:
       case OP_BACKREF_WITH_LEVEL:
-	return NUM_CACHE_INDEX_FAIL;
+	return;
 
       case OP_MEMORY_START:
       case OP_MEMORY_START_PUSH:
@@ -567,61 +579,59 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) https://github.com/ruby/ruby/blob/trunk/regexec.c#L579
 	break;
       case OP_PUSH:
         p += SIZE_RELADDR;
-	cache_index->addr = pbegin;
-	cache_index->point = num_cache_point - current_repeat_base_point;
-	cache_index->outer_repeat = current_repeat;
-	num_cache_point++;
-	cache_index++;
+	table->addr = pbegin;
+	table->num = num - current_mem_num;
+	table->outer_repeat = current_mem;
+	num++;
+	table++;
 	break;
       case OP_POP:
 	break;
       case OP_PUSH_OR_JUMP_EXACT1:
       case OP_PUSH_IF_PEEK_NEXT:
 	p += SIZE_RELADDR + 1;
-	cache_index->addr = pbegin;
-	cache_index->point = num_cache_point - current_repeat_base_point;
-	cache_index->outer_repeat = current_repeat;
-	num_cache_point++;
-	cache_index++;
+	table->addr = pbegin;
+	table->num = num - current_mem_num;
+	table->outer_repeat = current_mem;
+	num++;
+	table++;
 	break;
       case OP_REPEAT:
       case OP_REPEAT_NG:
         GET_MEMNUM_INC(mem, p);
 	p += SIZE_RELADDR;
 	if (reg->repeat_range[mem].lower == 0) {
-	  cache_index->addr = pbegin;
-	  cache_index->point = num_cache_point - current_repeat_base_point;
-	  cache_index->outer_repeat = current_repeat;
-	  num_cache_point++;
-	  cache_index++;
+	  table->addr = pbegin;
+	  table->num = num - current_mem_num;
+	  table->outer_repeat = mem;
+	  num++;
+	  table++;
 	}
-	reg->repeat_range[mem].base_point = num_cache_point;
-	current_repeat = mem;
-	current_repeat_base_point = num_cache_point;
+	current_mem = mem;
+	current_mem_num = num;
 	break;
       case OP_REPEAT_INC:
       case OP_REPEAT_INC_NG:
-	GET_MEMNUM_INC(mem, p);
+        GET_MEMNUM_INC(mem, p);
 	{
-	  int inner_point = num_cache_point - current_repeat_base_point;
+	  int inner_num = num - current_mem_num;
 	  OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
 	  if (repeat_range->lower < repeat_range->upper) {
-	    cache_index->addr = pbegin;
-	    cache_index->point = num_cache_point - current_repeat_base_point;
-	    cache_index->outer_repeat = mem;
-	    cache_index++;
+	    table->addr = pbegin;
+	    table->num = num - current_me (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]