[前][次][番号順一覧][スレッド一覧]

ruby-changes:74406

From: TSUYUSATO <ko1@a...>
Date: Wed, 9 Nov 2022 23:21:55 +0900 (JST)
Subject: [ruby-changes:74406] 37613fea16 (master): Clear cache on OP_NULL_CHECK_END_MEMST

https://git.ruby-lang.org/ruby.git/commit/?id=37613fea16

From 37613fea1657b1a0732501657275bc03e8e0ebc4 Mon Sep 17 00:00:00 2001
From: TSUYUSATO Kitsune <make.just.on@g...>
Date: Thu, 27 Oct 2022 22:39:56 +0900
Subject: Clear cache on OP_NULL_CHECK_END_MEMST

---
 regexec.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 95 insertions(+), 17 deletions(-)

diff --git a/regexec.c b/regexec.c
index 4baf8d32b3..fd308d1883 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1175,7 +1175,6 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L1175
   }\
 } while (0)
 
-
 static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p)
 {
   int l = 0, r = num_cache_table - 1, m;
@@ -1197,17 +1196,14 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd https://github.com/ruby/ruby/blob/trunk/regexec.c#L1196
   }
 
   item = &table[m];
-  //fprintf(stderr, "m = %d, outer_repeat = %d, num = %d\n", item->outer_repeat, item->num);
   if (item->outer_repeat == -1) {
     return item->num;
   }
 
   range = &reg->repeat_range[item->outer_repeat];
-  //fprintf(stderr, "inner_num = %d, lower = %d, upper = %d\n", range->inner_num, range->lower, range->upper);
 
   stkp = &stk[repeat_stk[item->outer_repeat]];
   count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
-  //fprintf(stderr, "count = %d\n", count);
 
   if (count < range->lower) {
     return range->base_num + range->inner_num * count + item->num;
@@ -1220,6 +1216,64 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd https://github.com/ruby/ruby/blob/trunk/regexec.c#L1216
   return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num;
 }
 
+static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) {
+  int l = 0, r = num_cache_table - 1, m1, m2;
+  int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG;
+  OnigCacheIndex *item1, *item2;
+  int k1, k2;
+
+  while (l <= r) {
+    m1 = (l + r) / 2;
+    if (table[m1].addr == pbegin) break;
+    if (table[m1].addr < pbegin) l = m1 + 1;
+    else r = m1 - 1;
+  }
+
+  l = 0, r = num_cache_table - 1;
+  while (l <= r) {
+    m2 = (l + r) / 2;
+    if (table[m2].addr == pend) break;
+    if (table[m2].addr < pend) l = m2 + 1;
+    else r = m2 - 1;
+  }
+
+  if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++;
+  if (table[m2].addr > pend && m2 - 1 > 0) m2--;
+
+  item1 = &table[m1];
+  item2 = &table[m2];
+
+  if (item1->outer_repeat < 0) k1 = item1->num;
+  else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num;
+
+  if (item2->outer_repeat < 0) k2 = item2->num;
+  else {
+    OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat];
+    if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num;
+    else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num;
+  }
+
+  int base = pos * num_cache_size;
+  k1 += base;
+  k2 += base;
+
+  if ((k1 >> 3) == (k2 >> 3)) {
+    match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1);
+  } else {
+    int i = k1 >> 3;
+    if (k1 & 7) {
+      match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1;
+      i++;
+    }
+    if (i < (k2 >> 3)) {
+      xmemset(&match_cache[i], 0, (k2 >> 3) - i);
+      if (k2 & 7) {
+        match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1));
+      }
+    }
+  }
+}
+
 #else
 #define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache)
 #endif /* USE_CACHE_MATCH_OPT */
@@ -1542,7 +1596,7 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd https://github.com/ruby/ruby/blob/trunk/regexec.c#L1596
   }\
 } while(0)
 
-#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
+#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do {\
   OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
   while (1) {\
     k--;\
@@ -1559,14 +1613,14 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd https://github.com/ruby/ruby/blob/trunk/regexec.c#L1613
           while (k < stk) {\
             if (k->type == STK_MEM_START) {\
               if (k->u.mem.end == INVALID_STACK_INDEX) {\
-                (isnull) = 0; break;\
+                (isnull) = 0; (ischange) = 1; break;\
               }\
               if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
                 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
               else\
                 endp = (UChar* )k->u.mem.end;\
               if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
-                (isnull) = 0; break;\
+                (isnull) = 0; (ischange) = 1; break;\
               }\
               else if (endp != s) {\
                 (isnull) = -1; /* empty, but position changed */ \
@@ -2709,7 +2763,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L2763
 
     CASE(OP_ANYCHAR_STAR)  MOP_IN(OP_ANYCHAR_STAR);
       while (DATA_ENSURE_CHECK1) {
-	DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
+	DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
 	STACK_PUSH_ALT(p, s, sprev, pkeep);
 	n = enclen(encode, s, end);
 	DATA_ENSURE(n);
@@ -2722,7 +2776,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L2776
 
     CASE(OP_ANYCHAR_ML_STAR)  MOP_IN(OP_ANYCHAR_ML_STAR);
       while (DATA_ENSURE_CHECK1) {
-	DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
+	DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
 	STACK_PUSH_ALT(p, s, sprev, pkeep);
 	n = enclen(encode, s, end);
 	if (n > 1) {
@@ -2757,7 +2811,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L2811
     CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
       while (DATA_ENSURE_CHECK1) {
 	if (*p == *s) {
-	  DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
+	  DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
 	  STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
 	}
 	n = enclen(encode, s, end);
@@ -3344,9 +3398,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L3398
     CASE(OP_NULL_CHECK_END_MEMST)  MOP_IN(OP_NULL_CHECK_END_MEMST);
       {
 	int isnull;
+	int ischanged = 0; // set 1 when a loop is empty but memory status is changed.
 
 	GET_MEMNUM_INC(mem, p); /* mem: null check id */
-	STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
+	STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg);
 	if (isnull) {
 # ifdef ONIG_DEBUG_MATCH
 	  fprintf(stderr, "NULL_CHECK_END_MEMST: skip  id:%d, s:%"PRIuPTR" (%p)\n",
@@ -3355,6 +3410,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L3410
 	  if (isnull == -1) goto fail;
 	  goto null_check_found;
 	}
+# ifdef USE_CACHE_MATCH_OPT
+	if (ischanged && msa->enable_cache_match_opt) {
+	  RelAddrType rel;
+	  OnigUChar *addr;
+	  int mem;
+	  UChar* tmp = p;
+	  switch (*tmp++) {
+	  case OP_JUMP:
+	  case OP_PUSH:
+	    GET_RELADDR_INC(rel, tmp);
+	    addr = tmp + rel;
+	    break;
+	  case OP_REPEAT_INC:
+	  case OP_REPEAT_INC_NG:
+	    GET_MEMNUM_INC(mem, tmp);
+	    addr = STACK_AT(repeat_stk[mem])->u.repeat.pcode;
+	    break;
+	  default:
+	    goto unexpected_bytecode_error;
+	  }
+	  reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode);
+	}
+# endif
       }
       MOP_OUT;
       JUMP;
@@ -3397,7 +3475,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L3475
 
     CASE(OP_PUSH)  MOP_IN(OP_PUSH);
       GET_RELADDR_INC(addr, p);
-      DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
+      DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
       STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
       MOP_OUT;
       JUMP;
@@ -3451,7 +3529,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, https://github.com/ruby/ruby/blob/trunk/regexec.c#L3529
       GET_RELADDR_INC(addr, p);
       if (*p == *s && DATA_ENSURE_CHECK1) {
 	p++;
-	DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
+	DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_i (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]