[前][次][番号順一覧][スレッド一覧]

ruby-changes:58235

From: Sutou <ko1@a...>
Date: Mon, 14 Oct 2019 12:41:09 +0900 (JST)
Subject: [ruby-changes:58235] 95c420c4a6 (master): Import StringScanner 1.0.3 (#2553)

https://git.ruby-lang.org/ruby.git/commit/?id=95c420c4a6

From 95c420c4a65ca2e7f3edf27134ad33691959296c Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@c...>
Date: Mon, 14 Oct 2019 12:40:50 +0900
Subject: Import StringScanner 1.0.3 (#2553)


diff --git a/NEWS b/NEWS
index d265233..a3283a6 100644
--- a/NEWS
+++ b/NEWS
@@ -416,6 +416,11 @@ RubyGems:: https://github.com/ruby/ruby/blob/trunk/NEWS#L416
   * Upgrade to RubyGems 3.1.0.pre1
     Bundled from https://github.com/rubygems/rubygems/commit/97b264f0fa248c864b6ee9a23d3ff1cdd217dddb
 
+StringScanner::
+
+  * Upgrade to 1.0.3.
+    See https://github.com/ruby/strscan/blob/master/NEWS.md.
+
 === Compatibility issues (excluding feature bug fixes)
 
 * Removed unmaintained libraries.
diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb
index 714fa99..f0ecbf8 100644
--- a/ext/strscan/extconf.rb
+++ b/ext/strscan/extconf.rb
@@ -1,4 +1,5 @@ https://github.com/ruby/ruby/blob/trunk/ext/strscan/extconf.rb#L1
 # frozen_string_literal: true
 require 'mkmf'
-$INCFLAGS << " -I$(top_srcdir)"
+$INCFLAGS << " -I$(top_srcdir)" if $extmk
+have_func("onig_region_memsize", "ruby.h")
 create_makefile 'strscan'
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 77a36fe..99d6992 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -11,9 +11,18 @@ https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L11
 #include "ruby/ruby.h"
 #include "ruby/re.h"
 #include "ruby/encoding.h"
-#include "regint.h"
 
-#define STRSCAN_VERSION "0.7.0"
+#ifdef RUBY_EXTCONF_H
+#  include RUBY_EXTCONF_H
+#endif
+
+#ifdef HAVE_ONIG_REGION_MEMSIZE
+extern size_t onig_region_memsize(const struct re_registers *regs);
+#endif
+
+#include <stdbool.h>
+
+#define STRSCAN_VERSION "1.0.3"
 
 /* =======================================================================
                          Data Type Definitions
@@ -41,6 +50,9 @@ struct strscanner https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L50
 
     /* regexp used for last scan */
     VALUE regex;
+
+    /* anchor mode */
+    bool fixed_anchor_p;
 };
 
 #define MATCHED_P(s)          ((s)->flags & FLAG_MATCHED)
@@ -186,7 +198,11 @@ static size_t https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L198
 strscan_memsize(const void *ptr)
 {
     const struct strscanner *p = ptr;
-    return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
+    size_t size = sizeof(*p) - sizeof(p->regs);
+#ifdef HAVE_ONIG_REGION_MEMSIZE
+    size += onig_region_memsize(&p->regs);
+#endif
+    return size;
 }
 
 static const rb_data_type_t strscanner_type = {
@@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L224
 }
 
 /*
- * call-seq: StringScanner.new(string, dup = false)
+ * call-seq:
+ *    StringScanner.new(string, fixed_anchor: false)
+ *    StringScanner.new(string, dup = false)
  *
  * Creates a new StringScanner object to scan over the given +string+.
+ *
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
+ * the string. Otherwise, +\A+ always matches the current position.
+ *
  * +dup+ argument is obsolete and not used now.
  */
 static VALUE
 strscan_initialize(int argc, VALUE *argv, VALUE self)
 {
     struct strscanner *p;
-    VALUE str, need_dup;
+    VALUE str, options;
 
     p = check_strscan(self);
-    rb_scan_args(argc, argv, "11", &str, &need_dup);
+    rb_scan_args(argc, argv, "11", &str, &options);
+    options = rb_check_hash_type(options);
+    if (!NIL_P(options)) {
+        VALUE fixed_anchor;
+        ID keyword_ids[1];
+        keyword_ids[0] = rb_intern("fixed_anchor");
+        rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
+        if (fixed_anchor == Qundef) {
+            p->fixed_anchor_p = false;
+        }
+        else {
+            p->fixed_anchor_p = RTEST(fixed_anchor);
+        }
+    }
+    else {
+        p->fixed_anchor_p = false;
+    }
     StringValue(str);
     p->str = str;
 
@@ -294,7 +332,7 @@ strscan_reset(VALUE self) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L332
  *   terminate
  *   clear
  *
- * Set the scan pointer to the end of the string and clear matching data.
+ * Sets the scan pointer to the end of the string and clears matching data.
  */
 static VALUE
 strscan_terminate(VALUE self)
@@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L463
 /*
  * call-seq: pos=(n)
  *
- * Set the byte position of the scan pointer.
+ * Sets the byte position of the scan pointer.
  *
  *   s = StringScanner.new('test string')
  *   s.pos = 7            # -> 7
@@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L484
     return INT2NUM(i);
 }
 
+static inline UChar *
+match_target(struct strscanner *p)
+{
+    if (p->fixed_anchor_p) {
+        return (UChar *)S_PBEG(p);
+    }
+    else
+    {
+        return (UChar *)CURPTR(p);
+    }
+}
+
+static inline void
+set_registers(struct strscanner *p, size_t length)
+{
+    onig_region_clear(&(p->regs));
+    if (p->fixed_anchor_p) {
+        onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
+    }
+    else
+    {
+        onig_region_set(&(p->regs), 0, 0, length);
+    }
+}
+
+static inline void
+succ(struct strscanner *p)
+{
+    if (p->fixed_anchor_p) {
+        p->curr = p->regs.end[0];
+    }
+    else
+    {
+        p->curr += p->regs.end[0];
+    }
+}
+
+static inline long
+last_match_length(struct strscanner *p)
+{
+    if (p->fixed_anchor_p) {
+        return p->regs.end[0] - p->prev;
+    }
+    else
+    {
+        return p->regs.end[0];
+    }
+}
+
+static inline long
+adjust_register_position(struct strscanner *p, long position)
+{
+    if (p->fixed_anchor_p) {
+        return position;
+    }
+    else {
+        return p->prev + position;
+    }
+}
+
 static VALUE
-strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
+strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
 {
-    regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
     struct strscanner *p;
-    regex_t *re;
-    long ret;
-    int tmpreg;
 
-    Check_Type(regex, T_REGEXP);
+    if (headonly) {
+        if (!RB_TYPE_P(pattern, T_REGEXP)) {
+            StringValue(pattern);
+        }
+    }
+    else {
+        Check_Type(pattern, T_REGEXP);
+    }
     GET_SCANNER(self, p);
 
     CLEAR_MATCH_STATUS(p);
@@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L564
         return Qnil;
     }
 
-    p->regex = regex;
-    re = rb_reg_prepare_re(regex, p->str);
-    tmpreg = re != RREGEXP_PTR(regex);
-    if (!tmpreg) RREGEXP(regex)->usecnt++;
+    if (RB_TYPE_P(pattern, T_REGEXP)) {
+        regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
+        regex_t *re;
+        long ret;
+        int tmpreg;
+
+        p->regex = pattern;
+        re = rb_reg_prepare_re(pattern, p->str);
+        tmpreg = re != RREGEXP_PTR(pattern);
+        if (!tmpreg) RREGEXP(pattern)->usecnt++;
+
+        if (headonly) {
+            ret = onig_match(re,
+                             match_target(p),
+                             (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+                             (UChar* )CURPTR(p),
+                             &(p->regs),
+                             ONIG_OPTION_NONE);
+        }
+        else {
+            ret = onig_search(re,
+                              match_target(p),
+                              (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+                              (UChar* )CURPTR(p),
+                              (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+                              &(p->regs),
+                              ONIG_OPTION_NONE);
+        }
+        if (!tmpreg) RREGEXP(pattern)->usecnt--;
+        if (tmpreg) {
+            if (RREGEXP(pattern)->usecnt) {
+                onig_free(re);
+            }
+            else {
+                onig_free(RREGEXP_PTR(pattern));
+                RREGEXP_PTR(pattern) = re;
+            }
+        }
 
-    if (headonly) {
-        ret = onig_match(re, (UChar* )CURPTR(p),
-                         (UChar* )(CURPTR(p) + S_RESTLEN(p)),
-                         (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
+        if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
+        if (ret < 0) {
+            /* not matched */
+            return Qnil;
+        }
     }
     else {
-        ret = onig_search(re,
-                          (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
-                          (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
-                          &(p->regs), ONIG_OPTION_NONE);
-    }
-    if (!tmpreg) RREGEXP(regex)->usecnt--;
-    if (tmpreg) {
-        if (RREGEXP(regex)->usecnt) {
-            onig_free(re);
+        rb_enc_check(p->str, pattern);
+        if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
+            return Qnil;
         }
-        else {
-            onig_free(RREGEXP_PTR(regex));
-            RREGEXP_PTR(regex) = re;
+        if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
+            return Qnil;
         }
-    }
-
-    if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
-    if (ret < 0) {
-        /* not matched */
-        return Qnil;
+        set_registers(p, RSTRING_LEN(pattern));
     }
 
     MATCHED(p);
     p->prev = p->curr;
+
     if (succptr) {
-        p->curr += p->regs.end[0];
-    }
-    if (getstr) {
-        return extract_beg_len(p, p->prev, p->regs.end[0]);
+        succ(p);
     }
-    else {
-        return INT2FIX(p->regs.end[0]);
+    {
+        const long length = last_match_length(p);
+        if (getstr) {
+            return extract_beg_len(p, p->prev, length);
+        }
+        else {
+            return INT2FIX(length);
+        }
     }
 }
  (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]