ruby-changes:58235
From: Sutou <ko1@a...>
Date: Mon, 14 Oct 2019 12:41:09 +0900 (JST)
Subject: [ruby-changes:58235] 95c420c4a6 (master): Import StringScanner 1.0.3 (#2553)
https://git.ruby-lang.org/ruby.git/commit/?id=95c420c4a6 From 95c420c4a65ca2e7f3edf27134ad33691959296c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei <kou@c...> Date: Mon, 14 Oct 2019 12:40:50 +0900 Subject: Import StringScanner 1.0.3 (#2553) diff --git a/NEWS b/NEWS index d265233..a3283a6 100644 --- a/NEWS +++ b/NEWS @@ -416,6 +416,11 @@ RubyGems:: https://github.com/ruby/ruby/blob/trunk/NEWS#L416 * Upgrade to RubyGems 3.1.0.pre1 Bundled from https://github.com/rubygems/rubygems/commit/97b264f0fa248c864b6ee9a23d3ff1cdd217dddb +StringScanner:: + + * Upgrade to 1.0.3. + See https://github.com/ruby/strscan/blob/master/NEWS.md. + === Compatibility issues (excluding feature bug fixes) * Removed unmaintained libraries. diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb index 714fa99..f0ecbf8 100644 --- a/ext/strscan/extconf.rb +++ b/ext/strscan/extconf.rb @@ -1,4 +1,5 @@ https://github.com/ruby/ruby/blob/trunk/ext/strscan/extconf.rb#L1 # frozen_string_literal: true require 'mkmf' -$INCFLAGS << " -I$(top_srcdir)" +$INCFLAGS << " -I$(top_srcdir)" if $extmk +have_func("onig_region_memsize", "ruby.h") create_makefile 'strscan' diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index 77a36fe..99d6992 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -11,9 +11,18 @@ https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L11 #include "ruby/ruby.h" #include "ruby/re.h" #include "ruby/encoding.h" -#include "regint.h" -#define STRSCAN_VERSION "0.7.0" +#ifdef RUBY_EXTCONF_H +# include RUBY_EXTCONF_H +#endif + +#ifdef HAVE_ONIG_REGION_MEMSIZE +extern size_t onig_region_memsize(const struct re_registers *regs); +#endif + +#include <stdbool.h> + +#define STRSCAN_VERSION "1.0.3" /* ======================================================================= Data Type Definitions @@ -41,6 +50,9 @@ struct strscanner https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L50 /* regexp used for last scan */ VALUE regex; + + /* anchor mode */ + bool fixed_anchor_p; }; #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED) @@ -186,7 +198,11 @@ static size_t https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L198 strscan_memsize(const void *ptr) { const struct strscanner *p = ptr; - return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs); + size_t size = sizeof(*p) - sizeof(p->regs); +#ifdef HAVE_ONIG_REGION_MEMSIZE + size += onig_region_memsize(&p->regs); +#endif + return size; } static const rb_data_type_t strscanner_type = { @@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L224 } /* - * call-seq: StringScanner.new(string, dup = false) + * call-seq: + * StringScanner.new(string, fixed_anchor: false) + * StringScanner.new(string, dup = false) * * Creates a new StringScanner object to scan over the given +string+. + * + * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of + * the string. Otherwise, +\A+ always matches the current position. + * * +dup+ argument is obsolete and not used now. */ static VALUE strscan_initialize(int argc, VALUE *argv, VALUE self) { struct strscanner *p; - VALUE str, need_dup; + VALUE str, options; p = check_strscan(self); - rb_scan_args(argc, argv, "11", &str, &need_dup); + rb_scan_args(argc, argv, "11", &str, &options); + options = rb_check_hash_type(options); + if (!NIL_P(options)) { + VALUE fixed_anchor; + ID keyword_ids[1]; + keyword_ids[0] = rb_intern("fixed_anchor"); + rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor); + if (fixed_anchor == Qundef) { + p->fixed_anchor_p = false; + } + else { + p->fixed_anchor_p = RTEST(fixed_anchor); + } + } + else { + p->fixed_anchor_p = false; + } StringValue(str); p->str = str; @@ -294,7 +332,7 @@ strscan_reset(VALUE self) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L332 * terminate * clear * - * Set the scan pointer to the end of the string and clear matching data. + * Sets the scan pointer to the end of the string and clear matching data. */ static VALUE strscan_terminate(VALUE self) @@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L463 /* * call-seq: pos=(n) * - * Set the byte position of the scan pointer. + * Sets the byte position of the scan pointer. * * s = StringScanner.new('test string') * s.pos = 7 # -> 7 @@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L484 return INT2NUM(i); } +static inline UChar * +match_target(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + return (UChar *)S_PBEG(p); + } + else + { + return (UChar *)CURPTR(p); + } +} + +static inline void +set_registers(struct strscanner *p, size_t length) +{ + onig_region_clear(&(p->regs)); + if (p->fixed_anchor_p) { + onig_region_set(&(p->regs), 0, p->curr, p->curr + length); + } + else + { + onig_region_set(&(p->regs), 0, 0, length); + } +} + +static inline void +succ(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + p->curr = p->regs.end[0]; + } + else + { + p->curr += p->regs.end[0]; + } +} + +static inline long +last_match_length(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + return p->regs.end[0] - p->prev; + } + else + { + return p->regs.end[0]; + } +} + +static inline long +adjust_register_position(struct strscanner *p, long position) +{ + if (p->fixed_anchor_p) { + return position; + } + else { + return p->prev + position; + } +} + static VALUE -strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) +strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly) { - regex_t *rb_reg_prepare_re(VALUE re, VALUE str); struct strscanner *p; - regex_t *re; - long ret; - int tmpreg; - Check_Type(regex, T_REGEXP); + if (headonly) { + if (!RB_TYPE_P(pattern, T_REGEXP)) { + StringValue(pattern); + } + } + else { + Check_Type(pattern, T_REGEXP); + } GET_SCANNER(self, p); CLEAR_MATCH_STATUS(p); @@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) https://github.com/ruby/ruby/blob/trunk/ext/strscan/strscan.c#L564 return Qnil; } - p->regex = regex; - re = rb_reg_prepare_re(regex, p->str); - tmpreg = re != RREGEXP_PTR(regex); - if (!tmpreg) RREGEXP(regex)->usecnt++; + if (RB_TYPE_P(pattern, T_REGEXP)) { + regex_t *rb_reg_prepare_re(VALUE re, VALUE str); + regex_t *re; + long ret; + int tmpreg; + + p->regex = pattern; + re = rb_reg_prepare_re(pattern, p->str); + tmpreg = re != RREGEXP_PTR(pattern); + if (!tmpreg) RREGEXP(pattern)->usecnt++; + + if (headonly) { + ret = onig_match(re, + match_target(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), + &(p->regs), + ONIG_OPTION_NONE); + } + else { + ret = onig_search(re, + match_target(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + &(p->regs), + ONIG_OPTION_NONE); + } + if (!tmpreg) RREGEXP(pattern)->usecnt--; + if (tmpreg) { + if (RREGEXP(pattern)->usecnt) { + onig_free(re); + } + else { + onig_free(RREGEXP_PTR(pattern)); + RREGEXP_PTR(pattern) = re; + } + } - if (headonly) { - ret = onig_match(re, (UChar* )CURPTR(p), - (UChar* )(CURPTR(p) + S_RESTLEN(p)), - (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE); + if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); + if (ret < 0) { + /* not matched */ + return Qnil; + } } else { - ret = onig_search(re, - (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), - (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), - &(p->regs), ONIG_OPTION_NONE); - } - if (!tmpreg) RREGEXP(regex)->usecnt--; - if (tmpreg) { - if (RREGEXP(regex)->usecnt) { - onig_free(re); + rb_enc_check(p->str, pattern); + if (S_RESTLEN(p) < RSTRING_LEN(pattern)) { + return Qnil; } - else { - onig_free(RREGEXP_PTR(regex)); - RREGEXP_PTR(regex) = re; + if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) { + return Qnil; } - } - - if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); - if (ret < 0) { - /* not matched */ - return Qnil; + set_registers(p, RSTRING_LEN(pattern)); } MATCHED(p); p->prev = p->curr; + if (succptr) { - p->curr += p->regs.end[0]; - } - if (getstr) { - return extract_beg_len(p, p->prev, p->regs.end[0]); + succ(p); } - else { - return INT2FIX(p->regs.end[0]); + { + const long length = last_match_length(p); + if (getstr) { + return extract_beg_len(p, p->prev, length); + } + else { + return INT2FIX(length); + } } } (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/