ruby-changes:71742
From: Burdette <ko1@a...>
Date: Sat, 16 Apr 2022 03:31:37 +0900 (JST)
Subject: [ruby-changes:71742] e021754db0 (master): [DOC] Enhanced RDoc for Regexp (#5807)
https://git.ruby-lang.org/ruby.git/commit/?id=e021754db0 From e021754db013ca9cd6dbd68b416425b32ee81490 Mon Sep 17 00:00:00 2001 From: Burdette Lamar <BurdetteLamar@Y...> Date: Fri, 15 Apr 2022 13:31:15 -0500 Subject: [DOC] Enhanced RDoc for Regexp (#5807) Treats: #source #inspect #to_s #casefold? #options #names #named_captures --- doc/regexp.rdoc | 12 ++--- re.c | 162 +++++++++++++++++++++++++++++--------------------------- 2 files changed, 90 insertions(+), 84 deletions(-) diff --git a/doc/regexp.rdoc b/doc/regexp.rdoc index 65d8cd46fa..b8efc7e3d4 100644 --- a/doc/regexp.rdoc +++ b/doc/regexp.rdoc @@ -35,7 +35,7 @@ exceeded. See "Timeout" section in detail. https://github.com/ruby/ruby/blob/trunk/doc/regexp.rdoc#L35 Pattern matching may be achieved by using <tt>=~</tt> operator or Regexp#match method. -=== <tt>=~</tt> operator +=== <tt>=~</tt> Operator <tt>=~</tt> is Ruby's basic pattern-matching operator. When one operand is a regular expression and the other is a string then the regular expression is @@ -54,7 +54,7 @@ Using <tt>=~</tt> operator with a String and Regexp the <tt>$~</tt> global https://github.com/ruby/ruby/blob/trunk/doc/regexp.rdoc#L54 variable is set after a successful match. <tt>$~</tt> holds a MatchData object. Regexp.last_match is equivalent to <tt>$~</tt>. -=== Regexp#match method +=== Regexp#match Method The #match method returns a MatchData object: @@ -193,7 +193,7 @@ At least one uppercase character ('H'), at least one lowercase character https://github.com/ruby/ruby/blob/trunk/doc/regexp.rdoc#L193 "Hello".match(/[[:upper:]]+[[:lower:]]+l{2}o/) #=> #<MatchData "Hello"> -=== Greedy match +=== Greedy Match Repetition is <i>greedy</i> by default: as many occurrences as possible are matched while still allowing the overall match to succeed. By @@ -211,7 +211,7 @@ Both patterns below match the string. 
The first uses a greedy quantifier so https://github.com/ruby/ruby/blob/trunk/doc/regexp.rdoc#L211 /<.+>/.match("<a><b>") #=> #<MatchData "<a><b>"> /<.+?>/.match("<a><b>") #=> #<MatchData "<a>"> -=== Possessive match +=== Possessive Match A quantifier followed by <tt>+</tt> matches <i>possessively</i>: once it has matched it does not backtrack. They behave like greedy quantifiers, @@ -256,7 +256,7 @@ this backreference when doing substitution: https://github.com/ruby/ruby/blob/trunk/doc/regexp.rdoc#L256 "The cat sat in the hat".gsub(/[csh]at/, '\0s') # => "The cats sats in the hats" -=== Named captures +=== Named Captures Capture groups can be referred to by name when defined with the <tt>(?<</tt><i>name</i><tt>>)</tt> or <tt>(?'</tt><i>name</i><tt>')</tt> @@ -672,7 +672,7 @@ regexp's encoding can be explicitly fixed by supplying https://github.com/ruby/ruby/blob/trunk/doc/regexp.rdoc#L672 # raises Encoding::CompatibilityError: incompatible encoding regexp match # (ISO-8859-1 regexp with UTF-8 string) -== Special global variables +== Special Global Variables Pattern matching sets some global variables : * <tt>$~</tt> is equivalent to Regexp.last_match; diff --git a/re.c b/re.c index e7c5487655..e8a2094beb 100644 --- a/re.c +++ b/re.c @@ -480,15 +480,15 @@ rb_reg_desc(const char *s, long len, VALUE re) https://github.com/ruby/ruby/blob/trunk/re.c#L480 /* * call-seq: - * rxp.source -> str + * source -> string * - * Returns the original string of the pattern. + * Returns the original string of +self+: * - * /ab+c/ix.source #=> "ab+c" + * /ab+c/ix.source # => "ab+c" * - * Note that escape sequences are retained as is. 
+ * Note that escape sequences are retained as is: * - * /\x20\+/.source #=> "\\x20\\+" + * /\x20\+/.source # => "\\x20\\+" * */ @@ -503,15 +503,14 @@ rb_reg_source(VALUE re) https://github.com/ruby/ruby/blob/trunk/re.c#L503 } /* - * call-seq: - * rxp.inspect -> string + * call-seq: + * inspect -> string * - * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly, - * <code>#inspect</code> actually produces the more natural version of - * the string than <code>#to_s</code>. + * Returns a nicely-formatted string representation of +self+: * - * /ab+c/ix.inspect #=> "/ab+c/ix" + * /ab+c/ix.inspect # => "/ab+c/ix" * + * Related: Regexp#to_s. */ static VALUE @@ -527,22 +526,29 @@ static VALUE rb_reg_str_with_term(VALUE re, int term); https://github.com/ruby/ruby/blob/trunk/re.c#L526 /* * call-seq: - * rxp.to_s -> str - * - * Returns a string containing the regular expression and its options (using the - * <code>(?opts:source)</code> notation. This string can be fed back in to - * Regexp::new to a regular expression with the same semantics as the - * original. (However, <code>Regexp#==</code> may not return true - * when comparing the two, as the source of the regular expression - * itself may differ, as the example shows). Regexp#inspect produces - * a generally more readable version of <i>rxp</i>. 
- * - * r1 = /ab+c/ix #=> /ab+c/ix - * s1 = r1.to_s #=> "(?ix-m:ab+c)" - * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/ - * r1 == r2 #=> false - * r1.source #=> "ab+c" - * r2.source #=> "(?ix-m:ab+c)" + * to_s -> string + * + * Returns a string showing the options and string of +self+: + * + * r0 = /ab+c/ix + * s0 = r0.to_s # => "(?ix-m:ab+c)" + * + * The returned string may be used as an argument to Regexp.new, + * or as interpolated text for a + * {Regexp literal}[rdoc-ref:regexp.rdoc@Regexp+Literal]: + * + * r1 = Regexp.new(s0) # => /(?ix-m:ab+c)/ + * r2 = /#{s0}/ # => /(?ix-m:ab+c)/ + * + * Note that +r1+ and +r2+ are not equal to +r0+ + * because their original strings are different: + * + * r0 == r1 # => false + * r0.source # => "ab+c" + * r1.source # => "(?ix-m:ab+c)" + * + * Related: Regexp#inspect. + * */ static VALUE @@ -713,13 +719,15 @@ rb_reg_raise_str(VALUE str, int options, const char *err) https://github.com/ruby/ruby/blob/trunk/re.c#L719 /* * call-seq: - * rxp.casefold? -> true or false + * casefold? -> true or false * - * Returns the value of the case-insensitive flag. + * Returns +true+ if the case-insensitivity flag in +self+ is set, + * +false+ otherwise: + * + * /a/.casefold? # => false + * /a/i.casefold? # => true + * /(?i:a)/.casefold? # => false * - * /a/.casefold? #=> false - * /a/i.casefold? #=> true - * /(?i:a)/.casefold? #=> false */ static VALUE @@ -732,25 +740,39 @@ rb_reg_casefold_p(VALUE re) https://github.com/ruby/ruby/blob/trunk/re.c#L740 /* * call-seq: - * rxp.options -> integer + * options -> integer + * + * Returns an integer whose bits show the options set in +self+. 
+ * + * The option bits are: + * + * Regexp::IGNORECASE # => 1 + * Regexp::EXTENDED # => 2 + * Regexp::MULTILINE # => 4 + * + * Examples: + * + * /foo/.options # => 0 + * /foo/i.options # => 1 + * /foo/x.options # => 2 + * /foo/m.options # => 4 + * /foo/mix.options # => 7 + * + * Note that additional bits may be set in the returned integer; + * these are maintained internally in +self+, + * are ignored if passed to Regexp.new, and may be ignored by the caller: * * Returns the set of bits corresponding to the options used when - * creating this Regexp (see Regexp::new for details. Note that + * creating this regexp (see Regexp::new for details). Note that * additional bits may be set in the returned options: these are used * internally by the regular expression code. These extra bits are - * ignored if the options are passed to Regexp::new. + * ignored if the options are passed to Regexp::new: * - * Regexp::IGNORECASE #=> 1 - * Regexp::EXTENDED #=> 2 - * Regexp::MULTILINE #=> 4 + * r = /\xa1\xa2/e # => /\xa1\xa2/ + * r.source # => "\\xa1\\xa2" + * r.options # => 16 + * Regexp.new(r.source, r.options) # => /\xa1\xa2/ * - * /cat/.options #=> 0 - * /cat/ix.options #=> 3 - * Regexp.new('cat', true).options #=> 1 - * /\xa1\xa2/e.options #=> 16 - * - * r = /cat/ix - * Regexp.new(r.source, r.options) #=> /cat/ix */ static VALUE @@ -770,19 +792,16 @@ reg_names_iter(const OnigUChar *name, const OnigUChar *name_end, https://github.com/ruby/ruby/blob/trunk/re.c#L792 } /* - * call-seq: - * rxp.names -> [name1, name2, ...] - * - * Returns a list of names of captures as an array of strings. 
+ * call-seq: + * names -> array_of_names * - * /(?<foo>.)(?<bar>.)(?<baz>.)/.names - * #=> ["foo", "bar", "baz"] + * Returns an array of names of captures + * (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]): * - * /(?<foo>.)(?<foo>.)/.names - * #=> ["foo"] + * /(?<foo>.)(?<bar>.)(?<baz>.)/.names # => ["foo", "bar", "baz"] + * /(?<foo>.)(?<foo>.)/.names # => ["foo"] + * /(.)(.)/.names # => [] * - * /(.)(.)/.names - * #=> [] */ static VALUE @@ -812,25 +831,21 @@ reg_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end, https://github.com/ruby/ruby/blob/trunk/re.c#L831 } /* - * call-seq: - * rxp.named_captures -> hash - * - * Returns a hash representing information about named captures of <i>rxp</i>. + * call-seq: + * named_captures -> hash * - * A key of the hash is a name of the named captures. - * A value of the hash is an array which is list of indexes of corresponding - * named captures. + * Returns a hash representing named captures of +self+ + * (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]): * - * /(?<foo>.)(?<bar>.) (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/