ruby-changes:56972
From: Yusuke <ko1@a...>
Date: Mon, 12 Aug 2019 23:28:52 +0900 (JST)
Subject: [ruby-changes:56972] Yusuke Endoh: 8d302c914c (master): string.c (rb_str_sub, _gsub): improve the rdoc
https://git.ruby-lang.org/ruby.git/commit/?id=8d302c914c From 8d302c914c15af4a29c8b8af801281fa117a7ad2 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh <mame@r...> Date: Mon, 12 Aug 2019 23:12:27 +0900 Subject: string.c (rb_str_sub, _gsub): improve the rdoc This change: * Added an explanation about back references except \n and \k<n> (\` \& \' \+ \0) * Added an explanation about an escape (\\) * Added some rdoc references * Rephrased and clarified the reason why double escape is needed, added some examples, and moved the note to the last (because it is not specific to the method itself). diff --git a/string.c b/string.c index 2890f46..77d6757 100644 --- a/string.c +++ b/string.c @@ -5129,27 +5129,31 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L5129 * Returns a copy of +str+ with the _first_ occurrence of +pattern+ * replaced by the second argument. The +pattern+ is typically a Regexp; if * given as a String, any regular expression metacharacters it contains will - * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash + * be interpreted literally, e.g. <code>\d</code> will match a backslash * followed by 'd', instead of a digit. * * If +replacement+ is a String it will be substituted for the matched text. * It may contain back-references to the pattern's capture groups of the form - * <code>"\\d"</code>, where <i>d</i> is a group number, or - * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a - * double-quoted string, both back-references must be preceded by an - * additional backslash. However, within +replacement+ the special match - * variables, such as <code>$&</code>, will not refer to the current match. - * If +replacement+ is a String that looks like a pattern's capture group but - * is actually not a pattern capture group e.g. <code>"\\'"</code>, then it - * will have to be preceded by two backslashes like so <code>"\\\\'"</code>. 
+ * <code>\d</code>, where <i>d</i> is a group number, or + * <code>\k<n></code>, where <i>n</i> is a group name. + * Similarly, <code>\&</code>, <code>\'</code>, <code>\`</code>, and + * <code>\+</code> correspond to the special variables <code>$&</code>, + * <code>$'</code>, <code>$`</code>, and <code>$+</code>, respectively. + * (See rdoc-ref:regexp.rdoc for details.) + * <code>\0</code> is the same as <code>\&</code>. + * <code>\\\\</code> is interpreted as an escape, i.e., a single backslash. + * Note that within +replacement+, the special match variables, such as + * <code>$&</code>, will not refer to the current match. * * If the second argument is a Hash, and the matched text is one of its keys, * the corresponding value is the replacement string. * * In the block form, the current match string is passed in as a parameter, * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>, - * <code>$&</code>, and <code>$'</code> will be set appropriately. The value - * returned by the block will be substituted for the match on each call. + * <code>$&</code>, and <code>$'</code> will be set appropriately. + * (See rdoc-ref:regexp.rdoc for details.) + * The value returned by the block will be substituted for the match on each + * call. * * The result inherits any tainting in the original string or any supplied * replacement string. @@ -5160,6 +5164,19 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L5164 * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo" * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV) * #=> "Is /bin/bash your preferred shell?" + * + * Note that a string literal consumes backslashes. + * (See rdoc-ref:syntax/literals.rdoc for details about string literals.) + * So, back-references are typically preceded by an additional backslash. 
+ * For example, if you want to write a back-reference <code>\&</code> in + * +replacement+ with a double-quoted string literal, you need to write: + * <code>"..\\\\&.."</code>. + * If you want to write a non-back-reference string <code>\&</code> in + * +replacement+, you first need to escape the backslash to prevent + * this method from interpreting it as a back-reference, and then you + * need to escape the backslashes again to prevent a string literal from + * consuming them: <code>"..\\\\\\\\&.."</code>. + * You may want to use the block form to avoid a lot of backslashes. */ static VALUE @@ -5332,24 +5349,31 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L5349 * <i>pattern</i> substituted for the second argument. The <i>pattern</i> is * typically a Regexp; if given as a String, any * regular expression metacharacters it contains will be interpreted - * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd', + * literally, e.g. <code>\d</code> will match a backslash followed by 'd', * instead of a digit. * - * If <i>replacement</i> is a String it will be substituted for - * the matched text. It may contain back-references to the pattern's capture - * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or - * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a - * double-quoted string, both back-references must be preceded by an - * additional backslash. However, within <i>replacement</i> the special match - * variables, such as <code>$&</code>, will not refer to the current match. + * If +replacement+ is a String it will be substituted for the matched text. + * It may contain back-references to the pattern's capture groups of the form + * <code>\d</code>, where <i>d</i> is a group number, or + * <code>\k<n></code>, where <i>n</i> is a group name. 
+ * Similarly, <code>\&</code>, <code>\'</code>, <code>\`</code>, and + * <code>\+</code> correspond to the special variables <code>$&</code>, + * <code>$'</code>, <code>$`</code>, and <code>$+</code>, respectively. + * (See rdoc-ref:regexp.rdoc for details.) + * <code>\0</code> is the same as <code>\&</code>. + * <code>\\\\</code> is interpreted as an escape, i.e., a single backslash. + * Note that within +replacement+, the special match variables, such as + * <code>$&</code>, will not refer to the current match. * * If the second argument is a Hash, and the matched text is one * of its keys, the corresponding value is the replacement string. * * In the block form, the current match string is passed in as a parameter, * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>, - * <code>$&</code>, and <code>$'</code> will be set appropriately. The value - * returned by the block will be substituted for the match on each call. + * <code>$&</code>, and <code>$'</code> will be set appropriately. + * (See rdoc-ref:regexp.rdoc for details.) + * The value returned by the block will be substituted for the match on each + * call. * * The result inherits any tainting in the original string or any supplied * replacement string. @@ -5362,6 +5386,19 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L5386 * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 " * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}" * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*" + * + * Note that a string literal consumes backslashes. + * (See rdoc-ref:syntax/literals.rdoc for details about string literals.) + * So, back-references are typically preceded by an additional backslash. + * For example, if you want to write a back-reference <code>\&</code> in + * +replacement+ with a double-quoted string literal, you need to write: + * <code>"..\\\\&.."</code>. 
+ * If you want to write a non-back-reference string <code>\&</code> in + * +replacement+, you first need to escape the backslash to prevent + * this method from interpreting it as a back-reference, and then you + * need to escape the backslashes again to prevent a string literal from + * consuming them: <code>"..\\\\\\\\&.."</code>. + * You may want to use the block form to avoid a lot of backslashes. */ static VALUE -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/