ruby-changes:67742
From: 卜部昌平 <ko1@a...>
Date: Fri, 10 Sep 2021 20:01:20 +0900 (JST)
Subject: [ruby-changes:67742] 1bd1339492 (master): include/ruby/internal/core/rstring.h: add doxygen
https://git.ruby-lang.org/ruby.git/commit/?id=1bd1339492 From 1bd133949295be3b50439c956f951f7b1bfe7d6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=9C=E9=83=A8=E6=98=8C=E5=B9=B3?= <shyouhei@r...> Date: Mon, 1 Feb 2021 12:10:21 +0900 Subject: include/ruby/internal/core/rstring.h: add doxygen Must not be a bad idea to improve documents. [ci skip] --- include/ruby/internal/core/rstring.h | 355 ++++++++++++++++++++++++++++++++++- 1 file changed, 347 insertions(+), 8 deletions(-) diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h index a616eba..e14753a 100644 --- a/include/ruby/internal/core/rstring.h +++ b/include/ruby/internal/core/rstring.h @@ -32,14 +32,20 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/core/rstring.h#L32 #include "ruby/internal/warning_push.h" #include "ruby/assert.h" +/** + * Convenient casting macro. + * + * @param obj An object, which is in fact an ::RString. + * @return The passed object casted to ::RString. + */ #define RSTRING(obj) RBIMPL_CAST((struct RString *)(obj)) + +/** @cond INTERNAL_MACRO */ #define RSTRING_NOEMBED RSTRING_NOEMBED #define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK #define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT #define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX #define RSTRING_FSTR RSTRING_FSTR - -/** @cond INTERNAL_MACRO */ #define RSTRING_EMBED_LEN RSTRING_EMBED_LEN #define RSTRING_LEN RSTRING_LEN #define RSTRING_LENINT RSTRING_LENINT @@ -47,59 +53,343 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/core/rstring.h#L53 #define RSTRING_END RSTRING_END /** @endcond */ +/** + * @name Conversion of Ruby strings into C's + * + * @{ + */ + +/** + * Ensures that the parameter object is a String. This is done by calling its + * `to_str` method. + * + * @param[in,out] v Arbitrary Ruby object. + * @exception rb_eTypeError No implicit conversion defined. + * @post `v` is a String. 
+ */ #define StringValue(v) rb_string_value(&(v)) + +/** + * Identical to #StringValue, except it returns a `char*`. + * + * @param[in,out] v Arbitrary Ruby object. + * @exception rb_eTypeError No implicit conversion defined. + * @return Converted Ruby string's backend C string. + * @post `v` is a String. + */ #define StringValuePtr(v) rb_string_value_ptr(&(v)) + +/** + * Identical to #StringValuePtr, except it additionally checks for the contents + * for viability as a C string. Ruby can accept wider range of contents as + * strings, compared to C. This function is to check that. + * + * @param[in,out] v Arbitrary Ruby object. + * @exception rb_eTypeError No implicit conversion defined. + * @exception rb_eArgError String is not C-compatible. + * @return Converted Ruby string's backend C string. + * @post `v` is a String. + */ #define StringValueCStr(v) rb_string_value_cstr(&(v)) + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define SafeStringValue(v) StringValue(v) + +/** + * Identical to #StringValue, except it additionally converts the string's + * encoding to default external encoding. Ruby has a concept called encodings. + * A string can have different encoding than the environment expects. Someone + * has to make sure its contents be converted to something suitable. This is + * that routine. Call it when necessary. + * + * @param[in,out] v Arbitrary Ruby object. + * @exception rb_eTypeError No implicit conversion defined. + * @return Converted Ruby string's backend C string. + * @post `v` is a String. + * + * @internal + * + * Not sure but it seems this macro does not raise on encoding + * incompatibilities? Doesn't sound right to @shyouhei. 
+ */ #define ExportStringValue(v) do { \ StringValue(v); \ (v) = rb_str_export(v); \ } while (0) +/** @} */ + +/** + * @private + * + * Bits that you can set to ::RBasic::flags. + * + * @warning These enums are not the only bits we use for strings. + * + * @internal + * + * Actually all bits through FL_USER1 to FL_USER19 are used for strings. Why + * only this tiny part of them are made public here? @shyouhei can find no + * reason. + */ enum ruby_rstring_flags { + + /** + * This flag has something to do with memory footprint. If the string is + * short enough, ruby tries to be creative to abuse padding bits of struct + * ::RString for storing contents. If this flag is set that string does + * _not_ do that, to resort to good old fashioned external allocation + * strategy instead. + * + * @warning This bit has to be considered read-only. Setting/clearing + * this bit without corresponding fix up must cause immediate + * SEGV. Also, internal structures of a string change + * dynamically and transparently throughout of its lifetime. + * Don't assume it being persistent. + * + * @internal + * + * 3rd parties must not be aware that there even is more than one way to + * store a string. Might better be hidden. + */ RSTRING_NOEMBED = RUBY_FL_USER1, + + /** + * When a string employs embedded strategy (see ::RSTRING_NOEMBED), these + * bits are used to store the number of bytes actually filled into + * ::RString::ary. + * + * @internal + * + * 3rd parties must not be aware that there even is more than one way to + * store a string. Might better be hidden. + */ RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 | RUBY_FL_USER5 | RUBY_FL_USER6, + /* Actually, string encodings are also encoded into the flags, using * remaining bits.*/ + + /** + * This flag has something to do with infamous "f"string. What is a + * fstring? Well it is a special subkind of strings that is immutable, + * deduped globally, and managed by our GC. 
It is much like a Symbol (in + * fact Symbols are dynamic these days and are backended using fstrings). + * This concept has been silently introduced at some point in 2.x era. + * Since then it gained wider acceptance in the core. But extension + * libraries could not know that until very recently. Strings of this flag + * live in a special Limbo deep inside of the interpreter. Never try to + * manipulate it by hand. + * + * @internal + * + * Fstrings are not the only variant strings that we implement today. + * Other things are behind-the-scene. This is the only one that is visible + * from extension library. There is no clear reason why it has to be. + * Given there are more "polite" ways to create fstrings, it seems this bit + * need not be exposed to extension libraries. Might better be hidden. + */ RSTRING_FSTR = RUBY_FL_USER17 }; +/** + * This is an enum because GDB wants it (rather than a macro). People need not + * bother. + */ enum ruby_rstring_consts { + /** Where ::RSTRING_EMBED_LEN_MASK resides. */ RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2, + + /** Max possible number of characters that can be embedded. */ RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1 }; +/** + * Ruby's String. A string in ruby conceptually has these information: + * + * - Encoding of the string. + * - Length of the string. + * - Contents of the string. + * + * It is worth noting that a string is _not_ an array of characters in ruby. + * It has never been. In 1.x a string was an array of integers. Since 2.x a + * string is no longer an array of anything. A string is a string -- just like + * a Time is not an integer. + */ struct RString { + + /** Basic part, including flags and class. */ struct RBasic basic; + + /** String's specific fields. */ union { + + /** + * Strings that use separated memory region for contents use this + * pattern. + */ struct { + + /** + * Length of the string, not including terminating NUL character. + * + * @note This is in bytes. 
+ */ long len; + + /** + * Pointer to the contents of the string. In the old days each + * string had dedicated memory regions. That is no longer true + * today, but there still are strings of such properties. This + * field could be used to point to such things. + */ char *ptr; + + /** Auxiliary info. */ union { + + /** + * Capacity of `*ptr`. A contiguous memory region of at least + * `capa` bytes is expected to exist at `*ptr`. This can be + * bigger than `len`. + */ long capa; + + /** + * Parent of the string. Nowadays strings can share their + * contents with each other, constructing a gigantic nest of objects. + * This situation is called "shared", and this is the field to + * control such properties. + */ VALUE shared; } aux; } heap; + + /** + * Embedded contents. When a string is short enough, it uses this area + * to store the contents themselves. This was impractical in the 20th + * century, but these days 64 bit machines can typically hold 48 bytes + (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/