ruby-changes:67830
From: 卜部昌平 <ko1@a...>
Date: Fri, 10 Sep 2021 20:02:30 +0900 (JST)
Subject: [ruby-changes:67830] 3ca688aeb0 (master): include/ruby/encoding.h: add doxygen
https://git.ruby-lang.org/ruby.git/commit/?id=3ca688aeb0 From 3ca688aeb0506ce2cdf1c7b6f7058e0c42581b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=9C=E9=83=A8=E6=98=8C=E5=B9=B3?= <shyouhei@r...> Date: Fri, 6 Aug 2021 23:32:41 +0900 Subject: include/ruby/encoding.h: add doxygen Must not be a bad idea to improve documents. [ci skip] --- include/ruby/encoding.h | 2121 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 2001 insertions(+), 120 deletions(-) diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 4e46d0d..414c37d 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -9,78 +9,236 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/encoding.h#L9 * Permission is hereby granted, to either redistribute and/or * modify this file, provided that the conditions mentioned in the * file COPYING are met. Consult the file for details. + * @brief Encoding related APIs. + * + * These APIs are mainly for implementing encodings themselves. Encodings are + * built on top of Ruby's core CAPIs. Though not prohibited, there can be + * relatively little room for things in this header file to be useful when writing + * an extension library. */ #include "ruby/internal/config.h" #include <stdarg.h> #include "ruby/ruby.h" #include "ruby/oniguruma.h" +#include "ruby/internal/attr/const.h" +#include "ruby/internal/attr/deprecated.h" +#include "ruby/internal/attr/format.h" +#include "ruby/internal/attr/noalias.h" +#include "ruby/internal/attr/nonnull.h" +#include "ruby/internal/attr/noreturn.h" +#include "ruby/internal/attr/returns_nonnull.h" +#include "ruby/internal/attr/pure.h" +#include "ruby/internal/core/rbasic.h" #include "ruby/internal/dllexport.h" +#include "ruby/internal/fl_type.h" RBIMPL_SYMBOL_EXPORT_BEGIN() +/** + * @private + * + * Bit constants used when embedding encodings into ::RBasic::flags. Extension + * libraries need not bother with such things. 
+ */ enum ruby_encoding_consts { + + /** Max possible number of embeddable encodings. */ RUBY_ENCODING_INLINE_MAX = 127, + + /** Where inline encodings reside. */ RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10), + + /** Bits we use to store inline encodings. */ RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT /* RUBY_FL_USER10..RUBY_FL_USER16 */), + + /** Max possible length of an encoding name. */ RUBY_ENCODING_MAXNAMELEN = 42 }; -#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX -#define ENCODING_SHIFT RUBY_ENCODING_SHIFT -#define ENCODING_MASK RUBY_ENCODING_MASK +#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */ +#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */ +#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */ +/** + * Destructively assigns the passed encoding to the passed object. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object to modify. + * @param[in] i Encoding in encindex format. + * @post `obj`'s encoding is `i`. + */ #define RB_ENCODING_SET_INLINED(obj,i) do {\ RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\ RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\ } while (0) + +/** @alias{rb_enc_set_index} */ #define RB_ENCODING_SET(obj,i) rb_enc_set_index((obj), (i)) +/** + * Queries the encoding of the passed object. The encoding must be smaller + * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the + * return value. This means the API is for internal use only. + * + * @param[in] obj Target object. + * @return `obj`'s encoding index. + */ #define RB_ENCODING_GET_INLINED(obj) \ (int)((RBASIC(obj)->flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT) + +/** + * @alias{rb_enc_get_index} + * + * @internal + * + * Implementation wise this is not a verbatim alias of rb_enc_get_index(). 
But + * the API is consistent. Don't bother. + */ #define RB_ENCODING_GET(obj) \ (RB_ENCODING_GET_INLINED(obj) != RUBY_ENCODING_INLINE_MAX ? \ RB_ENCODING_GET_INLINED(obj) : \ rb_enc_get_index(obj)) +/** + * Queries if the passed object is in ascii 8bit (== binary) encoding. The + * object must be capable of having inline encoding. Using this macro needs + * deep understanding of bit level object binary layout. + * + * @param[in] obj An object to check. + * @retval 1 It is. + * @retval 0 It isn't. + */ #define RB_ENCODING_IS_ASCII8BIT(obj) (RB_ENCODING_GET_INLINED(obj) == 0) -#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) -#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) -#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) -#define ENCODING_GET(obj) RB_ENCODING_GET(obj) -#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) -#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN +#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */ +#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */ +#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */ +#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */ +#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */ +#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */ +/** What rb_enc_str_coderange() returns. */ enum ruby_coderange_type { + + /** The object's coderange is not known yet. */ RUBY_ENC_CODERANGE_UNKNOWN = 0, + + /** The object holds 0 to 127 inclusive and nothing else. */ RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8), + + /** The object's encoding and contents are consistent with each other. */ RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9), + + /** The object holds invalid/malformed/broken character(s). 
+ */ RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)), + + /** Where the coderange resides. */ RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT| RUBY_ENC_CODERANGE_VALID| RUBY_ENC_CODERANGE_BROKEN) }; +RBIMPL_ATTR_CONST() +/** + * @private + * + * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't + * use it directly. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval 1 It is. + * @retval 0 It isn't. + */ static inline int rb_enc_coderange_clean_p(int cr) { return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT; } + +/** + * Queries if a code range is "clean". "Clean" in this context means it is + * known and valid. + * + * @param[in] cr An enum ::ruby_coderange_type. + * @retval 1 It is. + * @retval 0 It isn't. + */ #define RB_ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr) + +/** + * Queries the (inline) code range of the passed object. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[in] obj Target object. + * @return An enum ::ruby_coderange_type. + */ #define RB_ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & RUBY_ENC_CODERANGE_MASK) + +/** + * Queries if the (inline) code range of the passed object is + * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline + * encoding. Using this macro needs deep understanding of bit level object + * binary layout. + * + * @param[in] obj Target object. + * @retval 1 It is ascii only. + * @retval 0 Otherwise (including cases when the range is not known). + */ #define RB_ENC_CODERANGE_ASCIIONLY(obj) (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT) + +/** + * Destructively modifies the passed object so that its (inline) code range is + * the passed one. The object must be capable of having inline encoding. + * Using this macro needs deep understanding of bit level object binary layout. + * + * @param[out] obj Target object. 
+ * @param[in] cr An enum ::ruby_coderange_type. + * @post `obj`'s code range is `cr`. + */ #define RB_ENC_CODERANGE_SET(obj,cr) (\ RBASIC(obj)->flags = \ (RBASIC(obj)->flags & ~RUBY_ENC_CODERANGE_MASK) | (cr)) + +/** + * Destructively clears the passed object's (inline) code range. The object + * must be capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN. + */ #define RB_ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_SET((obj),0) /* assumed ASCII compatibility */ +/** + * "Mix" two code ranges into one. This is handy for instance when you + * concatenate two strings into one. Consider one of them is valid but the + * other isn't. The result must be invalid. This macro computes that kind of + * mixture. + * + * @param[in] a An enum ::ruby_coderange_type. + * @param[in] b Another enum ::ruby_coderange_type. + * @return The `a` "and" `b`. + */ #define RB_ENC_CODERANGE_AND(a, b) \ ((a) == RUBY_ENC_CODERANGE_7BIT ? (b) : \ (a) != RUBY_ENC_CODERANGE_VALID ? RUBY_ENC_CODERANGE_UNKNOWN : \ (b) == RUBY_ENC_CODERANGE_7BIT ? RUBY_ENC_CODERANGE_VALID : (b)) +/** + * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be + * capable of having inline encoding. Using this macro needs deep + * understanding of bit level object binary layout. + * + * @param[out] obj Target object. + * @param[in] encindex Encoding in encindex format. + * @param[in] cr An enum ::ruby_coderange_type. + * @post `obj`'s encoding is (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/