ruby-changes:68259
From: 卜部昌平 <ko1@a...>
Date: Tue, 5 Oct 2021 14:18:43 +0900 (JST)
Subject: [ruby-changes:68259] 312668cf03 (master): split include/ruby/encoding.h
https://git.ruby-lang.org/ruby.git/commit/?id=312668cf03 From 312668cf031ce5e018f78d6a7cad9bcdcdac6ae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=9C=E9=83=A8=E6=98=8C=E5=B9=B3?= <shyouhei@r...> Date: Fri, 24 Sep 2021 10:34:32 +0900 Subject: split include/ruby/encoding.h 2,291 lines are too much! include/ruby/encoding.h became the biggest header file once it had doxygen comments. Let us split it into smaller parts, so that we can better organise their contents. --- include/ruby/encoding.h | 2278 +--------------------------- include/ruby/internal/encoding/coderange.h | 172 +++ include/ruby/internal/encoding/ctype.h | 182 +++ include/ruby/internal/encoding/encoding.h | 897 +++++++++++ include/ruby/internal/encoding/pathname.h | 184 +++ include/ruby/internal/encoding/re.h | 46 + include/ruby/internal/encoding/sprintf.h | 78 + include/ruby/internal/encoding/string.h | 337 ++++ include/ruby/internal/encoding/symbol.h | 100 ++ include/ruby/internal/encoding/transcode.h | 558 +++++++ 10 files changed, 2563 insertions(+), 2269 deletions(-) create mode 100644 include/ruby/internal/encoding/coderange.h create mode 100644 include/ruby/internal/encoding/ctype.h create mode 100644 include/ruby/internal/encoding/encoding.h create mode 100644 include/ruby/internal/encoding/pathname.h create mode 100644 include/ruby/internal/encoding/re.h create mode 100644 include/ruby/internal/encoding/sprintf.h create mode 100644 include/ruby/internal/encoding/string.h create mode 100644 include/ruby/internal/encoding/symbol.h create mode 100644 include/ruby/internal/encoding/transcode.h diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 95cf74a3b8..1256393701 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -16,2276 +16,16 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/encoding.h#L16 * relatively less rooms for things in this header file be useful when writing * an extension library. 
*/ -#include "ruby/internal/config.h" -#include <stdarg.h> #include "ruby/ruby.h" -#include "ruby/oniguruma.h" -#include "ruby/internal/attr/const.h" -#include "ruby/internal/attr/deprecated.h" -#include "ruby/internal/attr/format.h" -#include "ruby/internal/attr/noalias.h" -#include "ruby/internal/attr/nonnull.h" -#include "ruby/internal/attr/noreturn.h" -#include "ruby/internal/attr/returns_nonnull.h" -#include "ruby/internal/attr/pure.h" -#include "ruby/internal/core/rbasic.h" -#include "ruby/internal/dllexport.h" -#include "ruby/internal/fl_type.h" -RBIMPL_SYMBOL_EXPORT_BEGIN() - -/** - * @private - * - * Bit constants used when embedding encodings into ::RBasic::flags. Extension - * libraries must not bother such things. - */ -enum ruby_encoding_consts { - - /** Max possible number of embeddable encodings. */ - RUBY_ENCODING_INLINE_MAX = 127, - - /** Where inline encodings reside. */ - RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10), - - /** Bits we use to store inline encodings. */ - RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT - /* RUBY_FL_USER10..RUBY_FL_USER16 */), - - /** Max possible length of an encoding name. */ - RUBY_ENCODING_MAXNAMELEN = 42 -}; - -#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */ -#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */ -#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_SHIFT} */ - -/** - * Destructively assigns the passed encoding to the passed object. The object - * must be capable of having inline encoding. Using this macro needs deep - * understanding of bit level object binary layout. - * - * @param[out] obj Target object to modify. - * @param[in] i Encoding in encindex format. - * @post `obj`'s encoding is `i`. 
- */ -#define RB_ENCODING_SET_INLINED(obj,i) do {\ - RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\ - RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\ -} while (0) - -/** @alias{rb_enc_set_index} */ -#define RB_ENCODING_SET(obj,i) rb_enc_set_index((obj), (i)) - -/** - * Queries the encoding of the passed object. The encoding must be smaller - * than ::RUBY_ENCODING_INLINE_MAX, which means you have some assumption on the - * return value. This means the API is for internal use only. - * - * @param[in] obj Target object. - * @return `obj`'s encoding index. - */ -#define RB_ENCODING_GET_INLINED(obj) \ - (int)((RBASIC(obj)->flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT) - -/** - * @alias{rb_enc_get_index} - * - * @internal - * - * Implementation wise this is not a verbatim alias of rb_enc_get_index(). But - * the API is consistent. Don't bother. - */ -#define RB_ENCODING_GET(obj) \ - (RB_ENCODING_GET_INLINED(obj) != RUBY_ENCODING_INLINE_MAX ? \ - RB_ENCODING_GET_INLINED(obj) : \ - rb_enc_get_index(obj)) - -/** - * Queries if the passed object is in ascii 8bit (== binary) encoding. The - * object must be capable of having inline encoding. Using this macro needs - * deep understanding of bit level object binary layout. - * - * @param[in] obj An object to check. - * @retval 1 It is. - * @retval 0 It isn't. 
- */ -#define RB_ENCODING_IS_ASCII8BIT(obj) (RB_ENCODING_GET_INLINED(obj) == 0) - -#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */ -#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */ -#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj) /**< @old{RB_ENCODING_GET_INLINED} */ -#define ENCODING_GET(obj) RB_ENCODING_GET(obj) /**< @old{RB_ENCODING_GET} */ -#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */ -#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */ - -/** What rb_enc_str_coderange() returns. */ -enum ruby_coderange_type { - - /** The object's coderange is unclear yet. */ - RUBY_ENC_CODERANGE_UNKNOWN = 0, - - /** The object holds 0 to 127 inclusive and nothing else. */ - RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8), - - /** The object's encoding and contents are consistent each other */ - RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9), - - /** The object holds invalid/malformed/broken character(s). */ - RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)), - - /** Where the coderange resides. */ - RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT| - RUBY_ENC_CODERANGE_VALID| - RUBY_ENC_CODERANGE_BROKEN) -}; - -RBIMPL_ATTR_CONST() -/** - * @private - * - * This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't - * use it directly. - * - * @param[in] cr An enum ::ruby_coderange_type. - * @retval 1 It is. - * @retval 0 It isn't. - */ -static inline int -rb_enc_coderange_clean_p(int cr) -{ - return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT; -} - -/** - * Queries if a code range is "clean". "Clean" in this context means it is - * known and valid. - * - * @param[in] cr An enum ::ruby_coderange_type. - * @retval 1 It is. - * @retval 0 It isn't. 
- */ -#define RB_ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr) - -/** - * Queries the (inline) code range of the passed object. The object must be - * capable of having inline encoding. Using this macro needs deep - * understanding of bit level object binary layout. - * - * @param[in] obj Target object. - * @return An enum ::ruby_coderange_type. - */ -#define RB_ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & RUBY_ENC_CODERANGE_MASK) - -/** - * Queries the (inline) code range of the passed object is - * ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline - * encoding. Using this macro needs deep understanding of bit level object - * binary layout. - * - * @param[in] obj Target object. - * @retval 1 It is ascii only. - * @retval 0 Otherwise (including cases when the range is not known). - */ -#define RB_ENC_CODERANGE_ASCIIONLY(obj) (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT) - -/** - * Destructively modifies the passed object so that its (inline) code range is - * the passed one. The object must be capable of having inline encoding. - * Using this macro needs deep understanding of bit level object binary layout. - * - * @param[out] obj Target object. - * @param[out] cr An enum ::ruby_coderange_type. - * @post `obj`'s code range is `cr`. - */ -#define RB_ENC_CODERANGE_SET(obj,cr) (\ - RBASIC(obj)->flags = \ - (RBASIC(obj)->flags & ~RUBY_ENC_CODERANGE_MASK) | (cr)) - -/** - * Destructively clears the passed object's (inline) code range. The object - * must be capable of having inline encoding. Using this macro needs deep - * understanding of bit level object binary layout. - * - * @param[out] obj Target object. - * @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN. - */ -#define RB_ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_SET((obj),0) - -/* assumed ASCII compatibility */ -/** - * "Mix" two code ranges into one. This is handy for instance when you - * concatenate two strings into one. 
Consider one of then is valid but the - * other isn't. The result must be invalid. This macro computes that kind of - * mixture. - * - * @param[in] a An enum ::ruby_coderange_type. - * @param[in] b Another enum ::ruby_coderange_type. - * @return The `a` "and" `b`. - */ -#define RB_ENC_CODERANGE_AND(a, b) \ - ((a) == RUBY_ENC_CODERANGE_7BIT ? (b) : \ - (a) != RUBY_ENC_CODERANGE_VALID ? RUBY_ENC_CODERANGE_UNKNOWN : \ - (b) == RUBY_ENC_CODERANGE_7BIT ? RUBY_ENC_CODERANGE_VALID : (b)) - -/** - * This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be - * capable of having inline encoding. Using this m (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/