ruby-changes:67726

https://git.ruby-lang.org/ruby.git/commit/?id=538a3919d9

From 538a3919d993a253e0aef14ca2fab8f65aa87634 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8D=9C=E9=83=A8=E6=98=8C=E5=B9=B3?=
 <shyouhei@r...>
Date: Fri, 8 Jan 2021 15:55:05 +0900
Subject: include/ruby/internal/ctype.h: add doxygen

Must not be a bad idea to improve documents.
---
 include/ruby/internal/ctype.h | 380 +++++++++++++++++++++++++++++++++++++++---
 util.c                        |   7 +
 2 files changed, 368 insertions(+), 19 deletions(-)

diff --git a/include/ruby/internal/ctype.h b/include/ruby/internal/ctype.h
index ba9eb20..0f7ca6c 100644
--- a/include/ruby/internal/ctype.h
+++ b/include/ruby/internal/ctype.h
@@ -29,34 +29,161 @@ https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/ctype.h#L29
 #include "ruby/internal/attr/artificial.h"
 #include "ruby/internal/attr/const.h"
 #include "ruby/internal/attr/constexpr.h"
+#include "ruby/internal/attr/nonnull.h"
 #include "ruby/internal/dllexport.h"
 
+/**
+ * @name Old character classification macros
+ *
+ * What  is this  #ISPRINT  business?   Well, according  to  our  VCS and  some
+ * internet surfing, it appears that the initial intent of these macros was to
+ * mimic code that appears in common in several GNU projects.  As far as
+ * @shyouhei can tell, they seem to originate from GNU regex (the standalone
+ * one rather than Gnulib or Glibc), and date back to at least 1995.
+ *
+ * Let me lawfully quote from a GNU coreutils commit
+ * https://git.savannah.gnu.org/cgit/coreutils.git/commit/?id=49803907f5dbd7646184a8912c9db9b09dcd0f22
+ *
+ *   > Jim Meyering writes:
+ *   >
+ *   > "... Some ctype macros are valid only for character codes that
+ *   > isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ *   > using /bin/cc or gcc but without giving an ansi option).  So, all
+ *   > ctype uses should be through macros like ISPRINT...  If
+ *   > STDC_HEADERS is defined, then autoconf has verified that the ctype
+ *   > macros don't need to be guarded with references to isascii. ...
+ *   > Defining isascii to 1 should let any compiler worth its salt
+ *   > eliminate the && through constant folding."
+ *   >
+ *   > Bruno Haible adds:
+ *   >
+ *   > "... Furthermore, isupper(c) etc. have an undefined result if c is
+ *   > outside the range -1 <= c <= 255. One is tempted to write isupper(c)
+ *   > with c being of type `char', but this is wrong if c is an 8-bit
+ *   > character >= 128 which gets sign-extended to a negative value.
+ *   > The macro ISUPPER protects against this as well."
+ *
+ * So the intent  was to reroute old problematic systems  that no longer exist.
+ * At the same time the problems described  above no longer hurt us, because we
+ * decided to completely  avoid using system-provided isupper  etc. to reinvent
+ * the wheel.  These macros are entirely legacy; please ignore them.
+ *
+ * But let me also stress that GNU people are wise;  they use those macros
+ * only inside of  their own implementations and never let  them be public.  On
+ * the other hand ruby has thoughtlessly publicised them to 3rd party libraries
+ * since its beginning, which is a very bad idea.  These macros are too easy to
+ * get conflicted with definitions elsewhere.
+ *
+ * New programs should stick to the `rb_` prefixed names.
+ *
+ * @note  It seems we just mimic the API.  We do not share their implementation
+ *        with GPL-ed programs.
+ *
+ * @{
+ */
 #ifndef ISPRINT
-# define ISASCII  rb_isascii
-# define ISPRINT  rb_isprint
-# define ISGRAPH  rb_isgraph
-# define ISSPACE  rb_isspace
-# define ISUPPER  rb_isupper
-# define ISLOWER  rb_islower
-# define ISALNUM  rb_isalnum
-# define ISALPHA  rb_isalpha
-# define ISDIGIT  rb_isdigit
-# define ISXDIGIT rb_isxdigit
-# define ISBLANK  rb_isblank
-# define ISCNTRL  rb_iscntrl
-# define ISPUNCT  rb_ispunct
+# define ISASCII  rb_isascii    /**< @old{rb_isascii}*/
+# define ISPRINT  rb_isprint    /**< @old{rb_isprint}*/
+# define ISGRAPH  rb_isgraph    /**< @old{rb_isgraph}*/
+# define ISSPACE  rb_isspace    /**< @old{rb_isspace}*/
+# define ISUPPER  rb_isupper    /**< @old{rb_isupper}*/
+# define ISLOWER  rb_islower    /**< @old{rb_islower}*/
+# define ISALNUM  rb_isalnum    /**< @old{rb_isalnum}*/
+# define ISALPHA  rb_isalpha    /**< @old{rb_isalpha}*/
+# define ISDIGIT  rb_isdigit    /**< @old{rb_isdigit}*/
+# define ISXDIGIT rb_isxdigit   /**< @old{rb_isxdigit}*/
+# define ISBLANK  rb_isblank    /**< @old{rb_isblank}*/
+# define ISCNTRL  rb_iscntrl    /**< @old{rb_iscntrl}*/
+# define ISPUNCT  rb_ispunct    /**< @old{rb_ispunct}*/
 #endif
 
-#define TOUPPER     rb_toupper
-#define TOLOWER     rb_tolower
-#define STRCASECMP  st_locale_insensitive_strcasecmp
-#define STRNCASECMP st_locale_insensitive_strncasecmp
-#define STRTOUL     ruby_strtoul
+#define TOUPPER     rb_toupper    /**< @old{rb_toupper}*/
+#define TOLOWER     rb_tolower    /**< @old{rb_tolower}*/
+#define STRCASECMP  st_locale_insensitive_strcasecmp  /**< @old{st_locale_insensitive_strcasecmp}*/
+#define STRNCASECMP st_locale_insensitive_strncasecmp /**< @old{st_locale_insensitive_strncasecmp}*/
+#define STRTOUL     ruby_strtoul  /**< @old{ruby_strtoul}*/
+
+/** @} */
 
 RBIMPL_SYMBOL_EXPORT_BEGIN()
-/* locale insensitive functions */
+/** @name locale insensitive functions
+ *  @{
+ */
+
+/* In descriptions below, `the POSIX Locale` and `the "C" locale` are tactfully
+ * used as to whether the described function mimics POSIX or C99. */
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Our  own locale-insensitive  version  of `strcasecmp(3)`.   The "case"  here
+ * always means that of the POSIX  Locale.  It doesn't depend on runtime locale
+ * settings.
+ *
+ * @param[in]  s1  Comparison LHS.
+ * @param[in]  s2  Comparison RHS.
+ * @retval     -1  `s1` is "less" than `s2`.
+ * @retval      0  Both strings converted into lowercase would be identical.
+ * @retval      1  `s1` is "greater" than `s2`.
+ * @note       Not only  does  this function  work under the POSIX  Locale, but
+ *             also assumes its  execution character set be what  ruby calls an
+ *             ASCII-compatible  character  set;  which does  not  include  for
+ *             instance EBCDIC or UTF-16LE.
+ */
 int st_locale_insensitive_strcasecmp(const char *s1, const char *s2);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Our  own locale-insensitive  version of  `strncasecmp(3)`.  The  "case" here
+ * always means that of the POSIX  Locale.  It doesn't depend on runtime locale
+ * settings.
+ *
+ * @param[in]  s1  Comparison LHS.
+ * @param[in]  s2  Comparison RHS.
+ * @param[in]  n   Comparison shall stop after first `n` bytes are scanned.
+ * @retval     -1  `s1` is "less" than `s2`.
+ * @retval      0  Both strings converted into lowercase would be identical.
+ * @retval      1  `s1` is "greater" than `s2`.
+ * @note       Not only  does  this function  work under the POSIX  Locale, but
+ *             also assumes its  execution character set be what  ruby calls an
+ *             ASCII-compatible  character  set;  which does  not  include  for
+ *             instance EBCDIC or UTF-16LE.
+ * @warning    This function is _not_ timing safe.
+ */
 int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n);
+
+RBIMPL_ATTR_NONNULL((1))
+/**
+ * Our own locale-insensitive version of  `strtoul(3)`.  The conversion is done
+ * as if the current locale is set  to the "C" locale, no matter actual runtime
+ * locale settings.
+ *
+ * @note        This is needed because  `strtoul("i", 0, 36)` would return zero
+ *              if it is locale sensitive and the current locale is `tr_TR`.
+ * @param[in]   str     String of digits,  optionally preceded with whitespaces
+ *                      (ignored) and optionally `+` or `-` sign.
+ * @param[out]  endptr  NULL, or an arbitrary pointer (overwritten on return).
+ * @param[in]   base    `2` to  `36` inclusive for  each base, or  special case
+ *                      `0` to detect the base from the contents of the string.
+ * @return      Converted integer, cast to unsigned long.
+ * @post        If `endptr` is not NULL, it is updated to point to the first
+ *              byte where conversion failed.
+ * @note        This function sets `errno` on failure.
+ *                - `EINVAL`: Passed `base` is out of range.
+ *                - `ERANGE`: Converted integer is out of range of `long`.
+ * @warning     As far as @shyouhei reads ISO/IEC 9899:2018 section 7.22.1.4, a
+ *              conforming  `strtoul`  implementation   shall  render  `ERANGE`
+ *              whenever  it  finds  the  input string  represents  a  negative
+ *              integer.  Such thing can never be representable using `unsigned
+ *              long`.   However  this  implementation  does  not  honour  that
+ *              language.  It just casts such a negative value to the return
+ *              type, resulting in a very big return value.  This behaviour
+ *              is at least questionable.  But we can no longer change that at
+ *              this point.
+ * @note        Not only  does this  function  work under  the "C"  locale, but
+ *              also assumes its execution character  set be what ruby calls an
+ *              ASCII-compatible  character set;  which  does  not include  for
+ *              instance EBCDIC or UTF-16LE.
+ */
 unsigned long ruby_strtoul(const char *str, char **endptr, int base);
 RBIMPL_SYMBOL_EXPORT_END()
 
@@ -68,6 +195,16 @@ RBIMPL_SYMBOL_EXPORT_END() https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/ctype.h#L195
 RBIMPL_ATTR_CONST()
 RBIMPL_ATTR_CONSTEXPR(CXX11)
 RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Our own locale-insensitive version of `isascii(3)`.
+ *
+ * @param[in]  c      Byte in question to query.
+ * @retval     false  `c` is out of range of ASCII character set.
+ * @retval     true   Yes it is.
+ * @warning    `c` is  an int.  This  means that when  you pass a  `char` value
+ *             here, it  experiences "integer prom (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/