ruby-changes:18751

kosaki	2011-02-04 01:54:52 +0900 (Fri, 04 Feb 2011)

  New Revision: 30778

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=30778

  Log:
    * string.c (count_utf8_lead_bytes_with_word): wrote function
      comments.

  Modified files:
    trunk/ChangeLog
    trunk/string.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 30777)
+++ ChangeLog	(revision 30778)
@@ -1,3 +1,8 @@
+Fri Feb  4 01:50:13 2011  KOSAKI Motohiro  <kosaki.motohiro@g...>
+
+	* string.c (count_utf8_lead_bytes_with_word): wrote function
+	  comments.
+
 Fri Feb  4 00:14:55 2011  Nobuyoshi Nakada  <nobu@r...>
 
 	* ext/zlib/zlib.c (gzfile_reader_get_unused): no need to dup
Index: string.c
===================================================================
--- string.c	(revision 30777)
+++ string.c	(revision 30778)
@@ -1038,13 +1038,30 @@
 
 #ifdef NONASCII_MASK
 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
+
+/*
+ * UTF-8 leading bytes have either 0xxxxxxx or 11xxxxxx
+ * bit representation. (see http://en.wikipedia.org/wiki/UTF-8)
+ * Therefore, the following pseudo code can detect a UTF-8 leading byte.
+ *
+ * if (!(byte & 0x80))
+ *   byte |= 0x40;          // turn on bit6
+ * return ((byte>>6) & 1);  // bit6 tells whether it is a leading byte.
+ *
+ * This function applies the above logic to every byte in the argument
+ * word `s' in parallel, and then gathers the per-byte results.
+ */
 static inline VALUE
 count_utf8_lead_bytes_with_word(const VALUE *s)
 {
     VALUE d = *s;
+
+    /* Transform so that bit0 of each byte tells whether it is a UTF-8 leading byte. */
     d |= ~(d>>1);
     d >>= 6;
     d &= NONASCII_MASK >> 7;
+
+    /* Gather the results of every byte. */
     d += (d>>8);
     d += (d>>16);
 #if SIZEOF_VALUE == 8

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/