[前][次][番号順一覧][スレッド一覧]

ruby-changes:7858

From: matz <ko1@a...>
Date: Tue, 16 Sep 2008 12:14:55 +0900 (JST)
Subject: [ruby-changes:7858] Ruby:r19379 (trunk): * string.c (rb_str_each_codepoint): add new methods, #codepoints

matz	2008-09-16 12:14:41 +0900 (Tue, 16 Sep 2008)

  New Revision: 19379

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19379

  Log:
    * string.c (rb_str_each_codepoint): add new methods, #codepoints
      and #each_codepoint.  a patch from Michael Selig
      <michael.selig at fs.com.au> in [ruby-core:18532].

  Modified files:
    trunk/ChangeLog
    trunk/string.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19378)
+++ ChangeLog	(revision 19379)
@@ -1,3 +1,9 @@
+Tue Sep 16 11:55:16 2008  Yukihiro Matsumoto  <matz@r...>
+
+	* string.c (rb_str_each_codepoint): add new methods, #codepoints
+	  and #each_codepoint.  a patch from Michael Selig
+	  <michael.selig at fs.com.au> in [ruby-core:18532].
+
 Tue Sep 16 11:24:44 2008  Yukihiro Matsumoto  <matz@r...>
 
 	* ext/socket/mkconstants.rb: add new constants.  a patch from
Index: string.c
===================================================================
--- string.c	(revision 19378)
+++ string.c	(revision 19379)
@@ -5506,6 +5506,59 @@
     return str;
 }
 
+/*
+ *  Document-method: codepoints
+ *  call-seq:
+ *     str.codepoints                   => anEnumerator
+ *     str.codepoints {|fixnum| block } => str
+ *  
+ *  Returns an enumerator that gives the <code>Integer</code> ordinal
+ *  of each character in the string, also known as a <i>codepoint</i>
+ *  when applied to Unicode strings. If a block is given, it passes
+ *  each ordinal to the block instead and returns the string.
+ *     
+ *     "foo\u0635".codepoints.to_a   #=> [102, 111, 111, 1589]
+ */
+
+/*
+ *  Document-method: each_codepoint
+ *  call-seq:
+ *     str.each_codepoint {|fixnum| block }    => str
+ *  
+ *  Passes the <code>Integer</code> ordinal of each character in <i>str</i>
+ *  (also known as a <i>codepoint</i> when applied to Unicode strings) to
+ *  the given block.
+ *     
+ *     "hello\u0639".each_codepoint {|c| print c, ' ' }
+ *     
+ *  <em>produces:</em>
+ *     
+ *     104 101 108 108 111 1593
+ */
+
+/* Yields the codepoint of every character in str; returns str itself. */
+static VALUE
+rb_str_each_codepoint(VALUE str)
+{
+    int n;			/* byte length of the current character */
+    unsigned int c;		/* codepoint of the current character */
+    const char *ptr, *end;
+    rb_encoding *enc;
+
+    if (single_byte_optimizable(str)) return rb_str_each_byte(str);
+    RETURN_ENUMERATOR(str, 0, 0);
+    ptr = RSTRING_PTR(str);
+    end = RSTRING_END(str);
+    enc = STR_ENC_GET(str);
+    while (ptr < end) {
+	c = rb_enc_codepoint(ptr, end, enc);
+	n = rb_enc_codelen(c, enc);
+	rb_yield(UINT2NUM(c));	/* c is unsigned and may exceed Fixnum range */
+	ptr += n;
+    }
+    return str;
+}
+
 static long
 chopped_length(VALUE str)
 {
@@ -6883,6 +6936,7 @@
     rb_define_method(rb_cString, "lines", rb_str_each_line, -1);
     rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0);
     rb_define_method(rb_cString, "chars", rb_str_each_char, 0);
+    rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0);
     rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
     rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
     rb_define_method(rb_cString, "concat", rb_str_concat, 1);
@@ -6932,6 +6986,7 @@
     rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
     rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
     rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
+    rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
 
     rb_define_method(rb_cString, "sum", rb_str_sum, -1);
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]