[前][次][番号順一覧][スレッド一覧]

ruby-changes:13271

From: akr <ko1@a...>
Date: Tue, 22 Sep 2009 10:36:07 +0900 (JST)
Subject: [ruby-changes:13271] Ruby:r25032 (trunk): * enum.c (enum_chunk): new method Enumerable#chunk.

akr	2009-09-22 10:35:53 +0900 (Tue, 22 Sep 2009)

  New Revision: 25032

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=25032

  Log:
    * enum.c (enum_chunk): new method Enumerable#chunk.
    * enum.c (enum_slice_before): new method Enumerable#slice_before.
      [ruby-dev:38392] [ruby-dev:39240]

  Modified files:
    trunk/ChangeLog
    trunk/enum.c
    trunk/test/ruby/test_enum.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 25031)
+++ ChangeLog	(revision 25032)
@@ -1,3 +1,9 @@
+Tue Sep 22 10:29:06 2009  Tanaka Akira  <akr@f...>
+
+	* enum.c (enum_chunk): new method Enumerable#chunk.
+	* enum.c (enum_slice_before): new method Enumerable#slice_before.
+	  [ruby-dev:38392] [ruby-dev:39240]
+
 Tue Sep 22 05:58:25 2009  Nobuyoshi Nakada  <nobu@r...>
 
 	* compile.c, cont.c, gc.c, insns.def, iseq.c, iseq.h, process.c,
Index: enum.c
===================================================================
--- enum.c	(revision 25031)
+++ enum.c	(revision 25032)
@@ -1802,8 +1802,331 @@
     return Qnil;		/* not reached */
 }
 
+struct chunk_arg { /* state shared between chunk_i and its per-element callback chunk_ii */
+    VALUE categorize; /* callable invoked via "call" with each element to obtain its chunk key */
+    VALUE state; /* nil, or the per-run copy of the initial state passed as the 2nd call argument */
+    VALUE prev_value; /* key of the chunk being accumulated; nil when no chunk is pending */
+    VALUE prev_elts; /* array of elements accumulated for the pending chunk */
+    VALUE yielder; /* receives each finished [key, elements] pair via "<<" */
+};
+
+static VALUE
+chunk_ii(VALUE i, VALUE _argp, int argc, VALUE *argv) /* per-element callback used by chunk_i: categorizes i and sends finished chunks to the yielder */
+{
+    struct chunk_arg *argp = (struct chunk_arg *)_argp; /* _argp carries the address of chunk_i's struct chunk_arg */
+    VALUE v; /* key returned by the categorize callable for this element */
+    VALUE alone = ID2SYM(rb_intern("_alone"));
+    VALUE separator = ID2SYM(rb_intern("_separator"));
+
+    ENUM_WANT_SVALUE(); /* macro defined outside this hunk; presumably folds multiple yielded values into i -- confirm */
+
+    if (NIL_P(argp->state)) /* no state: call the block with the element only */
+        v = rb_funcall(argp->categorize, rb_intern("call"), 1, i);
+    else /* state present: pass it as the second block argument */
+        v = rb_funcall(argp->categorize, rb_intern("call"), 2, i, argp->state);
+
+    if (v == alone) { /* :_alone -- flush any pending chunk, then emit this element as its own chunk */
+        if (!NIL_P(argp->prev_value)) {
+            rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
+            argp->prev_value = argp->prev_elts = Qnil;
+        }
+        rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(v, rb_ary_new3(1, i)));
+    }
+    else if (NIL_P(v) || v == separator) { /* nil or :_separator -- flush any pending chunk and drop this element */
+        if (!NIL_P(argp->prev_value)) {
+            rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
+            argp->prev_value = argp->prev_elts = Qnil;
+        }
+    }
+    else if (SYMBOL_P(v) && rb_id2name(SYM2ID(v))[0] == '_') { /* any other symbol whose name starts with '_' is rejected */
+	rb_raise(rb_eRuntimeError, "symbol begins with an underscore is reserved");
+    }
+    else { /* ordinary key */
+        if (NIL_P(argp->prev_value)) { /* no pending chunk: start one with this element */
+            argp->prev_value = v;
+            argp->prev_elts = rb_ary_new3(1, i);
+        }
+        else {
+            if (rb_equal(argp->prev_value, v)) { /* same key as the pending chunk: extend it */
+                rb_ary_push(argp->prev_elts, i);
+            }
+            else { /* key changed: emit the pending chunk and start a new one */
+                rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
+                argp->prev_value = v;
+                argp->prev_elts = rb_ary_new3(1, i);
+            }
+        }
+    }
+    return Qnil; /* the callback's return value is always nil */
+}
+
+static VALUE
+chunk_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv) /* block handed to the enumerator's "initialize" by enum_chunk: iterates the source and sends chunks to yielder */
+{
+    VALUE enumerable; /* the object whose "each" is iterated */
+    struct chunk_arg arg;
+
+    enumerable = rb_ivar_get(enumerator, rb_intern("chunk_enumerable")); /* these three ivars are stored by enum_chunk */
+    arg.categorize = rb_ivar_get(enumerator, rb_intern("chunk_categorize"));
+    arg.state = rb_ivar_get(enumerator, rb_intern("chunk_initial_state"));
+    arg.prev_value = Qnil; /* no chunk pending at the start */
+    arg.prev_elts = Qnil;
+    arg.yielder = yielder;
+
+    if (!NIL_P(arg.state)) /* work on a copy so the stored initial state is not the object handed to the block */
+        arg.state = rb_obj_dup(arg.state);
+
+    rb_block_call(enumerable, id_each, 0, 0, chunk_ii, (VALUE)&arg); /* chunk_ii runs once per yielded element */
+    if (!NIL_P(arg.prev_elts)) /* emit the last pending chunk, if any */
+        rb_funcall(arg.yielder, rb_intern("<<"), 1, rb_assoc_new(arg.prev_value, arg.prev_elts));
+    return Qnil;
+}
+
 /*
  *  call-seq:
+ *     enum.chunk {|elt| ... } => enumerator
+ *     enum.chunk(initial_state) {|elt, state| ... } => enumerator
+ *
+ *  Creates an enumerator that iterates over chunks of elements.
+ *  Consecutive elements which have the same block value are chunked together.
+ *
+ *  The result enumerator yields the block value and an array of chunked elements.
+ *  So "each" method can be called as follows.
+ *
+ *    enum.chunk {|elt| key }.each {|key, ary| ... }
+ *
+ *  For example, consecutive even numbers and odd numbers can be
+ *  split as follows.
+ *
+ *    [5, 3, 3, 5, 2, 8, 0, 6, 0, 3].chunk {|n|
+ *      n.even?
+ *    }.each {|even, ary|
+ *      p [even, ary]
+ *    }
+ *    #=> [false, [5, 3, 3, 5]]
+ *    #   [true, [2, 8, 0, 6, 0]]
+ *    #   [false, [3]]
+ *
+ *  This method is useful for sorted series of elements.
+ *  The following example counts words for each initial letter.
+ *
+ *    open("/usr/share/dict/words", "r:iso-8859-1") {|f|
+ *      f.chunk {|line| line.ord }.each {|ch, lines| p [ch.chr, lines.length] }
+ *    }
+ *    #=> ["\n", 1]
+ *    #   ["A", 1327]
+ *    #   ["B", 1372]
+ *    #   ["C", 1507]
+ *    #   ["D", 791]
+ *    #   ...
+ *
+ *  The following key values have special meaning:
+ *  - nil and :_separator specify that the element is dropped.
+ *  - :_alone specifies that the element should be chunked as a singleton.
+ *  Other symbols which begin with an underscore are reserved.
+ *
+ *  nil and :_separator can be used to ignore some elements.
+ *  For example, the sequence of hyphens in svn log can be eliminated as follows.
+ *
+ *    sep = "-"*72 + "\n"
+ *    IO.popen("svn log README") {|f|                 
+ *      f.chunk {|line|
+ *        line != sep || nil
+ *      }.each {|_, lines|
+ *        pp lines
+ *      }      
+ *    }
+ *    #=> ["r20018 | knu | 2008-10-29 13:20:42 +0900 (Wed, 29 Oct 2008) | 2 lines\n",
+ *    #    "\n",
+ *    #    "* README, README.ja: Update the portability section.\n",
+ *    #    "\n"]
+ *    #   ["r16725 | knu | 2008-05-31 23:34:23 +0900 (Sat, 31 May 2008) | 2 lines\n",
+ *    #    "\n",
+ *    #    "* README, README.ja: Add a note about default C flags.\n",
+ *    #    "\n"]
+ *    #   ...
+ *
+ *  :_alone can be used to pass through a bunch of elements.
+ *  For example, to sort consecutive lines formed as Foo#bar and
+ *  pass other lines through, chunk can be used as follows.
+ *
+ *    pat = /\A[A-Z][A-Za-z0-9_]+\#/
+ *    open(filename) {|f|
+ *      f.chunk {|line| pat =~ line ? $& : :_alone }.each {|key, lines|
+ *        if key != :_alone
+ *          print lines.sort.join('')
+ *        else
+ *          print lines.join('')
+ *        end
+ *      }
+ *    }
+ *
+ *  If the block needs to maintain state over multiple elements,
+ *  _initial_state_ argument can be used.
+ *  If a non-nil value is given,
+ *  it is duplicated for each "each" method invocation of the enumerator.
+ *  The duplicated object is passed as the 2nd argument of the block for the "chunk" method.
+ *
+ */
+static VALUE
+enum_chunk(int argc, VALUE *argv, VALUE enumerable) /* implements Enumerable#chunk: returns an enumerator whose body is chunk_i */
+{
+    VALUE initial_state; /* optional first argument */
+    VALUE enumerator;
+
+    rb_scan_args(argc, argv, "01", &initial_state); /* "01": no required arguments, one optional */
+
+    enumerator = rb_obj_alloc(rb_cEnumerator);
+    rb_ivar_set(enumerator, rb_intern("chunk_enumerable"), enumerable); /* stored here, read back by chunk_i */
+    rb_ivar_set(enumerator, rb_intern("chunk_categorize"), rb_block_proc()); /* the block given to chunk, captured as a proc */
+    rb_ivar_set(enumerator, rb_intern("chunk_initial_state"), initial_state);
+    rb_block_call(enumerator, rb_intern("initialize"), 0, 0, chunk_i, enumerator); /* call "initialize" with chunk_i as its block; the enumerator is passed as chunk_i's second argument */
+    return enumerator;
+}
+
+
+struct slicebefore_arg { /* state shared between slicebefore_i and its per-element callback slicebefore_ii */
+    VALUE separator_p; /* callable invoked via "call" with each element; a true result starts a new slice */
+    VALUE state; /* nil, or the per-run copy of the initial state passed as the 2nd call argument */
+    VALUE prev_elts; /* array of elements accumulated for the pending slice; nil when none is pending */
+    VALUE yielder; /* receives each finished slice (an array) via "<<" */
+};
+
+static VALUE
+slicebefore_ii(VALUE i, VALUE _argp, int argc, VALUE *argv) /* per-element callback used by slicebefore_i: decides whether i starts a new slice */
+{
+    struct slicebefore_arg *argp = (struct slicebefore_arg *)_argp; /* _argp carries the address of slicebefore_i's struct slicebefore_arg */
+    VALUE header_p; /* result of the block for this element */
+
+    ENUM_WANT_SVALUE(); /* macro defined outside this hunk; presumably folds multiple yielded values into i -- confirm */
+
+    if (NIL_P(argp->state)) /* no state: call the block with the element only */
+        header_p = rb_funcall(argp->separator_p, rb_intern("call"), 1, i);
+    else /* state present: pass it as the second block argument */
+        header_p = rb_funcall(argp->separator_p, rb_intern("call"), 2, i, argp->state);
+    if (RTEST(header_p)) { /* true result: emit the pending slice (if any) and start a new one at i */
+        if (!NIL_P(argp->prev_elts))
+            rb_funcall(argp->yielder, rb_intern("<<"), 1, argp->prev_elts);
+        argp->prev_elts = rb_ary_new3(1, i);
+    }
+    else { /* false result: i belongs to the pending slice */
+        if (NIL_P(argp->prev_elts)) /* nothing pending yet, so the first slice starts here regardless */
+            argp->prev_elts = rb_ary_new3(1, i);
+        else
+            rb_ary_push(argp->prev_elts, i);
+    }
+
+    return Qnil; /* the callback's return value is always nil */
+}
+
+static VALUE
+slicebefore_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv) /* block handed to the enumerator's "initialize" by enum_slice_before: iterates the source and sends slices to yielder */
+{
+    VALUE enumerable; /* the object whose "each" is iterated */
+    struct slicebefore_arg arg;
+
+    enumerable = rb_ivar_get(enumerator, rb_intern("slicebefore_enumerable")); /* these three ivars are stored by enum_slice_before */
+    arg.separator_p = rb_ivar_get(enumerator, rb_intern("slicebefore_separator_p"));
+    arg.state = rb_ivar_get(enumerator, rb_intern("slicebefore_initial_state"));
+    arg.prev_elts = Qnil; /* no slice pending at the start */
+    arg.yielder = yielder;
+
+    if (!NIL_P(arg.state)) /* work on a copy so the stored initial state is not the object handed to the block */
+        arg.state = rb_obj_dup(arg.state);
+
+    rb_block_call(enumerable, id_each, 0, 0, slicebefore_ii, (VALUE)&arg); /* slicebefore_ii runs once per yielded element */
+    if (!NIL_P(arg.prev_elts)) /* emit the last pending slice, if any */
+        rb_funcall(arg.yielder, rb_intern("<<"), 1, arg.prev_elts);
+    return Qnil;
+}
+
+/*
+ *  call-seq:
+ *     enum.slice_before {|elt| ... } => enumerator
+ *     enum.slice_before(initial_state) {|elt, state| ... } => enumerator
+ *
+ *  Creates an enumerator that iterates over chunks of elements.
+ *  A new chunk begins at each element for which the block returns a true value.
+ *
+ *  The result enumerator yields the chunked elements as an array.
+ *  So "each" method can be called as follows.
+ *
+ *    enum.slice_before {|elt| bool }.each {|ary| ... }
+ *
+ *  For example, iteration over ChangeLog entries can be implemented as follows.
+ *
+ *    # iterate over ChangeLog entries.
+ *    open("ChangeLog") {|f|
+ *      f.slice_before {|line| /\A\S/ =~ line }.each {|e| pp e}
+ *    }
+ *
+ *  If the block needs to maintain state over multiple elements,
+ *  _initial_state_ argument can be used.
+ *  If a non-nil value is given,
+ *  it is duplicated for each "each" method invocation of the enumerator.
+ *  The duplicated object is passed as the 2nd argument of the block for the "slice_before" method.
+ *
+ *  For example, monotonically increasing elements can be chunked as follows.
+ *
+ *    a = [2, 5, 2, 1, 4, 3, 1, 2, 8, 0]
+ *    enum = a.slice_before(n: 0) {|elt, h|
+ *      prev = h[:n]
+ *      h[:n] = elt
+ *      prev > elt
+ *    }
+ *    enum.each {|ary| p ary }
+ *    #=> [2, 5]
+ *    #   [2]
+ *    #   [1, 4]
+ *    #   [3]
+ *    #   [1, 2, 8]
+ *    #   [0]
+ *
+ *
+ *    # parse mbox
+ *    open("mbox") {|f|
+ *      f.slice_before {|line|
+ *        line.start_with? "From "
+ *      }.each {|mail|
+ *        unix_from = mail.shift
+ *        i = mail.index("\n")
+ *        header = mail[0...i]
+ *        body = mail[(i+1)..-1]
+ *        fields = header.slice_before {|line| !" \t".include?(line[0]) }.to_a
+ *        p unix_from
+ *        pp fields
+ *        pp body
+ *      }
+ *    }
+ *
+ *    # split mails in mbox (slice before Unix From line after an empty line)
+ *    open("mbox") {|f|
+ *      f.slice_before(emp: true) {|line,h|
+ *        prevemp = h[:emp]
+ *        h[:emp] = line == "\n"
+ *        prevemp && line.start_with?("From ")
+ *      }.each {|mail|
+ *        pp mail
+ *      }
+ *    }
+ */
+static VALUE
+enum_slice_before(int argc, VALUE *argv, VALUE enumerable) /* implements Enumerable#slice_before: returns an enumerator whose body is slicebefore_i */
+{
+    VALUE initial_state, enumerator; /* initial_state is the optional first argument */
+
+    rb_scan_args(argc, argv, "01", &initial_state); /* "01": no required arguments, one optional */
+
+    enumerator = rb_obj_alloc(rb_cEnumerator);
+    rb_ivar_set(enumerator, rb_intern("slicebefore_enumerable"), enumerable); /* stored here, read back by slicebefore_i */
+    rb_ivar_set(enumerator, rb_intern("slicebefore_separator_p"), rb_block_proc()); /* the block given to slice_before, captured as a proc */
+    rb_ivar_set(enumerator, rb_intern("slicebefore_initial_state"), initial_state);
+    rb_block_call(enumerator, rb_intern("initialize"), 0, 0, slicebefore_i, enumerator); /* call "initialize" with slicebefore_i as its block; the enumerator is passed as slicebefore_i's second argument */
+    return enumerator;
+}
+
+/*
+ *  call-seq:
  *     enum.join(sep=$,)    -> str
  *
  *  Returns a string created by converting each element of the
@@ -1881,6 +2204,8 @@
     rb_define_method(rb_mEnumerable, "drop_while", enum_drop_while, 0);
     rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
     rb_define_method(rb_mEnumerable, "join", enum_join, -1);
+    rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
+    rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
 
     id_eqq  = rb_intern("===");
     id_each = rb_intern("each");
Index: test/ruby/test_enum.rb
===================================================================
--- test/ruby/test_enum.rb	(revision 25031)
+++ test/ruby/test_enum.rb	(revision 25032)
@@ -315,4 +315,74 @@
   ensure
     $, = ofs
   end
+
+  def test_chunk # exercises Enumerable#chunk; the expectations imply @obj (set up outside this hunk) yields 1, 2, 3, 1, 2 -- confirm
+    e = [].chunk {|elt| true } # an empty source produces no chunks
+    assert_equal([], e.to_a)
+
+    e = @obj.chunk {|elt| elt & 2 == 0 ? false : true } # key is false when (elt & 2) == 0, true otherwise
+    assert_equal([[false, [1]], [true, [2, 3]], [false, [1]], [true, [2]]], e.to_a)
+
+    e = @obj.chunk(acc: 0) {|elt, h| h[:acc] += elt; h[:acc].even? } # key is the parity of the running sum kept in the state hash
+    assert_equal([[false, [1,2]], [true, [3]], [false, [1,2]]], e.to_a)
+    assert_equal([[false, [1,2]], [true, [3]], [false, [1,2]]], e.to_a) # a second run gives the same result, which shows h is duplicated per run
+
+    hs = [{}] # hs[0] is the initial state; the block appends every state object it receives
+    e = [:foo].chunk(hs[0]) {|elt, h|
+      hs << h
+      true
+    }
+    assert_equal([[true, [:foo]]], e.to_a)
+    assert_equal([[true, [:foo]]], e.to_a)
+    assert_equal([{}, {}, {}], hs) # two runs, one element each: two received states plus the original
+    assert_not_same(hs[0], hs[1]) # each run received its own copy, distinct from the original and from each other
+    assert_not_same(hs[0], hs[2])
+    assert_not_same(hs[1], hs[2])
+
+    e = @obj.chunk {|elt| elt < 3 ? :_alone : true } # :_alone elements come out as singleton chunks
+    assert_equal([[:_alone, [1]],
+                  [:_alone, [2]],
+                  [true, [3]],
+                  [:_alone, [1]],
+                  [:_alone, [2]]], e.to_a)
+
+    e = @obj.chunk {|elt| elt == 3 ? :_separator : true } # :_separator elements are dropped and split the chunks
+    assert_equal([[true, [1, 2]],
+                  [true, [1, 2]]], e.to_a)
+
+    e = @obj.chunk {|elt| elt == 3 ? nil : true } # nil behaves like :_separator
+    assert_equal([[true, [1, 2]],
+                  [true, [1, 2]]], e.to_a)
+
+    e = @obj.chunk {|elt| :_foo } # other symbols starting with an underscore are reserved
+    assert_raise(RuntimeError) { e.to_a }
+  end
+
+  def test_slice_before # exercises Enumerable#slice_before; the expectations imply @obj (set up outside this hunk) yields 1, 2, 3, 1, 2 -- confirm
+    e = [].slice_before {|elt| true } # an empty source produces no slices
+    assert_equal([], e.to_a)
+
+    e = @obj.slice_before {|elt| elt.even? } # a new slice starts before every even element
+    assert_equal([[1], [2,3,1], [2]], e.to_a)
+
+    e = @obj.slice_before {|elt| elt.odd? } # a new slice starts before every odd element
+    assert_equal([[1,2], [3], [1,2]], e.to_a)
+
+    e = @obj.slice_before(acc: 0) {|elt, h| h[:acc] += elt; h[:acc].even? } # slice when the running sum kept in the state hash becomes even
+    assert_equal([[1,2], [3,1,2]], e.to_a)
+    assert_equal([[1,2], [3,1,2]], e.to_a) # a second run gives the same result, which shows h is duplicated per run
+
+    hs = [{}] # hs[0] is the initial state; the block appends every state object it receives
+    e = [:foo].slice_before(hs[0]) {|elt, h|
+      hs << h
+      true
+    }
+    assert_equal([[:foo]], e.to_a)
+    assert_equal([[:foo]], e.to_a)
+    assert_equal([{}, {}, {}], hs) # two runs, one element each: two received states plus the original
+    assert_not_same(hs[0], hs[1]) # each run received its own copy, distinct from the original and from each other
+    assert_not_same(hs[0], hs[2])
+    assert_not_same(hs[1], hs[2])
+  end
+
 end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]