[前][次][番号順一覧][スレッド一覧]

ruby-changes:44886

From: naruse <ko1@a...>
Date: Thu, 1 Dec 2016 23:18:37 +0900 (JST)
Subject: [ruby-changes:44886] naruse:r56959 (trunk): String#unpack1 [Feature #12752]

naruse	2016-12-01 23:18:32 +0900 (Thu, 01 Dec 2016)

  New Revision: 56959

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=56959

  Log:
    String#unpack1 [Feature #12752]
    
    Returns the first value of String#unpack.

  Modified files:
    trunk/NEWS
    trunk/pack.c
    trunk/test/ruby/test_pack.rb
Index: test/ruby/test_pack.rb
===================================================================
--- test/ruby/test_pack.rb	(revision 56958)
+++ test/ruby/test_pack.rb	(revision 56959)
@@ -837,4 +837,11 @@ EXPECTED https://github.com/ruby/ruby/blob/trunk/test/ruby/test_pack.rb#L837
 
     assert_equal addr, [buf].pack('p')
   end
+
+  def test_unpack1
+    assert_equal 65, "A".unpack1("C")
+    assert_equal 68, "ABCD".unpack1("x3C")
+    assert_equal 0x3042, "\u{3042 3044 3046}".unpack1("U*")
+    assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
+  end
 end
Index: pack.c
===================================================================
--- pack.c	(revision 56958)
+++ pack.c	(revision 56959)
@@ -1021,7 +1021,7 @@ hex2num(char c) https://github.com/ruby/ruby/blob/trunk/pack.c#L1021
 } while (0)
 
 #define PACK_ITEM_ADJUST() do { \
-    if (tmp_len > 0 && !block_p) \
+    if (tmp_len > 0 && mode == UNPACK_ARRAY) \
 	rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
 } while (0)
 
@@ -1043,128 +1043,13 @@ infected_str_new(const char *ptr, long l https://github.com/ruby/ruby/blob/trunk/pack.c#L1043
     return s;
 }
 
-/*
- *  call-seq:
- *     str.unpack(format)    ->  anArray
- *
- *  Decodes <i>str</i> (which may contain binary data) according to the
- *  format string, returning an array of each value extracted. The
- *  format string consists of a sequence of single-character directives,
- *  summarized in the table at the end of this entry.
- *  Each directive may be followed
- *  by a number, indicating the number of times to repeat with this
- *  directive. An asterisk (``<code>*</code>'') will use up all
- *  remaining elements. The directives <code>sSiIlL</code> may each be
- *  followed by an underscore (``<code>_</code>'') or
- *  exclamation mark (``<code>!</code>'') to use the underlying
- *  platform's native size for the specified type; otherwise, it uses a
- *  platform-independent consistent size. Spaces are ignored in the
- *  format string. See also <code>Array#pack</code>.
- *
- *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
- *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
- *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
- *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
- *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
- *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
- *     "now=20is".unpack('M*')             #=> ["now is"]
- *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
- *
- *  This table summarizes the various formats and the Ruby classes
- *  returned by each.
- *
- *   Integer       |         |
- *   Directive     | Returns | Meaning
- *   ------------------------------------------------------------------
- *   C             | Integer | 8-bit unsigned (unsigned char)
- *   S             | Integer | 16-bit unsigned, native endian (uint16_t)
- *   L             | Integer | 32-bit unsigned, native endian (uint32_t)
- *   Q             | Integer | 64-bit unsigned, native endian (uint64_t)
- *   J             | Integer | pointer width unsigned, native endian (uintptr_t)
- *                 |         | (J is available since Ruby 2.3.)
- *                 |         |
- *   c             | Integer | 8-bit signed (signed char)
- *   s             | Integer | 16-bit signed, native endian (int16_t)
- *   l             | Integer | 32-bit signed, native endian (int32_t)
- *   q             | Integer | 64-bit signed, native endian (int64_t)
- *   j             | Integer | pointer width signed, native endian (intptr_t)
- *                 |         | (j is available since Ruby 2.3.)
- *                 |         |
- *   S_ S!         | Integer | unsigned short, native endian
- *   I I_ I!       | Integer | unsigned int, native endian
- *   L_ L!         | Integer | unsigned long, native endian
- *   Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
- *                 |         | if the platform has no long long type.)
- *                 |         | (Q_ and Q! is available since Ruby 2.1.)
- *   J!            | Integer | uintptr_t, native endian (same with J)
- *                 |         | (J! is available since Ruby 2.3.)
- *                 |         |
- *   s_ s!         | Integer | signed short, native endian
- *   i i_ i!       | Integer | signed int, native endian
- *   l_ l!         | Integer | signed long, native endian
- *   q_ q!         | Integer | signed long long, native endian (ArgumentError
- *                 |         | if the platform has no long long type.)
- *                 |         | (q_ and q! is available since Ruby 2.1.)
- *   j!            | Integer | intptr_t, native endian (same with j)
- *                 |         | (j! is available since Ruby 2.3.)
- *                 |         |
- *   S> s> S!> s!> | Integer | same as the directives without ">" except
- *   L> l> L!> l!> |         | big endian
- *   I!> i!>       |         | (available since Ruby 1.9.3)
- *   Q> q> Q!> q!> |         | "S>" is same as "n"
- *   J> j> J!> j!> |         | "L>" is same as "N"
- *                 |         |
- *   S< s< S!< s!< | Integer | same as the directives without "<" except
- *   L< l< L!< l!< |         | little endian
- *   I!< i!<       |         | (available since Ruby 1.9.3)
- *   Q< q< Q!< q!< |         | "S<" is same as "v"
- *   J< j< J!< j!< |         | "L<" is same as "V"
- *                 |         |
- *   n             | Integer | 16-bit unsigned, network (big-endian) byte order
- *   N             | Integer | 32-bit unsigned, network (big-endian) byte order
- *   v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
- *   V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
- *                 |         |
- *   U             | Integer | UTF-8 character
- *   w             | Integer | BER-compressed integer (see Array.pack)
- *
- *   Float        |         |
- *   Directive    | Returns | Meaning
- *   -----------------------------------------------------------------
- *   D d          | Float   | double-precision, native format
- *   F f          | Float   | single-precision, native format
- *   E            | Float   | double-precision, little-endian byte order
- *   e            | Float   | single-precision, little-endian byte order
- *   G            | Float   | double-precision, network (big-endian) byte order
- *   g            | Float   | single-precision, network (big-endian) byte order
- *
- *   String       |         |
- *   Directive    | Returns | Meaning
- *   -----------------------------------------------------------------
- *   A            | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
- *   a            | String  | arbitrary binary string
- *   Z            | String  | null-terminated string
- *   B            | String  | bit string (MSB first)
- *   b            | String  | bit string (LSB first)
- *   H            | String  | hex string (high nibble first)
- *   h            | String  | hex string (low nibble first)
- *   u            | String  | UU-encoded string
- *   M            | String  | quoted-printable, MIME encoding (see RFC2045)
- *   m            | String  | base64 encoded string (RFC 2045) (default)
- *                |         | base64 encoded string (RFC 4648) if followed by 0
- *   P            | String  | pointer to a structure (fixed-length string)
- *   p            | String  | pointer to a null-terminated string
- *
- *   Misc.        |         |
- *   Directive    | Returns | Meaning
- *   -----------------------------------------------------------------
- *   @            | ---     | skip to the offset given by the length argument
- *   X            | ---     | skip backward one byte
- *   x            | ---     | skip forward one byte
- */
+/* unpack mode */
+#define UNPACK_ARRAY 0
+#define UNPACK_BLOCK 1
+#define UNPACK_1 2
 
 static VALUE
-pack_unpack(VALUE str, VALUE fmt)
+pack_unpack_internal(VALUE str, VALUE fmt, int mode)
 {
 #define hexdigits ruby_hexdigits
     char *s, *send;
@@ -1177,16 +1062,18 @@ pack_unpack(VALUE str, VALUE fmt) https://github.com/ruby/ruby/blob/trunk/pack.c#L1062
 #ifdef NATINT_PACK
     int natint;			/* native integer */
 #endif
-    int block_p = rb_block_given_p();
     int signed_p, integer_size, bigendian_p;
 #define UNPACK_PUSH(item) do {\
 	VALUE item_val = (item);\
-	if (block_p) {\
+	if ((mode) == UNPACK_BLOCK) {\
 	    rb_yield(item_val);\
 	}\
-	else {\
+	else if ((mode) == UNPACK_ARRAY) {\
 	    rb_ary_push(ary, item_val);\
 	}\
+	else /* if ((mode) == UNPACK_1) { */ {\
+	    return item_val; \
+	}\
     } while (0)
 
     StringValue(str);
@@ -1196,7 +1083,7 @@ pack_unpack(VALUE str, VALUE fmt) https://github.com/ruby/ruby/blob/trunk/pack.c#L1083
     p = RSTRING_PTR(fmt);
     pend = p + RSTRING_LEN(fmt);
 
-    ary = block_p ? Qnil : rb_ary_new();
+    ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
     while (p < pend) {
 	int explicit_endian = 0;
 	type = *p++;
@@ -1868,6 +1755,148 @@ pack_unpack(VALUE str, VALUE fmt) https://github.com/ruby/ruby/blob/trunk/pack.c#L1755
     return ary;
 }
 
+/*
+ *  call-seq:
+ *     str.unpack(format)    ->  anArray
+ *
+ *  Decodes <i>str</i> (which may contain binary data) according to the
+ *  format string, returning an array of each value extracted. The
+ *  format string consists of a sequence of single-character directives,
+ *  summarized in the table at the end of this entry.
+ *  Each directive may be followed
+ *  by a number, indicating the number of times to repeat with this
+ *  directive. An asterisk (``<code>*</code>'') will use up all
+ *  remaining elements. The directives <code>sSiIlL</code> may each be
+ *  followed by an underscore (``<code>_</code>'') or
+ *  exclamation mark (``<code>!</code>'') to use the underlying
+ *  platform's native size for the specified type; otherwise, it uses a
+ *  platform-independent consistent size. Spaces are ignored in the
+ *  format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
+ *
+ *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
+ *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
+ *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
+ *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
+ *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
+ *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
+ *     "now=20is".unpack('M*')             #=> ["now is"]
+ *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
+ *
+ *  This table summarizes the various formats and the Ruby classes
+ *  returned by each.
+ *
+ *   Integer       |         |
+ *   Directive     | Returns | Meaning
+ *   ------------------------------------------------------------------
+ *   C             | Integer | 8-bit unsigned (unsigned char)
+ *   S             | Integer | 16-bit unsigned, native endian (uint16_t)
+ *   L             | Integer | 32-bit unsigned, native endian (uint32_t)
+ *   Q             | Integer | 64-bit unsigned, native endian (uint64_t)
+ *   J             | Integer | pointer width unsigned, native endian (uintptr_t)
+ *                 |         |
+ *   c             | Integer | 8-bit signed (signed char)
+ *   s             | Integer | 16-bit signed, native endian (int16_t)
+ *   l             | Integer | 32-bit signed, native endian (int32_t)
+ *   q             | Integer | 64-bit signed, native endian (int64_t)
+ *   j             | Integer | pointer width signed, native endian (intptr_t)
+ *                 |         |
+ *   S_ S!         | Integer | unsigned short, native endian
+ *   I I_ I!       | Integer | unsigned int, native endian
+ *   L_ L!         | Integer | unsigned long, native endian
+ *   Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
+ *                 |         | if the platform has no long long type.)
+ *   J!            | Integer | uintptr_t, native endian (same as J)
+ *                 |         |
+ *   s_ s!         | Integer | signed short, native endian
+ *   i i_ i!       | Integer | signed int, native endian
+ *   l_ l!         | Integer | signed long, native endian
+ *   q_ q!         | Integer | signed long long, native endian (ArgumentError
+ *                 |         | if the platform has no long long type.)
+ *   j!            | Integer | intptr_t, native endian (same as j)
+ *                 |         |
+ *   S> s> S!> s!> | Integer | same as the directives without ">" except
+ *   L> l> L!> l!> |         | big endian
+ *   I!> i!>       |         |
+ *   Q> q> Q!> q!> |         | "S>" is same as "n"
+ *   J> j> J!> j!> |         | "L>" is same as "N"
+ *                 |         |
+ *   S< s< S!< s!< | Integer | same as the directives without "<" except
+ *   L< l< L!< l!< |         | little endian
+ *   I!< i!<       |         |
+ *   Q< q< Q!< q!< |         | "S<" is same as "v"
+ *   J< j< J!< j!< |         | "L<" is same as "V"
+ *                 |         |
+ *   n             | Integer | 16-bit unsigned, network (big-endian) byte order
+ *   N             | Integer | 32-bit unsigned, network (big-endian) byte order
+ *   v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
+ *   V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
+ *                 |         |
+ *   U             | Integer | UTF-8 character
+ *   w             | Integer | BER-compressed integer (see Array#pack)
+ *
+ *   Float        |         |
+ *   Directive    | Returns | Meaning
+ *   -----------------------------------------------------------------
+ *   D d          | Float   | double-precision, native format
+ *   F f          | Float   | single-precision, native format
+ *   E            | Float   | double-precision, little-endian byte order
+ *   e            | Float   | single-precision, little-endian byte order
+ *   G            | Float   | double-precision, network (big-endian) byte order
+ *   g            | Float   | single-precision, network (big-endian) byte order
+ *
+ *   String       |         |
+ *   Directive    | Returns | Meaning
+ *   -----------------------------------------------------------------
+ *   A            | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
+ *   a            | String  | arbitrary binary string
+ *   Z            | String  | null-terminated string
+ *   B            | String  | bit string (MSB first)
+ *   b            | String  | bit string (LSB first)
+ *   H            | String  | hex string (high nibble first)
+ *   h            | String  | hex string (low nibble first)
+ *   u            | String  | UU-encoded string
+ *   M            | String  | quoted-printable, MIME encoding (see RFC2045)
+ *   m            | String  | base64 encoded string (RFC 2045) (default)
+ *                |         | base64 encoded string (RFC 4648) if followed by 0
+ *   P            | String  | pointer to a structure (fixed-length string)
+ *   p            | String  | pointer to a null-terminated string
+ *
+ *   Misc.        |         |
+ *   Directive    | Returns | Meaning
+ *   -----------------------------------------------------------------
+ *   @            | ---     | skip to the offset given by the length argument
+ *   X            | ---     | skip backward one byte
+ *   x            | ---     | skip forward one byte
+ *
+ *  HISTORY
+ *
+ *  * J, J!, j, and j! are available since Ruby 2.3.
+ *  * Q_, Q!, q_, and q! are available since Ruby 2.1.
+ *  * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
+ */
+
+static VALUE
+pack_unpack(VALUE str, VALUE fmt)
+{
+    int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
+    return pack_unpack_internal(str, fmt, mode);
+}
+
+/*
+ *  call-seq:
+ *     str.unpack1(format)    ->  obj
+ *
+ *  Decodes <i>str</i> (which may contain binary data) according to the
+ *  format string, returning the first value extracted.
+ *  See also <code>String#unpack</code>, <code>Array#pack</code>.
+ */
+
+static VALUE
+pack_unpack1(VALUE str, VALUE fmt)
+{
+    return pack_unpack_internal(str, fmt, UNPACK_1);
+}
+
 int
 rb_uv_to_utf8(char buf[6], unsigned long uv)
 {
@@ -1980,6 +2009,7 @@ Init_pack(void) https://github.com/ruby/ruby/blob/trunk/pack.c#L2009
 {
     rb_define_method(rb_cArray, "pack", pack_pack, -1);
     rb_define_method(rb_cString, "unpack", pack_unpack, 1);
+    rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);
 
     id_associated = rb_make_internal_id();
 }
Index: NEWS
===================================================================
--- NEWS	(revision 56958)
+++ NEWS	(revision 56959)
@@ -159,6 +159,8 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/NEWS#L159
   * String#concat, String#prepend [Feature #12333]
     Now takes multiple arguments.
 
+  * String#unpack1 [Feature #12752]
+
 * Symbol
 
   * Symbol#casecmp? [Feature #12786]

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]