[前][次][番号順一覧][スレッド一覧]

ruby-changes:7129

From: akr <ko1@a...>
Date: Fri, 15 Aug 2008 23:17:31 +0900 (JST)
Subject: [ruby-changes:7129] Ruby:r18647 (trunk): * transcode.c (econv_just_convert): extracted from rb_econv_output.

akr	2008-08-15 23:17:11 +0900 (Fri, 15 Aug 2008)

  New Revision: 18647

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18647

  Log:
    * transcode.c (econv_just_convert): extracted from rb_econv_output.
      (rb_econv_output): use econv_just_convert.
      (econv_primitive_output): new method.

  Modified files:
    trunk/ChangeLog
    trunk/test/ruby/test_econv.rb
    trunk/transcode.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18646)
+++ ChangeLog	(revision 18647)
@@ -1,3 +1,9 @@
+Fri Aug 15 23:07:48 2008  Tanaka Akira  <akr@f...>
+
+	* transcode.c (econv_just_convert): extracted from rb_econv_output.
+	  (rb_econv_output): use econv_just_convert.
+	  (econv_primitive_output): new method.
+
 Fri Aug 15 19:57:01 2008  Tanaka Akira  <akr@f...>
 
 	* include/ruby/encoding.h (rb_econv_output): add str_encoding
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 18646)
+++ test/ruby/test_econv.rb	(revision 18647)
@@ -365,4 +365,38 @@
     assert_errinfo(:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00", nil, ec)
     assert_equal("", src)
   end
+
+  def test_output_region  # primitive_output(string, output, output_byteoffset, output_bytesize): argument handling
+    ec = Encoding::Converter.new("EUC-JP", "UTF-8")
+    assert_equal(true, ec.primitive_output("abc", dst="", nil, 6))  # nil offset: write at the current end of dst
+    assert_equal("abc", dst)
+    assert_raise(ArgumentError) { ec.primitive_output("abc", dst, 4, 6) }  # offset 4 lies past the end of the 3-byte dst
+    assert_equal(true, ec.primitive_output("def", dst))  # offset and size omitted: text is appended
+    assert_equal("abcdef", dst)
+    assert_equal(false, ec.primitive_output("ghi", dst, nil, 1))  # only 1 byte of room for 3 bytes: reports false
+    assert_equal("abcdef", dst)  # ... and dst keeps its previous content
+    assert_raise(ArgumentError) { ec.primitive_output("jkl", dst, -1, 6) }  # negative offset is rejected
+    assert_raise(ArgumentError) { ec.primitive_output("hij", dst, nil, -1) }  # negative size is rejected
+    assert_equal("abcdef", dst)  # the rejected calls did not modify dst
+  end
+
+  def test_output_iso2022jp  # primitive_output interleaved with primitive_convert on an ISO-2022-JP destination
+    ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
+    ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
+    assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst)
+    assert_equal(true, ec.primitive_output("???", dst))  # the inserted text is expected to arrive preceded by \e(B
+    assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst)
+    ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)  # the next converted character is expected to be preceded by \e$B again
+    assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst)
+
+    # The escape sequences may be reduced in the future.
+    assert_equal(true, ec.primitive_output("\xA1\xA1".force_encoding("EUC-JP"), dst))  # a string tagged EUC-JP is inserted as \e(B\e$B!!\e(B
+    assert_equal("\e$B!!\e(B???\e$B!\"\e(B\e$B!!\e(B".force_encoding("ISO-2022-JP"), dst) # full expected buffer after the insertion
+
+    ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
+    assert_equal("\e$B!!\e(B???\e$B!\"\e(B\e$B!!\e(B\e$B!\#".force_encoding("ISO-2022-JP"), dst)
+
+    assert_equal(true, ec.primitive_output("\u3042", dst))  # a \u-escaped string is also converted to the destination encoding before insertion
+    assert_equal("\e$B!!\e(B???\e$B!\"\e(B\e$B!!\e(B\e$B!\#\e(B\e$B$\"\e(B".force_encoding("ISO-2022-JP"), dst)
+  end
 end
Index: transcode.c
===================================================================
--- transcode.c	(revision 18646)
+++ transcode.c	(revision 18647)
@@ -1021,6 +1021,47 @@
     return 0;
 }
 
+static ssize_t /* total number of bytes the conversion produced, or -1 on any failure */
+econv_just_convert(const char *src_enc, const char *dst_enc,
+                   const unsigned char *source_string, size_t source_len,
+                   unsigned char *buf, size_t bufsize)
+{
+    rb_econv_t *ec;
+    const unsigned char *src_ptr;
+    unsigned char *dst_ptr;
+    size_t dst_len;
+    rb_econv_result_t res;
+    const unsigned char *source_end = source_string + source_len;
+    /* Convert source_string from src_enc to dst_enc using a converter opened only for this call. */
+    ec = rb_econv_open(src_enc, dst_enc, 0);
+    if (!ec)
+        return -1;
+    /* Every pass rewinds dst_ptr to buf: if the output exceeds bufsize, buf is overwritten and only the returned length is usable. */
+    src_ptr = source_string;
+    dst_len = 0;
+    do {
+        dst_ptr = buf;
+        res = rb_econv_convert(ec, &src_ptr, source_end, &dst_ptr, buf+bufsize, 0);
+        if (dst_len + (dst_ptr - buf) < dst_len) /* the size_t sum wrapped around */
+            goto convfail;
+        dst_len += dst_ptr - buf;
+        if (SSIZE_MAX < dst_len) /* total would not fit the ssize_t return type */
+            goto convfail;
+    } while (res == econv_destination_buffer_full);
+    /* Any result other than econv_finished is treated as a failure. */
+    if (res != econv_finished)
+        goto convfail;
+
+    rb_econv_close(ec);
+
+    return dst_len;
+
+convfail:
+    if (ec) /* ec is non-NULL on every path that jumps to this label */
+        rb_econv_close(ec);
+    return -1;
+}
+
 /* result: 0:success -1:failure -2:conversion-failure-to-destination-encoding */
 int
 rb_econv_output(rb_econv_t *ec,
@@ -1028,12 +1069,9 @@
     unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
     size_t *required_size)
 {
-    rb_econv_t *from_ascii = NULL;;
-    unsigned char buf[1024], *buf2;
-    size_t dst_len;
-    const unsigned char *src_ptr;
-    unsigned char *dst_ptr;
-    rb_econv_result_t res;
+    const char *dst_enc;
+    unsigned char buf[1024], *buf2 = NULL;
+    ssize_t dst_len;
     int ret;
 
     if (encoding_equal(str_encoding, ec->last_tc->transcoder->to_encoding)) {
@@ -1043,52 +1081,34 @@
     if (required_size)
         *required_size = 0;
 
-    from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0);
-    if (!from_ascii)
+    dst_enc = ec->last_tc->transcoder->to_encoding;
+
+    dst_len = econv_just_convert(str_encoding, dst_enc,
+                                 str, str_len, buf, sizeof(buf));
+    if (dst_len < 0)
         return -2;
 
-    src_ptr = str;
-    dst_len = 0;
-    do {
-        dst_ptr = buf;
-        res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf+sizeof(buf), 0);
-        if (dst_len + (dst_ptr - buf) < dst_len)
-            goto convfail;
-        dst_len += dst_ptr - buf;
-    } while (res == econv_destination_buffer_full);
-
-    if (res != econv_finished)
-        goto convfail;
-
-    rb_econv_close(from_ascii);
-    from_ascii = NULL;
-
     if (dst_len <= sizeof(buf)) {
-        return rb_econv_output_with_destination_encoding(ec, buf, dst_len, destination_buffer_ptr, destination_buffer_end, required_size);
+        return rb_econv_output_with_destination_encoding(ec, buf, dst_len,
+                destination_buffer_ptr, destination_buffer_end, required_size);
     }
 
     buf2 = xmalloc(dst_len);
 
-    from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0);
-    if (!from_ascii)
+    dst_len = econv_just_convert(str_encoding, dst_enc,
+                                 str, str_len, buf2, dst_len);
+    if (dst_len < 0)
         goto convfail;
 
-    src_ptr = str;
-    dst_ptr = buf2;
-    res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf2+dst_len, 0);
-    if (res != econv_finished)
-        goto convfail;
-    rb_econv_close(from_ascii);
-    from_ascii = NULL;
+    ret = rb_econv_output_with_destination_encoding(ec, buf2, dst_len,
+            destination_buffer_ptr, destination_buffer_end, required_size);
 
-    ret = rb_econv_output_with_destination_encoding(ec, buf2, dst_len, destination_buffer_ptr, destination_buffer_end, required_size);
-
     xfree(buf2);
     return ret;
 
 convfail:
-    if (from_ascii)
-        rb_econv_close(from_ascii);
+    if (buf2)
+        xfree(buf2);
     return -2;
 }
 
@@ -1934,6 +1954,84 @@
     return ary;
 }
 
+static VALUE /* Qtrue when the callee reports 0 (success), Qfalse otherwise */
+econv_primitive_output(int argc, VALUE *argv, VALUE self)
+{
+    volatile VALUE string, output;
+    VALUE output_byteoffset_v, output_bytesize_v;
+    long output_byteoffset, output_bytesize;
+    unsigned long output_byteend;
+    /* Registered as Encoding::Converter#primitive_output: writes string into output at the given byte offset/size. */
+    unsigned char *dst_start, *dst_ptr; /* dst_start is assigned below but never read in this function */
+    int ret;
+    size_t required_size; /* filled in by the callee; not consulted in this function */
+
+    rb_econv_t *ec = check_econv(self);
+    /* Two required arguments (string, output) and two optional ones (byte offset, byte size). */
+    rb_scan_args(argc, argv, "22", &string, &output, &output_byteoffset_v, &output_bytesize_v);
+    /* Bring string into the converter's destination encoding before handing it over. */
+    StringValue(string);
+    string = rb_str_transcode(string, rb_enc_from_encoding(ec->destination_encoding));
+    /* A nil offset is provisionally 0; it is replaced by the length of output further down. */
+    if (NIL_P(output_byteoffset_v))
+        output_byteoffset = 0;
+    else
+        output_byteoffset = NUM2LONG(output_byteoffset_v);
+    /* A nil size becomes 0, which selects the automatically computed size below. */
+    if (NIL_P(output_bytesize_v))
+        output_bytesize = 0;
+    else
+        output_bytesize = NUM2LONG(output_bytesize_v);
+
+    StringValue(output);
+    StringValue(string);
+    rb_str_modify(output);
+    /* nil offset: start writing at the current end of output. */
+    if (output_byteoffset_v == Qnil)
+        output_byteoffset = RSTRING_LEN(output);
+
+    if (output_byteoffset < 0)
+        rb_raise(rb_eArgError, "negative output_byteoffset");
+
+    if (RSTRING_LEN(output) < output_byteoffset)
+        rb_raise(rb_eArgError, "output_byteoffset too big");
+
+    if (output_bytesize < 0)
+        rb_raise(rb_eArgError, "negative output_bytesize");
+    /* Size 0 (nil or an explicit 0): allow max_output of the last transcoder plus the byte length of string. */
+    if (output_bytesize == 0) {
+        output_byteend = ec->last_tc->transcoder->max_output;
+        output_byteend += (unsigned long)RSTRING_LEN(string);
+        if (output_byteend < (unsigned long)RSTRING_LEN(string) || /* unsigned wrap-around */
+            LONG_MAX < output_byteend)
+            rb_raise(rb_eArgError, "max_output + string.bytesize too big");
+    }
+    else {
+        output_byteend = (unsigned long)output_bytesize;
+    }
+    /* Turn the size into an absolute end offset within output, again guarding against overflow. */
+    output_byteend += (unsigned long)output_byteoffset;
+    if (output_byteend < (unsigned long)output_byteoffset ||
+        LONG_MAX < output_byteend)
+        rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big");
+    /* Grow output when its capacity does not reach output_byteend. */
+    if (rb_str_capacity(output) < output_byteend)
+        rb_str_resize(output, output_byteend);
+
+    dst_start = dst_ptr = (unsigned char *)RSTRING_PTR(output)+output_byteoffset;
+    ret = rb_econv_output_with_destination_encoding(ec,
+            (unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string),
+            &dst_ptr, (unsigned char *)RSTRING_PTR(output)+output_byteend,
+            &required_size);
+    /* The new length of output is wherever the callee left dst_ptr. */
+    rb_str_set_len(output, dst_ptr - (unsigned char *)RSTRING_PTR(output));
+
+    if (ret == 0)
+        return Qtrue;
+
+    return Qfalse;
+}
+
 void
 Init_transcode(void)
 {
@@ -1958,6 +2056,7 @@
     rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0);
     rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
     rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0);
+    rb_define_method(rb_cEncodingConverter, "primitive_output", econv_primitive_output, -1);
     rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT));
     rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT));
     rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER));

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]