[前][次][番号順一覧][スレッド一覧]

ruby-changes:7587

From: akr <ko1@a...>
Date: Thu, 4 Sep 2008 01:36:20 +0900 (JST)
Subject: [ruby-changes:7587] Ruby:r19108 (trunk): * include/ruby/encoding.h (rb_econv_set_replacemenet): declared.

akr	2008-09-04 01:34:11 +0900 (Thu, 04 Sep 2008)

  New Revision: 19108

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19108

  Log:
    * include/ruby/encoding.h (rb_econv_set_replacemenet): declared.
    
    * transcode.c (rb_econv_t): new fields: replacement_str,
      replacement_len, replacement_enc and replacement_allocated.
      (get_replacement_character): change the type of len to size_t.
      (rb_econv_open_by_transcoder_entries): initialize the new fields.
      (rb_econv_close): deallocate replacement_str if it was allocated.
      (make_replacement): new function.
      (output_replacement_character): use make_replacement.
      (rb_econv_set_replacemenet): defined.
      (econv_get_replacement): new method.
      (econv_set_replacement): new method.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/test/ruby/test_econv.rb
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 19107)
+++ include/ruby/encoding.h	(revision 19108)
@@ -219,6 +219,9 @@
     int flags);
 void rb_econv_close(rb_econv_t *ec);
 
+/* result: 0:success -1:failure */
+int rb_econv_set_replacemenet(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
+
 VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags);
 
 /* result: 0:success -1:failure */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19107)
+++ ChangeLog	(revision 19108)
@@ -1,3 +1,18 @@
+Thu Sep  4 01:30:26 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/encoding.h (rb_econv_set_replacemenet): declared.
+
+	* transcode.c (rb_econv_t): new fields: replacement_str,
+	  replacement_len, replacement_enc and replacement_allocated.
+	  (get_replacement_character): make len as size_t.
+	  (rb_econv_open_by_transcoder_entries): initialize the new fields.
+	  (rb_econv_close): deallocate replacement_str if it allocated.
+	  (make_replacement): new function.
+	  (output_replacement_character): use make_replacement.
+	  (rb_econv_set_replacemenet): defined.
+	  (econv_get_replacement): new method.
+	  (econv_set_replacement): new method.
+
 Thu Sep  4 01:12:03 2008  NAKAMURA Usaku  <usa@r...>
 
 	* win32/win32.c (filetime_to_timeval): new function, split from
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 19107)
+++ test/ruby/test_econv.rb	(revision 19108)
@@ -640,4 +640,18 @@
     assert_kind_of(Encoding::ConversionUndefined, err)
     assert_equal("\u{3042}", err.error_char)
   end
+
+  def test_get_replacement
+    ec = Encoding::Converter.new("euc-jp", "iso-8859-1")
+    assert_equal("?", ec.replacement)
+
+    ec = Encoding::Converter.new("euc-jp", "utf-8")
+    assert_equal("\uFFFD", ec.replacement)
+  end
+
+  def test_set_replacement
+    ec = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE)
+    ec.replacement = "<undef>"
+    assert_equal("a <undef> b", ec.convert("a \u3042 b"))
+  end
 end
Index: transcode.c
===================================================================
--- transcode.c	(revision 19107)
+++ transcode.c	(revision 19108)
@@ -87,6 +87,11 @@
     const char *source_encoding_name;
     const char *destination_encoding_name;
 
+    const unsigned char *replacement_str;
+    size_t replacement_len;
+    const char *replacement_enc;
+    int replacement_allocated;
+
     unsigned char *in_buf_start;
     unsigned char *in_data_start;
     unsigned char *in_data_end;
@@ -357,7 +362,7 @@
 }
 
 static const char*
-get_replacement_character(rb_encoding *enc, int *len_ret, const char **repl_enc_ptr)
+get_replacement_character(rb_encoding *enc, size_t *len_ret, const char **repl_enc_ptr)
 {
     static rb_encoding *utf16be_encoding, *utf16le_encoding;
     static rb_encoding *utf32be_encoding, *utf32le_encoding;
@@ -793,6 +798,9 @@
     ec->flags = 0;
     ec->source_encoding_name = NULL;
     ec->destination_encoding_name = NULL;
+    ec->replacement_str = NULL;
+    ec->replacement_len = 0;
+    ec->replacement_allocated = 0;
     ec->in_buf_start = NULL;
     ec->in_data_start = NULL;
     ec->in_data_end = NULL;
@@ -1481,6 +1489,9 @@
 {
     int i;
 
+    if (ec->replacement_allocated) {
+        xfree((void *)ec->replacement_str);
+    }
     for (i = 0; i < ec->num_trans; i++) {
         rb_transcoding_close(ec->elems[i].tc);
         if (ec->elems[i].out_buf_start)
@@ -1773,16 +1784,20 @@
 }
 
 static int
-output_replacement_character(rb_econv_t *ec)
+make_replacement(rb_econv_t *ec)
 {
     rb_transcoding *tc;
     const rb_transcoder *tr;
     rb_encoding *enc;
     const unsigned char *replacement;
     const char *repl_enc;
-    int len;
-    int ret;
+    const char *ins_enc;
+    size_t len;
+    int allocated = 0;
 
+    if (ec->replacement_str)
+        return 0;
+
     tc = ec->last_tc;
     if (tc) {
         tr = tc->transcoder;
@@ -1795,7 +1810,62 @@
         repl_enc = "";
     }
 
-    ret = rb_econv_insert_output(ec, replacement, len, repl_enc);
+    ins_enc = rb_econv_encoding_to_insert_output(ec);
+    if (*repl_enc && !encoding_equal(repl_enc, ins_enc)) {
+        replacement = allocate_converted_string(repl_enc, ins_enc, replacement, len, &len);
+        if (!replacement)
+            return -1;
+        allocated = 1;
+        repl_enc = ins_enc;
+    }
+    ec->replacement_str = replacement;
+    ec->replacement_len = len;
+    ec->replacement_enc = repl_enc;
+    ec->replacement_allocated = allocated;
+    return 0;
+}
+
+int
+rb_econv_set_replacemenet(rb_econv_t *ec,
+    const unsigned char *str, size_t len, const char *encname)
+{
+    unsigned char *str2;
+    size_t len2;
+    const char *encname2;
+
+    encname2 = rb_econv_encoding_to_insert_output(ec);
+
+    if (encoding_equal(encname, encname2)) {
+        str2 = xmalloc(len);
+        MEMCPY(str2, str, unsigned char, len); /* xxx: str may be invalid */
+        len2 = len;
+        encname2 = encname;
+    }
+    else {
+        str2 = allocate_converted_string(encname, encname2, str, len, &len2);
+        if (!str2)
+            return -1;
+    }
+
+    if (ec->replacement_allocated) {
+        xfree((void *)ec->replacement_str);
+    }
+    ec->replacement_allocated = 1;
+    ec->replacement_str = str2;
+    ec->replacement_len = len2;
+    ec->replacement_enc = encname2;
+    return 0;
+}
+
+static int
+output_replacement_character(rb_econv_t *ec)
+{
+    int ret;
+
+    if (make_replacement(ec) == -1)
+        return -1;
+
+    ret = rb_econv_insert_output(ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc);
     if (ret == -1)
         return -1;
 
@@ -2898,11 +2968,11 @@
  * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
  * Encoding::Converter#primitive_convert.
  *
- * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
- * p ec.primitive_convert(src="\xf1abcd", dst="")       #=> :invalid_byte_sequence
- * p ec.last_error      #=> #<Encoding::InvalidByteSequence: "\xF1" followed by "a" on UTF-8>
- * p ec.primitive_convert(src, dst, nil, 1)             #=> :destination_buffer_full
- * p ec.last_error      #=> nil
+ *  ec = Encoding::Converter.new("utf-8", "iso-8859-1")
+ *  p ec.primitive_convert(src="\xf1abcd", dst="")       #=> :invalid_byte_sequence
+ *  p ec.last_error      #=> #<Encoding::InvalidByteSequence: "\xF1" followed by "a" on UTF-8>
+ *  p ec.primitive_convert(src, dst, nil, 1)             #=> :destination_buffer_full
+ *  p ec.last_error      #=> nil
  *
  */
 static VALUE
@@ -2917,6 +2987,68 @@
     return exc;
 }
 
+/*
+ * call-seq:
+ *   ec.replacement -> string
+ *
+ * returns the replacement string.
+ *
+ *  ec = Encoding::Converter.new("euc-jp", "us-ascii")
+ *  p ec.replacement    #=> "?"
+ *
+ *  ec = Encoding::Converter.new("euc-jp", "utf-8")
+ *  p ec.replacement    #=> "\uFFFD"
+ */
+static VALUE
+econv_get_replacement(VALUE self)
+{
+    rb_econv_t *ec = check_econv(self);
+    int ret;
+    rb_encoding *enc;
+
+    ret = make_replacement(ec);
+    if (ret == -1) {
+        rb_raise(rb_eConversionUndefined, "replacement character setup failed");
+    }
+
+    enc = rb_enc_find(ec->replacement_enc);
+    return rb_enc_str_new((const char *)ec->replacement_str, (long)ec->replacement_len, enc);
+}
+
+/*
+ * call-seq:
+ *   ec.replacement = string
+ *
+ * sets the replacement string.
+ *
+ *  ec = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE)
+ *  ec.replacement = "<undef>"
+ *  p ec.convert("a \u3042 b")      #=> "a <undef> b"
+ */
+static VALUE
+econv_set_replacement(VALUE self, VALUE arg)
+{
+    rb_econv_t *ec = check_econv(self);
+    VALUE string = arg;
+    int ret;
+    rb_encoding *enc;
+
+    StringValue(string);
+    enc = rb_enc_get(string);
+
+    ret = rb_econv_set_replacemenet(ec,
+            (const unsigned char *)RSTRING_PTR(string),
+            RSTRING_LEN(string),
+            enc->name);
+
+    if (ret == -1) {
+        /* xxx: rb_eInvalidByteSequence? */
+        rb_raise(rb_eConversionUndefined, "replacement character setup failed");
+    }
+
+    return arg;
+}
+
 void
 rb_econv_check_error(rb_econv_t *ec)
 {
@@ -3114,6 +3246,8 @@
     rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1);
     rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1);
     rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
+    rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
+    rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);
     rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
     rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE));
     rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]