ruby-changes:6784

naruse	2008-08-01 05:35:35 +0900 (Fri, 01 Aug 2008)

  New Revision: 18300

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18300

  Log:
    * transcode.c (output_replacement_character):
      rename from get_replacement_character.
    
    * transcode.c (output_replacement_character):
      fix replacement on UTF-32{BE,LE}. [ruby-dev:35705]
    
    * transcode.c (transcode_loop): ditto.
    
    * test/ruby/test_transcode.rb (test_invalid_replace):
      add for above.

  Modified files:
    trunk/ChangeLog
    trunk/test/ruby/test_transcode.rb
    trunk/transcode.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18299)
+++ ChangeLog	(revision 18300)
@@ -1,3 +1,16 @@
+Fri Aug  1 05:31:08 2008  NARUSE, Yui  <naruse@r...>
+
+	* transcode.c (output_replacement_character):
+	  rename from _get_replacement_character.
+
+	* transcode.c (output_replacement_character):
+	  fix replacement on UTF-32{BE,LE}. [ruby-dev:35705]
+
+	* transcode.c (transcode_loop): ditto.
+
+	* test/ruby/test_transcode.rb (test_invalid_replace):
+	  add for above.
+
 Fri Aug  1 01:01:49 2008  Yusuke Endoh  <mame@t...>
 
 	* proc.c (rb_proc_call_with_block): reduce comparison.
Index: test/ruby/test_transcode.rb
===================================================================
--- test/ruby/test_transcode.rb	(revision 18299)
+++ test/ruby/test_transcode.rb	(revision 18300)
@@ -254,6 +254,21 @@
       "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
   end
 
+  def test_invalid_replace
+    # arguments only
+    assert_nothing_raised { 'abc'.encode('UTF-8', invalid: :replace) }
+    assert_equal("\xEF\xBF\xBD".force_encoding("UTF-8"),
+      "\x80".encode("UTF-8", "UTF-16BE", invalid: :replace))
+    assert_equal("\xFF\xFD".force_encoding("UTF-16BE"),
+      "\x80".encode("UTF-16BE", "UTF-8", invalid: :replace))
+    assert_equal("\xFD\xFF".force_encoding("UTF-16LE"),
+      "\x80".encode("UTF-16LE", "UTF-8", invalid: :replace))
+    assert_equal("\x00\x00\xFF\xFD".force_encoding("UTF-32BE"),
+      "\x80".encode("UTF-32BE", "UTF-8", invalid: :replace))
+    assert_equal("\xFD\xFF\x00\x00".force_encoding("UTF-32LE"),
+      "\x80".encode("UTF-32LE", "UTF-8", invalid: :replace))
+  end
+
   def test_shift_jis
     check_both_ways("\u3000", "\x81\x40", 'shift_jis') # full-width space
     check_both_ways("\u00D7", "\x81\x7E", 'shift_jis') # ~
Index: transcode.c
===================================================================
--- transcode.c	(revision 18299)
+++ transcode.c	(revision 18300)
@@ -91,7 +91,7 @@
 #define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
 
 static const rb_transcoder *
-transcode_dispatch(const char* from_encoding, const char* to_encoding)
+transcode_dispatch(const char *from_encoding, const char *to_encoding)
 {
     char *const key = transcoder_key(from_encoding, to_encoding);
     st_data_t k, val = 0;
@@ -122,9 +122,10 @@
     return (rb_transcoder *)val;
 }
 
-static const char*
-get_replacement_character(rb_encoding *enc)
+static void
+output_replacement_character(unsigned char **out_pp, rb_encoding *enc)
 {
+    unsigned char *out_p = *out_pp;
     static rb_encoding *utf16be_encoding, *utf16le_encoding;
     static rb_encoding *utf32be_encoding, *utf32le_encoding;
     if (!utf16be_encoding) {
@@ -133,24 +134,36 @@
 	utf32be_encoding = rb_enc_find("UTF-32BE");
 	utf32le_encoding = rb_enc_find("UTF-32LE");
     }
-    if (rb_enc_asciicompat(enc)) {
-	return "?";
+    if (rb_utf8_encoding() == enc) {
+	*out_p++ = 0xEF;
+	*out_p++ = 0xBF;
+	*out_p++ = 0xBD;
     }
     else if (utf16be_encoding == enc) {
-	return "\xFF\xFD";
+	*out_p++ = 0xFF;
+	*out_p++ = 0xFD;
     }
     else if (utf16le_encoding == enc) {
-	return "\xFD\xFF";
+	*out_p++ = 0xFD;
+	*out_p++ = 0xFF;
     }
     else if (utf32be_encoding == enc) {
-	return "\x00\x00\xFF\xFD";
+	*out_p++ = 0x00;
+	*out_p++ = 0x00;
+	*out_p++ = 0xFF;
+	*out_p++ = 0xFD;
     }
     else if (utf32le_encoding == enc) {
-	return "\xFD\xFF\x00\x00";
+	*out_p++ = 0xFD;
+	*out_p++ = 0xFF;
+	*out_p++ = 0x00;
+	*out_p++ = 0x00;
     }
     else {
-	return "?";
+	*out_p++ = '?';
     }
+    *out_pp = out_p;
+    return;
 }
 
 /*
@@ -255,10 +268,7 @@
 	    continue;
 	}
 	else if (opt&INVALID_REPLACE) {
-	    const char *rep = get_replacement_character(to_encoding);
-	    do {
-		*out_p++ = *rep++;
-	    } while (*rep);
+	    output_replacement_character(&out_p, to_encoding);
 	    continue;
 	}
 	rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
@@ -271,10 +281,7 @@
 	    continue;
 	}
 	else if (opt&UNDEF_REPLACE) {
-	    const char *rep = get_replacement_character(to_encoding);
-	    do {
-		*out_p++ = *rep++;
-	    } while (*rep);
+	    output_replacement_character(&out_p, to_encoding);
 	    continue;
 	}
 	rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/