[前][次][番号順一覧][スレッド一覧]

ruby-changes:6546

From: naruse <ko1@a...>
Date: Mon, 14 Jul 2008 15:27:41 +0900 (JST)
Subject: [ruby-changes:6546] Ruby:r18062 (trunk): * transcode.c (get_replacement_character): temporary function,

naruse	2008-07-14 15:27:26 +0900 (Mon, 14 Jul 2008)

  New Revision: 18062

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18062

  Log:
    * transcode.c (get_replacement_character): temporary function,
      get characters for replacement mode.
    
    * transcode.c (transcode_loop): add undef key and replace value.
    
    * transcode.c (str_transcode): ditto.
    
    * transcode.c (Init_transcode): define sym_undef and sym_replace.

  Modified files:
    trunk/ChangeLog
    trunk/transcode.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18061)
+++ ChangeLog	(revision 18062)
@@ -1,3 +1,14 @@
+Mon Jul 14 15:18:30 2008  NARUSE, Yui  <naruse@r...>
+
+	* transcode.c (get_replacement_character): temporary function,
+	  get characters for replacement mode.
+
+	* transcode.c (transcode_loop): add undef key and replace value.
+
+	* transcode.c (str_transcode): ditto.
+
+	* transcode.c (Init_transcode): define sym_undef and sym_replace.
+
 Mon Jul 14 15:16:40 2008  Kazuhiro NISHIYAMA  <zn@m...>
 
 	* test/ruby/test_variable.rb (TestVariable#test_global_variable_0):
Index: transcode.c
===================================================================
--- transcode.c	(revision 18061)
+++ transcode.c	(revision 18062)
@@ -15,8 +15,11 @@
 #include "transcode_data.h"
 #include <ctype.h>
 
-static VALUE sym_invalid, sym_ignore;
+static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
 #define INVALID_IGNORE 0x1
+#define INVALID_REPLACE 0x2
+#define UNDEF_IGNORE 0x10
+#define UNDEF_REPLACE 0x20
 
 /*
  *  Dispatch data and logic
@@ -119,6 +122,36 @@
     return (rb_transcoder *)val;
 }
 
+/* Return the bytes that stand in for an unconvertible character in enc:
+ * "?" as encoded in UTF-16BE/LE or UTF-32BE/LE, a plain "?" otherwise. */
+static const char*
+get_replacement_character(rb_encoding *enc)
+{
+    static rb_encoding *utf16be_encoding, *utf16le_encoding;
+    static rb_encoding *utf32be_encoding, *utf32le_encoding;
+    if (!utf16be_encoding) {
+	utf16be_encoding = rb_enc_find("UTF-16BE");
+	utf16le_encoding = rb_enc_find("UTF-16LE");
+	utf32be_encoding = rb_enc_find("UTF-32BE");
+	utf32le_encoding = rb_enc_find("UTF-32LE");
+    }
+    if (rb_enc_asciicompat(enc)) {
+	return "?";
+    }
+    else if (utf16be_encoding == enc) {
+	return "\x00?";
+    }
+    else if (utf16le_encoding == enc) {
+	return "?\x00";
+    }
+    else if (utf32be_encoding == enc) {
+	return "\x00\x00\x00?";
+    }
+    else if (utf32le_encoding == enc) {
+	return "?\x00\x00\x00";
+    }
+    return "?";
+}
 
 /*
  *  Transcoding engine logic
@@ -139,6 +172,7 @@
     unsigned char next_byte;
     int from_utf8 = my_transcoder->from_utf8;
     unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
+    rb_encoding *to_encoding = rb_enc_find(my_transcoder->to_encoding);
     while (in_p < in_stop) {
 	char_start = in_p;
 	next_table = conv_tree_start;
@@ -209,9 +243,7 @@
 	  case INVALID:
 	    goto invalid;
 	  case UNDEF:
-	    /* todo: add code for alternate behaviors */
-	    rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
-	    continue;
+	    goto undef;
 	}
 	continue;
       invalid:
@@ -220,8 +252,31 @@
 	if (opt&INVALID_IGNORE) {
 	    continue;
 	}
+	else if (opt&INVALID_REPLACE) {
+	    const char *rep = get_replacement_character(to_encoding);
+	    do {
+		*out_p++ = *rep++;
+	    } while (*rep);
+	    continue;
+	}
 	rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
 	continue;
+      undef:
+	/* valid character in from encoding
+	 * but no related character(s) in to encoding */
+	/* todo: add more alternative behaviors */
+	if (opt&UNDEF_IGNORE) {
+	    continue;
+	}
+	else if (opt&UNDEF_REPLACE) {
+	    const char *rep = get_replacement_character(to_encoding);
+	    do {
+		*out_p++ = *rep++;
+	    } while (*rep);
+	    continue;
+	}
+	rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
+	continue;
     }
     /* cleanup */
     *in_pos  = in_p;
@@ -265,11 +320,29 @@
 	argc--;
 	v = rb_hash_aref(opt, sym_invalid);
 	if (NIL_P(v)) {
-	    rb_raise(rb_eArgError, "unknown value for invalid: setting");
 	}
 	else if (v==sym_ignore) {
 	    options |= INVALID_IGNORE;
 	}
+	else if (v==sym_replace) {
+	    options |= INVALID_REPLACE;
+	    v = rb_hash_aref(opt, sym_replace);
+	}
+	else {
+	    rb_raise(rb_eArgError, "unknown value for invalid: setting");
+	}
+	v = rb_hash_aref(opt, sym_undef);
+	if (NIL_P(v)) {
+	}
+	else if (v==sym_ignore) {
+	    options |= UNDEF_IGNORE;
+	}
+	else if (v==sym_replace) {
+	    options |= UNDEF_REPLACE;
+	}
+	else {
+	    rb_raise(rb_eArgError, "unknown value for undef: setting");
+	}
     }
     if (argc < 1 || argc > 2) {
 	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
@@ -451,7 +524,9 @@
     transcoder_lib_table = st_init_strcasetable();
 
     sym_invalid = ID2SYM(rb_intern("invalid"));
+    sym_undef = ID2SYM(rb_intern("undef"));
     sym_ignore = ID2SYM(rb_intern("ignore"));
+    sym_replace = ID2SYM(rb_intern("replace"));
 
     rb_define_method(rb_cString, "encode", str_encode, -1);
     rb_define_method(rb_cString, "encode!", str_encode_bang, -1);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]