[前][次][番号順一覧][スレッド一覧]

ruby-changes:4075

From: ko1@a...
Date: Thu, 21 Feb 2008 17:42:31 +0900 (JST)
Subject: [ruby-changes:4075] duerst - Ruby:r15565 (trunk): Thu Feb 21 17:15:15 2008 Martin Duerst <duerst@i...>

duerst	2008-02-21 17:42:10 +0900 (Thu, 21 Feb 2008)

  New Revision: 15565

  Modified files:
    trunk/ChangeLog
    trunk/test/ruby/test_transcode.rb
    trunk/transcode.c

  Log:
    Thu Feb 21 17:15:15 2008  Martin Duerst  <duerst@i...>
    
    * transcode.c: Added basic support for passing options to String#encode
      via a hash. Currently only one option, with one value, is supported:
      invalid: :ignore (dropping invalid byte sequences instead of
      producing an error). Option naming is not yet stable!
    
    * test/ruby/test_transcode.rb: Added a single test for the invalid: :ignore
      option. No further tests were added, because most data does not yet
      distinguish between INVALID and UNKNOWN.
    


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_transcode.rb?r1=15565&r2=15564&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15565&r2=15564&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/transcode.c?r1=15565&r2=15564&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15564)
+++ ChangeLog	(revision 15565)
@@ -1,3 +1,14 @@
+Thu Feb 21 17:15:15 2008  Martin Duerst  <duerst@i...>
+
+	* transcode.c: Added basic support for passing options to String#encode
+	  via a hash. Currently only one option, with one value, is supported:
+	  invalid: :ignore (dropping invalid byte sequences instead of
+	  producing an error). Option naming is not yet stable!
+
+	* test/ruby/test_transcode.rb: Added a single test for invalid: :ignore
+	  option. Not more tests because most data does not yet distinguish
+	  between INVALID and UNKNOWN.
+
 Thu Feb 21 16:35:26 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* array.c (rb_ary_unshift_m): expands enough for argc.  [ruby-dev:33880]
Index: test/ruby/test_transcode.rb
===================================================================
--- test/ruby/test_transcode.rb	(revision 15564)
+++ test/ruby/test_transcode.rb	(revision 15565)
@@ -19,8 +19,6 @@
   end
 
   def test_errors
-    # we don't have semantics for conversion without attribute yet
-    # maybe 'convert to UTF-8' would be nice :-)
     assert_raise(ArgumentError) { 'abc'.encode }
     assert_raise(ArgumentError) { 'abc'.encode! }
     assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') }
@@ -241,4 +239,9 @@
     check_utf_32_both_ways("\u{8FF00}", "\x00\x08\xFF\x00")
     check_utf_32_both_ways("\u{F00FF}", "\x00\x0F\x00\xFF")
   end
+  
+  def test_invalid_ignore
+    # arguments only
+    'abc'.encode('utf-8', invalid: :ignore)
+  end
 end
Index: transcode.c
===================================================================
--- transcode.c	(revision 15564)
+++ transcode.c	(revision 15565)
@@ -15,6 +15,9 @@
 #include "transcode_data.h"
 #include <ctype.h>
 
+static VALUE sym_invalid, sym_ignore;
+#define INVALID_IGNORE 0x1
+
 /*
  *  Dispatch data and logic
  */
@@ -132,7 +135,8 @@
 transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
 	       unsigned char *in_stop, unsigned char *out_stop,
 	       const rb_transcoder *my_transcoder,
-	       rb_transcoding *my_transcoding)
+	       rb_transcoding *my_transcoding,
+	       const int opt)
 {
     unsigned char *in_p = *in_pos, *out_p = *out_pos;
     const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
@@ -211,14 +215,17 @@
 	  case INVALID:
 	    goto invalid;
 	  case UNDEF:
-	    /* todo: add code for alternative behaviors */
+	    /* todo: add code for alternate behaviors */
 	    rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
 	    continue;
 	}
 	continue;
       invalid:
 	/* deal with invalid byte sequence */
-	/* todo: add code for alternative behaviors */
+	/* todo: add more alternative behaviors */
+	if (opt&INVALID_IGNORE) {
+	    continue;
+	}
 	rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
 	continue;
     }
@@ -254,7 +261,22 @@
     const rb_transcoder *my_transcoder;
     rb_transcoding my_transcoding;
     int final_encoding = 0;
+    VALUE opt;
+    int options = 0;
 
+    opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
+    if (!NIL_P(opt)) {
+	VALUE v;
+
+	argc--;
+	v = rb_hash_aref(opt, sym_invalid);
+	if (NIL_P(v)) {
+	    rb_raise(rb_eArgError, "unknown value for invalid: setting");
+	}
+	else if (v==sym_ignore) {
+	    options |= INVALID_IGNORE;
+	}
+    }
     if (argc < 1 || argc > 2) {
 	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
     }
@@ -325,7 +347,7 @@
 	my_transcoding.ruby_string_dest = dest;
 	my_transcoding.flush_func = str_transcoding_resize;
 
-	transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding);
+	transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
 	if (fromp != sp+slen) {
 	    rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
 	}
@@ -426,6 +448,9 @@
     transcoder_lib_table = st_init_strcasetable();
     init_transcoder_table();
 
+    sym_invalid = ID2SYM(rb_intern("invalid"));
+    sym_ignore = ID2SYM(rb_intern("ignore"));
+
     rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
     rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
 }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]