[前][次][番号順一覧][スレッド一覧]

ruby-changes:7289

From: akr <ko1@a...>
Date: Sun, 24 Aug 2008 15:25:40 +0900 (JST)
Subject: [ruby-changes:7289] Ruby:r18808 (trunk): * include/ruby/encoding.h (rb_str_transcode): add ecflags argument.

akr	2008-08-24 15:25:24 +0900 (Sun, 24 Aug 2008)

  New Revision: 18808

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18808

  Log:
    * include/ruby/encoding.h (rb_str_transcode): add ecflags argument.
    
    * transcode.c (econv_opts): extracted from str_transcode.
      (str_transcode_enc_args): extracted from str_transcode.
      (str_transcode0): extracted from str_transcode.
      (str_transcode): use econv_opts, str_transcode_enc_args,
      str_transcode0.
      (rb_str_transcode): call str_transcode0.
      (econv_primitive_insert_output): give the additional argument for
      rb_str_transcode.
    
    * io.c (make_writeconv): use invalid/undef flags.
      (io_fwrite): ditto.
      (rb_scan_open_args): give the additional argument for
      rb_str_transcode.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 18807)
+++ include/ruby/encoding.h	(revision 18808)
@@ -194,7 +194,7 @@
     return ENC_DUMMY_P(enc) != 0;
 }
 
-VALUE rb_str_transcode(VALUE str, VALUE to);
+VALUE rb_str_transcode(VALUE str, VALUE to, int ecflags);
 
 /* econv stuff */
 
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18807)
+++ ChangeLog	(revision 18808)
@@ -1,3 +1,21 @@
+Sun Aug 24 15:21:28 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/encoding.h (rb_str_transcode): add ecflags argument.
+
+	* transcode.c (econv_opts): extracted from str_transcode.
+	  (str_transcode_enc_args): extracted from str_transcode.
+	  (str_transcode0): extracted from str_transcode.
+	  (str_transcode): use econv_opts, str_transcode_enc_args,
+	  str_transcode0.
+	  (rb_str_transcode): call str_transcode0.
+	  (econv_primitive_insert_output): give the additional argument for
+	  rb_str_transcode.
+
+	* io.c (make_writeconv): use invalid/undef flags.
+	  (io_fwrite): ditto.
+	  (rb_scan_open_args): give the additional argument for
+	  rb_str_transcode.
+
 Sun Aug 24 13:27:42 2008  Tanaka Akira  <akr@f...>
 
 	* transcode.c (str_transcode): check last hash only if 0 < argc.
Index: io.c
===================================================================
--- io.c	(revision 18807)
+++ io.c	(revision 18808)
@@ -699,6 +699,12 @@
         fptr->writeconv_initialized = 1;
 
         ecflags = 0;
+
+        if (fptr->mode & FMODE_INVALID_MASK)
+            ecflags |= (fptr->mode / (FMODE_INVALID_MASK/ECONV_INVALID_MASK)) & ECONV_INVALID_MASK;
+        if (fptr->mode & FMODE_UNDEF_MASK)
+            ecflags |= (fptr->mode / (FMODE_UNDEF_MASK/ECONV_UNDEF_MASK)) & ECONV_UNDEF_MASK;
+
 #ifdef TEXTMODE_NEWLINE_ENCODER
         if (NEED_NEWLINE_ENCODER(fptr))
             ecflags |= TEXTMODE_NEWLINE_ENCODER;
@@ -740,19 +746,32 @@
     long len, n, r, l, offset = 0;
 
     if (NEED_WRITECONV(fptr)) {
+        VALUE common_encoding = Qnil;
         make_writeconv(fptr);
+
         if (fptr->writeconv) {
-            if (!NIL_P(fptr->writeconv_stateless)) {
-                str = rb_str_transcode(str, fptr->writeconv_stateless);
-            }
-            str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
+            if (!NIL_P(fptr->writeconv_stateless))
+                common_encoding = fptr->writeconv_stateless;
         }
         else {
             if (fptr->enc2)
-                str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc2));
+                common_encoding = rb_enc_from_encoding(fptr->enc2);
             else
-                str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc));
+                common_encoding = rb_enc_from_encoding(fptr->enc);
         }
+
+        if (!NIL_P(common_encoding)) {
+            int ecflags = 0;
+            if (fptr->mode & FMODE_INVALID_MASK)
+                ecflags |= (fptr->mode / (FMODE_INVALID_MASK/ECONV_INVALID_MASK)) & ECONV_INVALID_MASK;
+            if (fptr->mode & FMODE_UNDEF_MASK)
+                ecflags |= (fptr->mode / (FMODE_UNDEF_MASK/ECONV_UNDEF_MASK)) & ECONV_UNDEF_MASK;
+            str = rb_str_transcode(str, common_encoding, ecflags);
+        }
+
+        if (fptr->writeconv) {
+            str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
+        }
     }
 
     len = RSTRING_LEN(str);
@@ -4622,7 +4641,7 @@
 	    static VALUE fs_enc;
 	    if (!fs_enc)
 		fs_enc = rb_enc_from_encoding(fs_encoding);
-	    fname = rb_str_transcode(fname, fs_enc);
+	    fname = rb_str_transcode(fname, fs_enc, 0);
 	}
     }
 #endif
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 18807)
+++ test/ruby/test_io_m17n.rb	(revision 18808)
@@ -1202,5 +1202,68 @@
       }
     }
   end
+
+  def test_invalid_w
+    with_tmpdir {
+      invalid_utf8 = "a\x80b".force_encoding("utf-8")
+      open("t.txt", "w:euc-jp", :invalid => :replace) {|f|
+        assert_nothing_raised { f.write invalid_utf8 }
+      }
+      assert_equal("a?b", File.read("t.txt"))
+
+      open("t.txt", "w:euc-jp", :invalid => :ignore) {|f|
+        assert_nothing_raised { f.write invalid_utf8 }
+      }
+      assert_equal("ab", File.read("t.txt"))
+
+      open("t.txt", "w:euc-jp", :undef => :replace) {|f|
+        assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 }
+      }
+      open("t.txt", "w:euc-jp", :undef => :ignore) {|f|
+        assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 }
+      }
+    }
+  end
+
+  def test_undef_w_stateless
+    with_tmpdir {
+      generate_file("t.txt", "a\uFFFDb")
+      open("t.txt", "w:euc-jp:utf-8", :undef => :replace) {|f|
+        assert_nothing_raised { f.write "a\uFFFDb" }
+      }
+      assert_equal("a?b", File.read("t.txt"))
+      open("t.txt", "w:euc-jp:utf-8", :undef => :ignore) {|f|
+        assert_nothing_raised { f.write "a\uFFFDb" }
+      }
+      assert_equal("ab", File.read("t.txt"))
+      open("t.txt", "w:euc-jp:utf-8", :invalid => :replace) {|f|
+        assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
+      }
+      open("t.txt", "w:euc-jp:utf-8", :invalid => :ignore) {|f|
+        assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
+      }
+    }
+  end
+
+  def test_undef_w_stateful
+    with_tmpdir {
+      generate_file("t.txt", "a\uFFFDb")
+      open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace) {|f|
+        assert_nothing_raised { f.write "a\uFFFDb" }
+      }
+      assert_equal("a?b", File.read("t.txt"))
+      open("t.txt", "w:iso-2022-jp:utf-8", :undef => :ignore) {|f|
+        assert_nothing_raised { f.write "a\uFFFDb" }
+      }
+      assert_equal("ab", File.read("t.txt"))
+      open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace) {|f|
+        assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
+      }
+      open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :ignore) {|f|
+        assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
+      }
+    }
+  end
+
 end
 
Index: transcode.c
===================================================================
--- transcode.c	(revision 18807)
+++ transcode.c	(revision 18808)
@@ -1673,58 +1673,49 @@
 }
 
 static int
-str_transcode(int argc, VALUE *argv, VALUE *self)
+econv_opts(VALUE opt)
 {
-    VALUE dest;
-    VALUE str = *self;
-    long blen, slen;
-    unsigned char *buf, *bp, *sp;
-    const unsigned char *fromp;
+    VALUE v;
+    int options = 0;
+    v = rb_hash_aref(opt, sym_invalid);
+    if (NIL_P(v)) {
+    }
+    else if (v==sym_ignore) {
+        options |= ECONV_INVALID_IGNORE;
+    }
+    else if (v==sym_replace) {
+        options |= ECONV_INVALID_REPLACE;
+        v = rb_hash_aref(opt, sym_replace);
+    }
+    else {
+        rb_raise(rb_eArgError, "unknown value for invalid character option");
+    }
+    v = rb_hash_aref(opt, sym_undef);
+    if (NIL_P(v)) {
+    }
+    else if (v==sym_ignore) {
+        options |= ECONV_UNDEF_IGNORE;
+    }
+    else if (v==sym_replace) {
+        options |= ECONV_UNDEF_REPLACE;
+    }
+    else {
+        rb_raise(rb_eArgError, "unknown value for undefined character option");
+    }
+    return options;
+}
+
+static int
+str_transcode_enc_args(VALUE str, VALUE arg1, VALUE arg2,
+        const char **sname, rb_encoding **senc,
+        const char **dname, rb_encoding **denc)
+{
     rb_encoding *from_enc, *to_enc;
     const char *from_e, *to_e;
     int from_encidx, to_encidx;
     VALUE from_encval, to_encval;
-    VALUE opt;
-    int options = 0;
 
-    if (0 < argc)
-        opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
-    else
-        opt = Qnil;
-    if (!NIL_P(opt)) {
-	VALUE v;
-
-	argc--;
-	v = rb_hash_aref(opt, sym_invalid);
-	if (NIL_P(v)) {
-	}
-	else if (v==sym_ignore) {
-	    options |= ECONV_INVALID_IGNORE;
-	}
-	else if (v==sym_replace) {
-	    options |= ECONV_INVALID_REPLACE;
-	    v = rb_hash_aref(opt, sym_replace);
-	}
-	else {
-	    rb_raise(rb_eArgError, "unknown value for invalid character option");
-	}
-	v = rb_hash_aref(opt, sym_undef);
-	if (NIL_P(v)) {
-	}
-	else if (v==sym_ignore) {
-	    options |= ECONV_UNDEF_IGNORE;
-	}
-	else if (v==sym_replace) {
-	    options |= ECONV_UNDEF_REPLACE;
-	}
-	else {
-	    rb_raise(rb_eArgError, "unknown value for undefined character option");
-	}
-    }
-    if (argc < 1 || argc > 2) {
-	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
-    }
-    if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
+    if ((to_encidx = rb_to_encoding_index(to_encval = arg1)) < 0) {
 	to_enc = 0;
 	to_encidx = 0;
 	to_e = StringValueCStr(to_encval);
@@ -1733,12 +1724,12 @@
 	to_enc = rb_enc_from_index(to_encidx);
 	to_e = rb_enc_name(to_enc);
     }
-    if (argc==1) {
+    if (NIL_P(arg2)) {
 	from_encidx = rb_enc_get_index(str);
 	from_enc = rb_enc_from_index(from_encidx);
 	from_e = rb_enc_name(from_enc);
     }
-    else if ((from_encidx = rb_to_encoding_index(from_encval = argv[1])) < 0) {
+    else if ((from_encidx = rb_to_encoding_index(from_encval = arg2)) < 0) {
 	from_enc = 0;
 	from_e = StringValueCStr(from_encval);
     }
@@ -1747,6 +1738,31 @@
 	from_e = rb_enc_name(from_enc);
     }
 
+    *sname = from_e;
+    *senc = from_enc;
+    *dname = to_e;
+    *denc = to_enc;
+    return to_encidx;
+}
+
+static int
+str_transcode0(int argc, VALUE *argv, VALUE *self, int options)
+{
+    VALUE dest;
+    VALUE str = *self;
+    long blen, slen;
+    unsigned char *buf, *bp, *sp;
+    const unsigned char *fromp;
+    rb_encoding *from_enc, *to_enc;
+    const char *from_e, *to_e;
+    int to_encidx;
+
+    if (argc < 1 || argc > 2) {
+	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
+    }
+
+    to_encidx = str_transcode_enc_args(str, argv[0], argc==1 ? Qnil : argv[1], &from_e, &from_enc, &to_e, &to_enc);
+
     if (from_enc && from_enc == to_enc) {
 	return -1;
     }
@@ -1782,6 +1798,22 @@
     return to_encidx;
 }
 
+static int
+str_transcode(int argc, VALUE *argv, VALUE *self)
+{
+    VALUE opt;
+    int options = 0;
+
+    if (0 < argc) {
+        opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
+        if (!NIL_P(opt)) {
+            argc--;
+            options = econv_opts(opt);
+        }
+    }
+    return str_transcode0(argc, argv, self, options);
+}
+
 static inline VALUE
 str_encode_associate(VALUE str, int encidx)
 {
@@ -1850,9 +1882,16 @@
 }
 
 VALUE
-rb_str_transcode(VALUE str, VALUE to)
+rb_str_transcode(VALUE str, VALUE to, int flags)
 {
-    return str_encode(1, &to, str);
+    int argc = 1;
+    VALUE *argv = &to;
+    VALUE newstr = str;
+    int encidx = str_transcode0(argc, argv, &newstr, flags);
+
+    if (encidx < 0) return rb_str_dup(str);
+    RBASIC(newstr)->klass = rb_obj_class(str);
+    return str_encode_associate(newstr, encidx);
 }
 
 static void
@@ -2305,7 +2344,7 @@
 
     StringValue(string);
     insert_enc = rb_econv_encoding_to_insert_output(ec);
-    string = rb_str_transcode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)));
+    string = rb_str_transcode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0);
 
     ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
     if (ret == -1)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]