[前][次][番号順一覧][スレッド一覧]

ruby-changes:8181

From: matz <ko1@a...>
Date: Wed, 8 Oct 2008 02:40:03 +0900 (JST)
Subject: [ruby-changes:8181] Ruby:r19709 (trunk): * encoding.c (rb_default_internal_encoding): merged a patch from

matz	2008-10-08 02:39:44 +0900 (Wed, 08 Oct 2008)

  New Revision: 19709

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19709

  Log:
    * encoding.c (rb_default_internal_encoding): merged a patch from
      Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].
    
    * io.c (rb_io_ext_int_to_encs): ditto.
    
    * ruby.c (proc_options): support default internal encoding in -E
      option.

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/io.c
    trunk/ruby.c

Index: encoding.c
===================================================================
--- encoding.c	(revision 19708)
+++ encoding.c	(revision 19709)
@@ -1027,8 +1027,57 @@
     default_external = 0;
 }
 
+/* -2 => not yet set, -1 => nil */
+static int default_internal_index = -2;
+static rb_encoding *default_internal;
+
+rb_encoding *
+rb_default_internal_encoding(void)
+{
+    if (!default_internal && default_internal_index >= 0) {
+	default_internal = rb_enc_from_index(default_internal_index);
+    }
+    return default_internal;
+}
+
+VALUE
+rb_enc_default_internal(void)
+{
+    /* Note: These functions cope with default_internal not being set */
+    return rb_enc_from_encoding(rb_default_internal_encoding());
+}
+
 /*
  * call-seq:
+ *   Encoding.default_internal => enc
+ *
+ * Returns default internal encoding.
+ *
+ * It is initialized by the source internal_encoding or -E option,
+ * and can't be modified after that.
+ */
+static VALUE
+get_default_internal(VALUE klass)
+{
+    return rb_enc_default_internal();
+}
+
+void
+rb_enc_set_default_internal(VALUE encoding)
+{
+    if (default_internal_index != -2)
+	/* Already set */
+	return;
+    default_internal_index = encoding == Qnil ?
+				-1 :rb_enc_to_index(rb_to_encoding(encoding));
+    /* Convert US-ASCII => UTF-8 */
+    if (default_internal_index == rb_usascii_encindex())
+	default_internal_index = rb_utf8_encindex();
+    default_internal = 0;
+}
+
+/*
+ * call-seq:
  *   Encoding.locale_charmap => string
  *
  * Returns the locale charmap name.
@@ -1212,6 +1261,7 @@
     rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
 
     rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
+    rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
     rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
 
     list = rb_ary_new2(enc_table.count);
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 19708)
+++ include/ruby/encoding.h	(revision 19709)
@@ -168,11 +168,14 @@
 rb_encoding *rb_locale_encoding(void);
 rb_encoding *rb_filesystem_encoding(void);
 rb_encoding *rb_default_external_encoding(void);
+rb_encoding *rb_default_internal_encoding(void);
 int rb_ascii8bit_encindex(void);
 int rb_utf8_encindex(void);
 int rb_usascii_encindex(void);
 VALUE rb_enc_default_external(void);
+VALUE rb_enc_default_internal(void);
 void rb_enc_set_default_external(VALUE encoding);
+void rb_enc_set_default_internal(VALUE encoding);
 VALUE rb_locale_charmap(VALUE klass);
 long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
 
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19708)
+++ ChangeLog	(revision 19709)
@@ -1,3 +1,13 @@
+Wed Oct  8 02:38:28 2008  Yukihiro Matsumoto  <matz@r...>
+
+	* encoding.c (rb_default_internal_encoding): merged a patch from
+	  Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].
+
+	* io.c (rb_io_ext_int_to_encs): ditto.
+
+	* ruby.c (proc_options): support default internal encoding in -E
+	  option.
+
 Wed Oct  8 00:03:39 2008  Tadayoshi Funaba  <tadf@d...>
 
 	* lib/date.rb (today,now): should produce own instances.
Index: io.c
===================================================================
--- io.c	(revision 19708)
+++ io.c	(revision 19709)
@@ -2177,10 +2177,8 @@
 	}
 	newline = (unsigned char)rsptr[rslen - 1];
 
-        if (fptr->encs.enc2)
-            enc = fptr->encs.enc;
-        else
-            enc = io_input_encoding(fptr);
+	/* MS - Optimisation */
+        enc = io_read_encoding(fptr);
 	while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
             const char *s, *p, *pp, *e;
 
@@ -3740,52 +3738,87 @@
     return NULL;		/* not reached */
 }
 
+/*
+ * Convert external/internal encodings to enc/enc2
+ * NULL => use default encoding
+ * Qnil => no encoding specified (internal only)
+ */
 static void
+rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2)
+{
+    int default_ext = 0;
+
+    if (ext == NULL) {
+	ext = rb_default_external_encoding();
+	default_ext = 1;
+    }
+    if (intern == NULL && ext != rb_ascii8bit_encoding())
+	/* If external is ASCII-8BIT, no default transcoding */
+	intern = rb_default_internal_encoding();
+    if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) {
+	/* No internal encoding => use external + no transcoding */
+	*enc = default_ext ? NULL : ext;
+	*enc2 = NULL;
+    }
+    else {
+	*enc = intern;
+	*enc2 = ext;
+    }
+}
+
+static void
 parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p)
 {
-    const char *p0, *p1;
-    char *enc2name;
+    const char *p;
+    char encname[ENCODING_MAXNAMELEN+1];
     int idx, idx2;
+    rb_encoding *ext_enc, *int_enc;
 
-    /* parse estr as "enc" or "enc2:enc" */
+    /* parse estr as "enc" or "enc2:enc" or "enc:-" */
 
-    *enc_p = 0;
-    *enc2_p = 0;
+    p = strrchr(estr, ':');
+    if (p) {
+	int len = (p++) - estr;
+	if (len == 0 || len > ENCODING_MAXNAMELEN)
+	    idx = -1;
+	else {
+	    memcpy(encname, estr, len);
+	    encname[len] = '\0';
+	    estr = encname;
+	    idx = rb_enc_find_index(encname);
+	}
+    }
+    else
+	idx = rb_enc_find_index(estr);
 
-    p0 = strrchr(estr, ':');
-    if (!p0) p1 = estr;
-    else     p1 = p0 + 1;
-    idx = rb_enc_find_index(p1);
-    if (idx >= 0) {
-	*enc_p = rb_enc_from_index(idx);
-    }
+    if (idx >= 0)
+	ext_enc = rb_enc_from_index(idx);
     else {
-	rb_warn("Unsupported encoding %s ignored", p1);
+	if (idx != -2)
+	    rb_warn("Unsupported encoding %s ignored", estr);
+	ext_enc = NULL;
     }
 
-    if (*enc_p && p0) {
-	int n = p0 - estr;
-	if (n > ENCODING_MAXNAMELEN) {
-	    idx2 = -1;
+    int_enc = NULL;
+    if (p) {
+	if (*p == '-' && *(p+1) == '\0') {
+	    /* Special case - "-" => no transcoding */
+	    int_enc = (rb_encoding *)Qnil;
 	}
 	else {
-	    enc2name = ALLOCA_N(char, n+1);
-	    memcpy(enc2name, estr, n);
-	    enc2name[n] = '\0';
-	    estr = enc2name;
-	    idx2 = rb_enc_find_index(enc2name);
+	    idx2 = rb_enc_find_index(p);
+	    if (idx2 < 0)
+		rb_warn("Unsupported encoding %s ignored", p);
+	    else if (idx2 == idx) {
+		rb_warn("Ignoring internal encoding %s: it is identical to external encoding %s", p, estr);
+		int_enc = (rb_encoding *)Qnil;
+	    }
+	    else
+		int_enc = rb_enc_from_index(idx2);
 	}
-	if (idx2 < 0) {
-	    rb_warn("Unsupported encoding %.*s ignored", n, estr);
-	}
-	else if (idx2 == idx) {
-	    rb_warn("Ignoring internal encoding %.*s: it is identical to external encoding %s",
-		    n, estr, p1);
-	}
-	else {
-	    *enc2_p = rb_enc_from_index(idx2);
-	}
     }
+
+    rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p);
 }
 
 static void
@@ -3821,28 +3854,32 @@
     }
     if (!NIL_P(extenc)) {
 	rb_encoding *extencoding = rb_to_encoding(extenc);
+	rb_encoding *intencoding = NULL;
         extracted = 1;
-        *enc_p = 0;
-        *enc2_p = 0;
 	if (!NIL_P(encoding)) {
 	    rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
 		    RSTRING_PTR(encoding));
 	}
 	if (!NIL_P(intenc)) {
-	    rb_encoding *intencoding = rb_to_encoding(intenc);
+	    if (!NIL_P(encoding = rb_check_string_type(intenc))) {
+		char *p = StringValueCStr(encoding);
+		if (*p == '-' && *(p+1) == '\0') {
+		    /* Special case - "-" => no transcoding */
+		    intencoding = (rb_encoding *)Qnil;
+		}
+		else
+		    intencoding = rb_to_encoding(intenc);
+	    }
+	    else
+		intencoding = rb_to_encoding(intenc);
 	    if (extencoding == intencoding) {
 		rb_warn("Ignoring internal encoding '%s': it is identical to external encoding '%s'",
 			RSTRING_PTR(rb_inspect(intenc)),
 			RSTRING_PTR(rb_inspect(extenc)));
+		intencoding = (rb_encoding *)Qnil;
 	    }
-	    else {
-		*enc_p = intencoding;
-                *enc2_p = extencoding;
-	    }
 	}
-        else {
-            *enc_p = extencoding;
-        }
+	rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p);
     }
     else {
 	if (!NIL_P(intenc)) {
@@ -3882,8 +3919,8 @@
 
     vmode = *vmode_p;
 
-    enc = NULL;
-    enc2 = NULL;
+    /* Set to defaults */
+    rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
 
     if (NIL_P(vmode)) {
         fmode = FMODE_READABLE;
@@ -4070,8 +4107,8 @@
     rb_io_t *fptr;
     convconfig_t cc;
     if (!convconfig) {
-        cc.enc = NULL;
-        cc.enc2 = NULL;
+	/* Set to default encodings */
+	rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
         cc.ecflags = 0;
         cc.ecopts = Qnil;
         convconfig = &cc;
@@ -4099,8 +4136,8 @@
         parse_mode_enc(p+1, &convconfig.enc, &convconfig.enc2);
     }
     else {
-        convconfig.enc = NULL;
-        convconfig.enc2 = NULL;
+	/* Set to default encodings */
+	rb_io_ext_int_to_encs(NULL, NULL, &convconfig.enc, &convconfig.enc2);
         convconfig.ecflags = 0;
         convconfig.ecopts = Qnil;
     }
@@ -6661,29 +6698,40 @@
 {
     rb_encoding *enc, *enc2;
     int ecflags;
-    VALUE ecopts;
+    VALUE ecopts, tmp;
 
     if (!NIL_P(v2)) {
 	enc2 = rb_to_encoding(v1);
-	enc = rb_to_encoding(v2);
+	tmp = rb_check_string_type(v2);
+	if (!NIL_P(tmp)) {
+	    char *p = StringValueCStr(tmp);
+	    if (*p == '-' && *(p+1) == '\0') {
+		/* Special case - "-" => no transcoding */
+		enc = enc2;
+		enc2 = NULL;
+	    }
+	    else
+		enc = rb_to_encoding(v2);
+	}
+	else
+	    enc = rb_to_encoding(v2);
         ecflags = rb_econv_prepare_opts(opt, &ecopts);
     }
     else {
 	if (NIL_P(v1)) {
-	    enc = NULL;
-	    enc2 = NULL;
+	    /* Set to default encodings */
+	    rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
             ecflags = 0;
             ecopts = Qnil;
 	}
 	else {
-	    VALUE tmp = rb_check_string_type(v1);
+	    tmp = rb_check_string_type(v1);
 	    if (!NIL_P(tmp)) {
                 parse_mode_enc(StringValueCStr(tmp), &enc, &enc2);
                 ecflags = rb_econv_prepare_opts(opt, &ecopts);
 	    }
 	    else {
-		enc = rb_to_encoding(v1);
-		enc2 = NULL;
+		rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2);
                 ecflags = 0;
                 ecopts = Qnil;
 	    }
Index: ruby.c
===================================================================
--- ruby.c	(revision 19708)
+++ ruby.c	(revision 19709)
@@ -86,7 +86,7 @@
 	    VALUE name;
 	    int index;
 	} enc;
-    } src, ext;
+    } src, ext, intern;
     VALUE req_list;
 };
 
@@ -855,6 +855,7 @@
 		ruby_each_words(s, disable_option, &opt->disable);
 	    }
 	    else if (strncmp("encoding", s, n = 8) == 0 && (!s[n] || s[n] == '=')) {
+		char *p;
 		s += n;
 		if (!*s++) {
 		  next_encoding:
@@ -863,7 +864,15 @@
 		    }
 		}
 	      encoding:
-		opt->ext.enc.name = rb_str_new2(s);
+		p = strchr(s, ':');
+		if (p) {
+		    if (p > s)
+			opt->ext.enc.name = rb_str_new(s, p-s);
+		    if (*++p)
+			opt->intern.enc.name = rb_str_new2(p);
+		}
+		else    
+		    opt->ext.enc.name = rb_str_new2(s);
 	    }
 	    else if (strcmp("version", s) == 0)
 		opt->version = 1;
@@ -966,6 +975,7 @@
 	rb_safe_level() == 0 && (s = getenv("RUBYOPT"))) {
 	VALUE src_enc_name = opt->src.enc.name;
 	VALUE ext_enc_name = opt->ext.enc.name;
+	VALUE int_enc_name = opt->intern.enc.name;
 
 	while (ISSPACE(*s))
 	    s++;
@@ -1005,6 +1015,8 @@
 	    opt->src.enc.name = src_enc_name;
 	if (ext_enc_name)
 	    opt->ext.enc.name = ext_enc_name;
+	if (int_enc_name)
+	    opt->intern.enc.name = int_enc_name;
     }
 
     if (opt->version) {
@@ -1073,6 +1085,9 @@
     if (opt->ext.enc.name != 0) {
 	opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
     }
+    if (opt->intern.enc.name != 0) {
+	opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
+    }
     if (opt->src.enc.name != 0) {
 	opt->src.enc.index = opt_enc_index(opt->src.enc.name);
 	src_encoding_index = opt->src.enc.index;
@@ -1084,6 +1099,11 @@
 	enc = lenc;
     }
     rb_enc_set_default_external(rb_enc_from_encoding(enc));
+    if (opt->intern.enc.index >= 0) {
+	enc = rb_enc_from_index(opt->intern.enc.index);
+	rb_enc_set_default_internal(rb_enc_from_encoding(enc));
+	opt->intern.enc.index = -1;
+    }
 
     rb_set_safe_level_force(safe);
     if (opt->e_script) {
@@ -1105,6 +1125,15 @@
 	tree = load_file(parser, opt->script, 1, opt);
     }
 
+    if (opt->intern.enc.index >= 0) {
+	/* Set in the shebang line */
+	enc = rb_enc_from_index(opt->intern.enc.index);
+	rb_enc_set_default_internal(rb_enc_from_encoding(enc));
+    }
+    else
+	/* Freeze default_internal */
+	rb_enc_set_default_internal(Qnil);
+
     if (!tree) return Qfalse;
 
     process_sflag(opt);
@@ -1175,6 +1204,7 @@
 	char *p;
 	int no_src_enc = !opt->src.enc.name;
 	int no_ext_enc = !opt->ext.enc.name;
+	int no_int_enc = !opt->intern.enc.name;
 
 	enc = rb_usascii_encoding();
 	rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc));
@@ -1261,6 +1291,9 @@
 	    if (no_ext_enc && opt->ext.enc.name) {
 		opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
 	    }
+	    if (no_int_enc && opt->intern.enc.name) {
+		opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
+	    }
 	}
 	else if (!NIL_P(c)) {
 	    rb_io_ungetbyte(f, c);
@@ -1511,6 +1544,7 @@
     args.argv = argv;
     args.opt = cmdline_options_init(&opt);
     opt.ext.enc.index = -1;
+    opt.intern.enc.index = -1;
     tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(),
 				    process_options, (VALUE)&args,
 				    0, rb_progname);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]