[前][次][番号順一覧][スレッド一覧]

ruby-changes:8306

From: matz <ko1@a...>
Date: Sat, 18 Oct 2008 19:36:39 +0900 (JST)
Subject: [ruby-changes:8306] Ruby:r19834 (trunk): * string.c (rb_external_str_new): a new function to convert from

matz	2008-10-18 19:36:20 +0900 (Sat, 18 Oct 2008)

  New Revision: 19834

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19834

  Log:
    * string.c (rb_external_str_new): a new function to convert from
      external encoding to internal encoding.  if something went
      wrong, it returns a string with the external encoding.
    
    * string.c (rb_external_str_new_with_enc): same as above besides
      you can specify the source encoding.
    
    * ruby.c (ruby_set_argv): use rb_external_str_new()
    
    * ruby.c (set_arg0, ruby_script): ditto.

  Modified files:
    trunk/ChangeLog
    trunk/dir.c
    trunk/include/ruby/encoding.h
    trunk/include/ruby/intern.h
    trunk/ruby.c
    trunk/string.c

Index: include/ruby/intern.h
===================================================================
--- include/ruby/intern.h	(revision 19833)
+++ include/ruby/intern.h	(revision 19834)
@@ -546,6 +546,7 @@
 VALUE rb_tainted_str_new_cstr(const char*);
 VALUE rb_tainted_str_new(const char*, long);
 VALUE rb_tainted_str_new2(const char*);
+VALUE rb_external_str_new(const char*, long);
 VALUE rb_str_buf_new(long);
 VALUE rb_str_buf_new_cstr(const char*);
 VALUE rb_str_buf_new2(const char*);
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 19833)
+++ include/ruby/encoding.h	(revision 19834)
@@ -92,6 +92,8 @@
 VALUE rb_obj_encoding(VALUE);
 VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc);
 
+VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc);
+
 /* index -> rb_encoding */
 rb_encoding* rb_enc_from_index(int idx);
 
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19833)
+++ ChangeLog	(revision 19834)
@@ -1,3 +1,16 @@
+Sat Oct 18 13:30:53 2008  Yukihiro Matsumoto  <matz@r...>
+
+	* string.c (rb_external_str_new): a new function to convert from
+	  external encoding to internal encoding.  if something went
+	  wrong, it returns a string with the external encoding.
+
+	* string.c (rb_external_str_new_with_enc): same as above besides
+	  you can specify the source encoding.
+
+	* ruby.c (ruby_set_argv): use rb_external_str_new()
+
+	* ruby.c (set_arg0, ruby_script): ditto.
+
 Sat Oct 18 04:08:18 2008  Yukihiro Matsumoto  <matz@r...>
 
 	* lib/tempfile.rb (Tempfile#initialize): now Tempfile.new takes
Index: string.c
===================================================================
--- string.c	(revision 19833)
+++ string.c	(revision 19834)
@@ -472,6 +472,79 @@
 RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cstr, (ptr))
 #define rb_tainted_str_new2 rb_tainted_str_new_cstr
 
+/*
+ * Creates a tainted string from the +len+ bytes at +ptr+, tagged with the
+ * external encoding +eenc+, and tries to convert it to the default
+ * internal encoding.
+ *
+ * Returns the converted string (associated with the internal encoding)
+ * when the conversion finishes.  Returns the unconverted string, still
+ * associated with +eenc+, when no default internal encoding is set, when
+ * no converter can be opened, or when the conversion stops with an error.
+ *
+ * +len+ is always taken as given; the length is never recomputed from
+ * +ptr+, so +ptr+ is not dereferenced here when it is NULL.
+ */
+VALUE
+rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
+{
+    VALUE str;
+    rb_encoding *ienc;
+
+    str = rb_tainted_str_new(ptr, len);
+    rb_enc_associate(str, eenc);
+    ienc = rb_default_internal_encoding();
+    if (ienc) {
+	rb_econv_t *ec;
+	rb_econv_result_t ret;
+	VALUE newstr = rb_str_new(0, len);
+	long nlen = len;
+	const unsigned char *sp;
+	unsigned char *dp;
+
+      retry:
+	/* each attempt restarts the conversion from the beginning of str */
+	ec = rb_econv_open_opts(eenc->name, ienc->name, 0, Qnil);
+	if (!ec) return str;
+
+	sp = (unsigned char*)RSTRING_PTR(str);
+	dp = (unsigned char*)RSTRING_PTR(newstr);
+	ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str),
+			       &dp, (unsigned char*)RSTRING_END(newstr), 0);
+	rb_econv_close(ec);
+        switch (ret) {
+          case econv_destination_buffer_full:
+	    /* destination buffer short; grow it, making sure an empty
+	     * buffer grows too (0 * 2 would retry forever) */
+	    nlen = nlen > 0 ? nlen * 2 : 1;
+	    rb_str_resize(newstr, nlen);
+	    goto retry;
+
+          case econv_finished:
+	    /* the converted length is how far dp advanced in newstr */
+	    nlen = dp - (unsigned char*)RSTRING_PTR(newstr);
+	    rb_str_set_len(newstr, nlen);
+	    rb_enc_associate(newstr, ienc);
+	    return newstr;
+
+	  default:
+	    /* some error, return original */
+	    return str;
+	}
+    }
+    return str;
+}
+
+/*
+ * Same as rb_external_str_new_with_enc(), using the default external
+ * encoding as the source encoding.
+ */
+VALUE
+rb_external_str_new(const char *ptr, long len)
+{
+    return rb_external_str_new_with_enc(ptr, len, rb_default_external_encoding());
+}
+
 static VALUE
 str_replace_shared(VALUE str2, VALUE str)
 {
Index: dir.c
===================================================================
--- dir.c	(revision 19833)
+++ dir.c	(revision 19834)
@@ -423,16 +423,6 @@
     if (dirp->dir == NULL) dir_closed();\
 } while (0)
 
-static VALUE
-dir_enc_str_new(const char *p, long len, rb_encoding *enc)
-{
-    VALUE path = rb_tainted_str_new(p, len);
-    if (rb_enc_asciicompat(enc) && rb_enc_str_asciionly_p(path)) {
-	enc = rb_usascii_encoding();
-    }
-    rb_enc_associate(path, enc);
-    return path;
-}
 
 /*
  *  call-seq:
@@ -494,7 +484,7 @@
     errno = 0;
     dp = readdir(dirp->dir);
     if (dp) {
-	return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc);
+	return rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc);
     }
     else if (errno == 0) {	/* end of stream */
 	return Qnil;
@@ -532,7 +522,7 @@
     GetDIR(dir, dirp);
     rewinddir(dirp->dir);
     for (dp = readdir(dirp->dir); dp != NULL; dp = readdir(dirp->dir)) {
-	rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc));
+	rb_yield(rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc));
 	if (dirp->dir == NULL) dir_closed();
     }
     return dir;
@@ -1436,7 +1426,7 @@
 static void
 push_pattern(const char *path, VALUE ary, void *enc)
 {
-    rb_ary_push(ary, dir_enc_str_new(path, strlen(path), enc));
+    rb_ary_push(ary, rb_external_str_new_with_enc(path, strlen(path), enc));
 }
 
 static int
Index: ruby.c
===================================================================
--- ruby.c	(revision 19833)
+++ ruby.c	(revision 19834)
@@ -999,7 +999,6 @@
     NODE *tree = 0;
     VALUE parser;
     VALUE iseq;
-    VALUE args;
     rb_encoding *enc, *lenc;
     const char *s;
     char fbuf[MAXPATHLEN];
@@ -1108,17 +1107,12 @@
     opt->script = RSTRING_PTR(opt->script_name);
     safe = rb_safe_level();
     rb_set_safe_level_force(0);
-    ruby_set_argv(argc, argv);
-    process_sflag(opt);
 
     ruby_init_loadpath();
     ruby_init_gems(!(opt->disable & DISABLE_BIT(gems)));
     lenc = rb_locale_encoding();
     rb_enc_associate(rb_progname, lenc);
     opt->script_name = rb_str_new4(rb_progname);
-    for (i = 0, args = rb_argv; i < RARRAY_LEN(args); i++) {
-	rb_enc_associate(RARRAY_PTR(args)[i], lenc);
-    }
     parser = rb_parser_new();
     if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue);
     if (opt->ext.enc.name != 0) {
@@ -1143,6 +1137,8 @@
 	rb_enc_set_default_internal(rb_enc_from_encoding(enc));
 	opt->intern.enc.index = -1;
     }
+    ruby_set_argv(argc, argv);
+    process_sflag(opt);
 
     rb_set_safe_level_force(safe);
     if (opt->e_script) {
@@ -1457,14 +1453,14 @@
 	}
     }
 #endif
-    rb_progname = rb_obj_freeze(rb_tainted_str_new(s, i));
+    rb_progname = rb_obj_freeze(rb_external_str_new(s, i));
 }
 
 void
 ruby_script(const char *name)
 {
     if (name) {
-	rb_progname = rb_obj_freeze(rb_tainted_str_new2(name));
+	rb_progname = rb_obj_freeze(rb_external_str_new(name, strlen(name)));
     }
 }
 
@@ -1547,7 +1543,7 @@
 #endif
     rb_ary_clear(av);
     for (i = 0; i < argc; i++) {
-	VALUE arg = rb_tainted_str_new2(argv[i]);
+	VALUE arg = rb_external_str_new(argv[i], strlen(argv[i]));
 
 	OBJ_FREEZE(arg);
 	rb_ary_push(av, arg);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]