ruby-changes:8306
From: matz <ko1@a...>
Date: Sat, 18 Oct 2008 19:36:39 +0900 (JST)
Subject: [ruby-changes:8306] Ruby:r19834 (trunk): * string.c (rb_external_str_new): a new function to convert from
matz 2008-10-18 19:36:20 +0900 (Sat, 18 Oct 2008) New Revision: 19834 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19834 Log: * string.c (rb_external_str_new): a new function to convert from external encoding to internal encoding. if something went wrong, it returns a string with the external encoding. * string.c (rb_external_str_new_with_enc): same as above besides you can specify the source encoding. * ruby.c (ruby_set_argv): use rb_external_str_new() * ruby.c (set_arg0, ruby_script): ditto. Modified files: trunk/ChangeLog trunk/dir.c trunk/include/ruby/encoding.h trunk/include/ruby/intern.h trunk/ruby.c trunk/string.c Index: include/ruby/intern.h =================================================================== --- include/ruby/intern.h (revision 19833) +++ include/ruby/intern.h (revision 19834) @@ -546,6 +546,7 @@ VALUE rb_tainted_str_new_cstr(const char*); VALUE rb_tainted_str_new(const char*, long); VALUE rb_tainted_str_new2(const char*); +VALUE rb_external_str_new(const char*, long); VALUE rb_str_buf_new(long); VALUE rb_str_buf_new_cstr(const char*); VALUE rb_str_buf_new2(const char*); Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 19833) +++ include/ruby/encoding.h (revision 19834) @@ -92,6 +92,8 @@ VALUE rb_obj_encoding(VALUE); VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc); + /* index -> rb_encoding */ rb_encoding* rb_enc_from_index(int idx); Index: ChangeLog =================================================================== --- ChangeLog (revision 19833) +++ ChangeLog (revision 19834) @@ -1,3 +1,16 @@ +Sat Oct 18 13:30:53 2008 Yukihiro Matsumoto <matz@r...> + + * string.c (rb_external_str_new): a new function to convert from + external encoding to internal encoding. if something went + wrong, it returns a string with the external encoding. + + * string.c (rb_external_str_new_with_enc): same as above besides + you can specify the source encoding. + + * ruby.c (ruby_set_argv): use rb_external_str_new() + + * ruby.c (set_arg0, ruby_script): ditto. + Sat Oct 18 04:08:18 2008 Yukihiro Matsumoto <matz@r...> * lib/tempfile.rb (Tempfile#initialize): now Tempfile.new takes Index: string.c =================================================================== --- string.c (revision 19833) +++ string.c (revision 19834) @@ -472,6 +472,60 @@ RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cstr, (ptr)) #define rb_tainted_str_new2 rb_tainted_str_new_cstr +VALUE +rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc) +{ + VALUE str; + rb_encoding *ienc; + + if (len == 0 && !ptr) len = strlen(ptr); + str = rb_tainted_str_new(ptr, len); + rb_enc_associate(str, eenc); + ienc = rb_default_internal_encoding(); + if (ienc) { + rb_econv_t *ec; + rb_econv_result_t ret; + VALUE newstr = rb_str_new(0, len); + long nlen = len; + const unsigned char *sp; + unsigned char *dp; + + retry: + ec = rb_econv_open_opts(eenc->name, ienc->name, 0, Qnil); + if (!ec) return str; + + sp = (unsigned char*)RSTRING_PTR(str); + dp = (unsigned char*)RSTRING_PTR(newstr); + ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str), + &dp, (unsigned char*)RSTRING_END(newstr), 0); + rb_econv_close(ec); + switch (ret) { + case econv_destination_buffer_full: + /* destination buffer short */ + nlen *= 2; + rb_str_resize(newstr, nlen); + goto retry; + + case econv_finished: + nlen = dp - (unsigned char*)RSTRING_PTR(newstr); + rb_str_set_len(newstr, nlen); + rb_enc_associate(newstr, ienc); + return newstr; + + default: + /* some error, return original */ + return str; + } + } + return str; +} + +VALUE +rb_external_str_new(const char *ptr, long len) +{ + return rb_external_str_new_with_enc(ptr, len, rb_default_external_encoding()); +} + static VALUE str_replace_shared(VALUE str2, VALUE str) { Index: dir.c =================================================================== --- dir.c (revision 19833) +++ dir.c (revision 19834) @@ -423,16 +423,6 @@ if (dirp->dir == NULL) dir_closed();\ } while (0) -static VALUE -dir_enc_str_new(const char *p, long len, rb_encoding *enc) -{ - VALUE path = rb_tainted_str_new(p, len); - if (rb_enc_asciicompat(enc) && rb_enc_str_asciionly_p(path)) { - enc = rb_usascii_encoding(); - } - rb_enc_associate(path, enc); - return path; -} /* * call-seq: @@ -494,7 +484,7 @@ errno = 0; dp = readdir(dirp->dir); if (dp) { - return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc); + return rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc); } else if (errno == 0) { /* end of stream */ return Qnil; @@ -532,7 +522,7 @@ GetDIR(dir, dirp); rewinddir(dirp->dir); for (dp = readdir(dirp->dir); dp != NULL; dp = readdir(dirp->dir)) { - rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc)); + rb_yield(rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc)); if (dirp->dir == NULL) dir_closed(); } return dir; @@ -1436,7 +1426,7 @@ static void push_pattern(const char *path, VALUE ary, void *enc) { - rb_ary_push(ary, dir_enc_str_new(path, strlen(path), enc)); + rb_ary_push(ary, rb_external_str_new_with_enc(path, strlen(path), enc)); } static int Index: ruby.c =================================================================== --- ruby.c (revision 19833) +++ ruby.c (revision 19834) @@ -999,7 +999,6 @@ NODE *tree = 0; VALUE parser; VALUE iseq; - VALUE args; rb_encoding *enc, *lenc; const char *s; char fbuf[MAXPATHLEN]; @@ -1108,17 +1107,12 @@ opt->script = RSTRING_PTR(opt->script_name); safe = rb_safe_level(); rb_set_safe_level_force(0); - ruby_set_argv(argc, argv); - process_sflag(opt); ruby_init_loadpath(); ruby_init_gems(!(opt->disable & DISABLE_BIT(gems))); lenc = rb_locale_encoding(); rb_enc_associate(rb_progname, lenc); opt->script_name = rb_str_new4(rb_progname); - for (i = 0, args = rb_argv; i < RARRAY_LEN(args); i++) { - rb_enc_associate(RARRAY_PTR(args)[i], lenc); - } parser = rb_parser_new(); if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue); if (opt->ext.enc.name != 0) { @@ -1143,6 +1137,8 @@ rb_enc_set_default_internal(rb_enc_from_encoding(enc)); opt->intern.enc.index = -1; } + ruby_set_argv(argc, argv); + process_sflag(opt); rb_set_safe_level_force(safe); if (opt->e_script) { @@ -1457,14 +1453,14 @@ } } #endif - rb_progname = rb_obj_freeze(rb_tainted_str_new(s, i)); + rb_progname = rb_obj_freeze(rb_external_str_new(s, i)); } void ruby_script(const char *name) { if (name) { - rb_progname = rb_obj_freeze(rb_tainted_str_new2(name)); + rb_progname = rb_obj_freeze(rb_external_str_new(name, strlen(name))); } } @@ -1547,7 +1543,7 @@ #endif rb_ary_clear(av); for (i = 0; i < argc; i++) { - VALUE arg = rb_tainted_str_new2(argv[i]); + VALUE arg = rb_external_str_new(argv[i], strlen(argv[i])); OBJ_FREEZE(arg); rb_ary_push(av, arg); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/