ruby-changes:36567
From: nobu <ko1@a...>
Date: Sat, 29 Nov 2014 16:53:30 +0900 (JST)
Subject: [ruby-changes:36567] nobu:r48648 (trunk): win32.c: use UTF-8 for argv
nobu 2014-11-29 16:53:17 +0900 (Sat, 29 Nov 2014)

  New Revision: 48648

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=48648

  Log:
    win32.c: use UTF-8 for argv

    * ruby.c (ruby_set_argv): convert argv from UTF-8.

    * win32/win32.c (rb_w32_sysinit, cmdglob, w32_cmdvector): convert
      wide char command line to UTF-8 argv, and glob in UTF-8 so that
      metacharacters would match multibyte characters.
      [ruby-dev:48752] [Bug #10555]

  Modified files:
    trunk/ChangeLog
    trunk/ruby.c
    trunk/test/ruby/test_rubyoptions.rb
    trunk/win32/win32.c

Index: ChangeLog
===================================================================
--- ChangeLog (revision 48647)
+++ ChangeLog (revision 48648)
@@ -1,3 +1,12 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sat Nov 29 16:53:14 2014  Nobuyoshi Nakada  <nobu@r...>
+
+        * ruby.c (ruby_set_argv): convert argv from UTF-8.
+
+        * win32/win32.c (rb_w32_sysinit, cmdglob, w32_cmdvector): convert
+          wide char command line to UTF-8 argv, and glob in UTF-8 so that
+          metacharacters would match multibyte characters.
+          [ruby-dev:48752] [Bug #10555]
+
 Sat Nov 29 16:14:50 2014  Nobuyoshi Nakada  <nobu@r...>
 
         * error.c (rb_typeddata_is_kind_of, rb_check_typeddata): ditto.
Index: win32/win32.c
===================================================================
--- win32/win32.c (revision 48647)
+++ win32/win32.c (revision 48648)
@@ -108,6 +108,7 @@ int rb_w32_wait_events(HANDLE *events, i https://github.com/ruby/ruby/blob/trunk/win32/win32.c#L108
 static int rb_w32_open_osfhandle(intptr_t osfhandle, int flags);
 static int wstati64(const WCHAR *path, struct stati64 *st);
 VALUE rb_w32_conv_from_wchar(const WCHAR *wstr, rb_encoding *enc);
+int ruby_brace_glob_with_enc(const char *str, int flags, ruby_glob_func *func, VALUE arg, rb_encoding *enc);
 
 #define RUBY_CRITICAL(expr) do { expr; } while (0)
 
@@ -743,7 +744,7 @@ socklist_delete(SOCKET *sockp, int *flag https://github.com/ruby/ruby/blob/trunk/win32/win32.c#L744
     return ret;
 }
 
-static int w32_cmdvector(const WCHAR *, char ***, UINT);
+static int w32_cmdvector(const WCHAR *, char ***, UINT, rb_encoding *);
 //
 // Initialization stuff
 //
@@ -767,7 +768,7 @@ rb_w32_sysinit(int *argc, char ***argv) https://github.com/ruby/ruby/blob/trunk/win32/win32.c#L768
     //
     // subvert cmd.exe's feeble attempt at command line parsing
     //
-    *argc = w32_cmdvector(GetCommandLineW(), argv, CP_ACP);
+    *argc = w32_cmdvector(GetCommandLineW(), argv, CP_UTF8, rb_utf8_encoding());
 
     //
     // Now set up the correct time stuff
@@ -1486,7 +1487,7 @@ insert(const char *path, VALUE vinfo, vo https://github.com/ruby/ruby/blob/trunk/win32/win32.c#L1487
 
 /* License: Artistic or GPL */
 static NtCmdLineElement **
-cmdglob(NtCmdLineElement *patt, NtCmdLineElement **tail, UINT cp)
+cmdglob(NtCmdLineElement *patt, NtCmdLineElement **tail, UINT cp, rb_encoding *enc)
 {
     char buffer[MAXPATHLEN], *buf = buffer;
     NtCmdLineElement **last = tail;
@@ -1498,7 +1499,7 @@ cmdglob(NtCmdLineElement *patt, NtCmdLin https://github.com/ruby/ruby/blob/trunk/win32/win32.c#L1499
     strlcpy(buf, patt->str, patt->len + 1);
     buf[patt->len] = '\0';
     translate_char(buf, '\\', '/', cp);
-    status = ruby_brace_glob(buf, 0, insert, (VALUE)&tail);
+    status = ruby_brace_glob_with_enc(buf, 0, insert, (VALUE)&tail, enc);
     if (buf != buffer)
         free(buf);
 
@@ -1574,7 +1575,7 @@ skipspace(WCHAR *ptr) https://github.com/ruby/ruby/blob/trunk/win32/win32.c#L1575
 
 /* License: Artistic or GPL */
 static int
-w32_cmdvector(const WCHAR *cmd, char ***vec, UINT cp)
+w32_cmdvector(const WCHAR *cmd, char ***vec, UINT cp, rb_encoding *enc)
 {
     int globbing, len;
     int elements, strsz, done;
@@ -1742,7 +1743,7 @@ w32_cmdvector(const WCHAR *cmd, char *** https://github.com/ruby/ruby/blob/trunk/win32/win32.c#L1743
         curr->str = rb_w32_wstr_to_mbstr(cp, base, len, &curr->len);
         curr->flags |= NTMALLOC;
 
-        if (globbing && (tail = cmdglob(curr, cmdtail, cp))) {
+        if (globbing && (tail = cmdglob(curr, cmdtail, cp, enc))) {
             cmdtail = tail;
         }
         else {
Index: ruby.c
===================================================================
--- ruby.c (revision 48647)
+++ ruby.c (revision 48648)
@@ -311,6 +311,7 @@ ruby_incpush_expand(const char *path) https://github.com/ruby/ruby/blob/trunk/ruby.c#L311
     ruby_push_include(path, expand_include_path);
 }
 
+#undef UTF8_PATH
 #if defined _WIN32 || defined __CYGWIN__
 static HMODULE libruby;
 
@@ -327,6 +328,12 @@ rb_libruby_handle(void) https://github.com/ruby/ruby/blob/trunk/ruby.c#L328
 {
     return libruby;
 }
+
+# define UTF8_PATH 1
+#endif
+
+#ifndef UTF8_PATH
+# define UTF8_PATH 0
 #endif
 
 void ruby_init_loadpath_safe(int safe_level);
@@ -1794,6 +1801,19 @@ set_arg0(VALUE val, ID id) https://github.com/ruby/ruby/blob/trunk/ruby.c#L1801
     rb_progname = rb_str_new_frozen(proc_setproctitle(rb_mProcess, val));
 }
 
+static inline VALUE
+external_str_new_cstr(const char *p)
+{
+#if UTF8_PATH
+    VALUE str = rb_utf8_str_new_cstr(p);
+    return rb_str_conv_enc_opts(str, NULL, rb_default_external_encoding(),
+                                ECONV_UNDEF_REPLACE|ECONV_INVALID_REPLACE,
+                                Qnil);
+#else
+    return rb_external_str_new_cstr(p);
+#endif
+}
+
 /*! Sets the current script name to this value.
  *
  * This is similar to <code>$0 = name</code> in Ruby level but also affects
@@ -1910,7 +1930,7 @@ ruby_set_argv(int argc, char **argv) https://github.com/ruby/ruby/blob/trunk/ruby.c#L1930
 #endif
     rb_ary_clear(av);
     for (i = 0; i < argc; i++) {
-        VALUE arg = rb_external_str_new_cstr(argv[i]);
+        VALUE arg = external_str_new_cstr(argv[i]);
 
         OBJ_FREEZE(arg);
         rb_ary_push(av, arg);
Index: test/ruby/test_rubyoptions.rb
===================================================================
--- test/ruby/test_rubyoptions.rb (revision 48647)
+++ test/ruby/test_rubyoptions.rb (revision 48648)
@@ -694,6 +694,18 @@ class TestRubyOptions < Test::Unit::Test https://github.com/ruby/ruby/blob/trunk/test/ruby/test_rubyoptions.rb#L694
     end
   end
 
+  if /mswin|mingw/ =~ RUBY_PLATFORM
+    def test_command_line_glob_nonascii
+      bug10555 = '[ruby-dev:48752] [Bug #10555]'
+      name = "\u{3042}.txt"
+      with_tmpchdir do |dir|
+        open(name, "w") {}
+        assert_in_out_err(["-Eutf-8", "-e", "puts ARGV", "?.txt"], "", [name], [],
+                          bug10555, encoding: "utf-8")
+      end
+    end
+  end
+
   def test_script_is_directory
     feature2408 = '[ruby-core:26925]'
     assert_in_out_err(%w[.], "", [], /Is a directory -- \./, feature2408)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/