[前][次][番号順一覧][スレッド一覧]

ruby-changes:64005

From: Lars <ko1@a...>
Date: Tue, 8 Dec 2020 02:00:56 +0900 (JST)
Subject: [ruby-changes:64005] ca76337a00 (master): Windows: Read ENV names and values as UTF-8 encoded Strings (#3818)

https://git.ruby-lang.org/ruby.git/commit/?id=ca76337a00

From ca76337a00244635faa331afd04f4b75161ce6fb Mon Sep 17 00:00:00 2001
From: Lars Kanis <kanis@c...>
Date: Mon, 7 Dec 2020 18:00:39 +0100
Subject: Windows: Read ENV names and values as UTF-8 encoded Strings (#3818)

* Windows: Read ENV names and values as UTF-8 encoded Strings

Implements issue #12650: fix https://bugs.ruby-lang.org/issues/12650

This also removes the special encoding for ENV['PATH'] and some
complexity in the code that is unnecessary now.

* Windows: Improve readablity of getenv() encoding

getenv() did use the expected codepage as an implicit parameter of the macro.
This is mis-leading since include/ruby/win32.h has a different definition.
Using the "cp" variable explicit (like the other function calls) makes it
more readable and consistent.

* Windows: Change external C-API macros getenv() and execv() to use UTF-8

They used to process and return strings with locale encoding,
but since all ruby-internal spawn and environment functions use UTF-8,
it makes sense to change the C-API equally.

diff --git a/hash.c b/hash.c
index bde35d0..98405ec 100644
--- a/hash.c
+++ b/hash.c
@@ -4815,22 +4815,7 @@ static char **my_environ; https://github.com/ruby/ruby/blob/trunk/hash.c#L4815
 #undef environ
 #define environ my_environ
 #undef getenv
-static char *(*w32_getenv)(const char*);
-static char *
-w32_getenv_unknown(const char *name)
-{
-    char *(*func)(const char*);
-    if (rb_locale_encindex() == rb_ascii8bit_encindex()) {
-	func = rb_w32_getenv;
-    }
-    else {
-	func = rb_w32_ugetenv;
-    }
-    /* atomic assignment in flat memory model */
-    return (w32_getenv = func)(name);
-}
-static char *(*w32_getenv)(const char*) = w32_getenv_unknown;
-#define getenv(n) w32_getenv(n)
+#define getenv(n) rb_w32_ugetenv(n)
 #elif defined(__APPLE__)
 #undef environ
 #define environ (*_NSGetEnviron())
@@ -4849,20 +4834,20 @@ extern char **environ; https://github.com/ruby/ruby/blob/trunk/hash.c#L4834
 #define ENVNMATCH(s1, s2, n) (memcmp((s1), (s2), (n)) == 0)
 #endif
 
-static VALUE
-env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
+static inline rb_encoding *
+env_encoding()
 {
 #ifdef _WIN32
-    rb_encoding *internal = rb_default_internal_encoding();
-    const int ecflags = ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
-    rb_encoding *utf8 = rb_utf8_encoding();
-    VALUE str = rb_enc_str_new(NULL, 0, (internal ? internal : enc));
-    if (NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr, len, utf8, ecflags, Qnil))) {
-        rb_str_initialize(str, ptr, len, NULL);
-    }
+    return rb_utf8_encoding();
 #else
-    VALUE str = rb_external_str_new_with_enc(ptr, len, enc);
+    return rb_locale_encoding();
 #endif
+}
+
+static VALUE
+env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
+{
+    VALUE str = rb_external_str_new_with_enc(ptr, len, enc);
 
     rb_obj_freeze(str);
     return str;
@@ -4877,7 +4862,7 @@ env_enc_str_new_cstr(const char *ptr, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/hash.c#L4862
 static VALUE
 env_str_new(const char *ptr, long len)
 {
-    return env_enc_str_new(ptr, len, rb_locale_encoding());
+    return env_enc_str_new(ptr, len, env_encoding());
 }
 
 static VALUE
@@ -4889,46 +4874,23 @@ env_str_new2(const char *ptr) https://github.com/ruby/ruby/blob/trunk/hash.c#L4874
 
 static const char TZ_ENV[] = "TZ";
 
-static rb_encoding *
-env_encoding_for(const char *name, const char *ptr)
-{
-    if (ENVMATCH(name, PATH_ENV)) {
-	return rb_filesystem_encoding();
-    }
-    else {
-	return rb_locale_encoding();
-    }
-}
-
 static VALUE
 env_name_new(const char *name, const char *ptr)
 {
-    return env_enc_str_new_cstr(ptr, env_encoding_for(name, ptr));
+    return env_enc_str_new_cstr(ptr, env_encoding());
 }
 
 static void *
 get_env_cstr(
-#ifdef _WIN32
-    volatile VALUE *pstr,
-#else
     VALUE str,
-#endif
     const char *name)
 {
-#ifdef _WIN32
-    VALUE str = *pstr;
-#endif
     char *var;
     rb_encoding *enc = rb_enc_get(str);
     if (!rb_enc_asciicompat(enc)) {
 	rb_raise(rb_eArgError, "bad environment variable %s: ASCII incompatible encoding: %s",
 		 name, rb_enc_name(enc));
     }
-#ifdef _WIN32
-    if (!rb_enc_str_asciionly_p(str)) {
-	*pstr = str = rb_str_conv_enc(str, NULL, rb_utf8_encoding());
-    }
-#endif
     var = RSTRING_PTR(str);
     if (memchr(var, '\0', RSTRING_LEN(str))) {
 	rb_raise(rb_eArgError, "bad environment variable %s: contains null byte", name);
@@ -4936,13 +4898,8 @@ get_env_cstr( https://github.com/ruby/ruby/blob/trunk/hash.c#L4898
     return rb_str_fill_terminator(str, 1); /* ASCII compatible */
 }
 
-#ifdef _WIN32
-#define get_env_ptr(var, val) \
-    (var = get_env_cstr(&(val), #var))
-#else
 #define get_env_ptr(var, val) \
     (var = get_env_cstr(val, #var))
-#endif
 
 static inline const char *
 env_name(volatile VALUE *s)
@@ -4983,9 +4940,6 @@ env_delete(VALUE name) https://github.com/ruby/ruby/blob/trunk/hash.c#L4940
 	VALUE value = env_str_new2(val);
 
 	ruby_setenv(nam, 0);
-	if (ENVMATCH(nam, PATH_ENV)) {
-	    RB_GC_GUARD(name);
-	}
 	return value;
     }
     return Qnil;
@@ -5407,9 +5361,6 @@ env_aset(VALUE nm, VALUE val) https://github.com/ruby/ruby/blob/trunk/hash.c#L5361
     get_env_ptr(value, val);
 
     ruby_setenv(name, value);
-    if (ENVMATCH(name, PATH_ENV)) {
-	RB_GC_GUARD(nm);
-    }
     reset_by_modified_env(name);
     return val;
 }
diff --git a/include/ruby/win32.h b/include/ruby/win32.h
index b29470b..b3b8b47 100644
--- a/include/ruby/win32.h
+++ b/include/ruby/win32.h
@@ -160,7 +160,7 @@ typedef int clockid_t; https://github.com/ruby/ruby/blob/trunk/include/ruby/win32.h#L160
 #define Sleep(msec)		(void)rb_w32_Sleep(msec)
 
 #undef execv
-#define execv(path,argv)	rb_w32_aspawn(P_OVERLAY,path,argv)
+#define execv(path,argv)	rb_w32_uaspawn(P_OVERLAY,path,argv)
 #undef isatty
 #define isatty(h)		rb_w32_isatty(h)
 
@@ -717,7 +717,7 @@ extern char *rb_w32_strerror(int); https://github.com/ruby/ruby/blob/trunk/include/ruby/win32.h#L717
 #define getcwd(b, s)		rb_w32_getcwd(b, s)
 
 #undef getenv
-#define getenv(n)		rb_w32_getenv(n)
+#define getenv(n)		rb_w32_ugetenv(n)
 
 #undef rename
 #define rename(o, n)		rb_w32_rename(o, n)
diff --git a/spec/ruby/core/env/element_reference_spec.rb b/spec/ruby/core/env/element_reference_spec.rb
index 1cd58ac..22633e7 100644
--- a/spec/ruby/core/env/element_reference_spec.rb
+++ b/spec/ruby/core/env/element_reference_spec.rb
@@ -59,6 +59,7 @@ describe "ENV.[]" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/env/element_reference_spec.rb#L59
     Encoding.default_internal = nil
 
     locale = Encoding.find('locale')
+    locale = Encoding::UTF_8 if platform_is :windows
     locale = Encoding::BINARY if locale == Encoding::US_ASCII
     ENV[@variable] = "\xC3\xB8"
     ENV[@variable].encoding.should == locale
diff --git a/spec/ruby/core/env/fetch_spec.rb b/spec/ruby/core/env/fetch_spec.rb
index ef8f0a4..f3af6f3 100644
--- a/spec/ruby/core/env/fetch_spec.rb
+++ b/spec/ruby/core/env/fetch_spec.rb
@@ -56,7 +56,8 @@ describe "ENV.fetch" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/env/fetch_spec.rb#L56
   end
 
   it "uses the locale encoding" do
+    encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale')
     ENV["foo"] = "bar"
-    ENV.fetch("foo").encoding.should == Encoding.find('locale')
+    ENV.fetch("foo").encoding.should == encoding
   end
 end
diff --git a/spec/ruby/core/env/shift_spec.rb b/spec/ruby/core/env/shift_spec.rb
index 0fe08d4..49d98c3 100644
--- a/spec/ruby/core/env/shift_spec.rb
+++ b/spec/ruby/core/env/shift_spec.rb
@@ -42,9 +42,10 @@ describe "ENV.shift" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/env/shift_spec.rb#L42
   it "uses the locale encoding if Encoding.default_internal is nil" do
     Encoding.default_internal = nil
 
+    encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale')
     pair = ENV.shift
-    pair.first.encoding.should equal(Encoding.find("locale"))
-    pair.last.encoding.should equal(Encoding.find("locale"))
+    pair.first.encoding.should equal(encoding)
+    pair.last.encoding.should equal(encoding)
   end
 
   it "transcodes from the locale encoding to Encoding.default_internal if set" do
diff --git a/spec/ruby/core/env/values_at_spec.rb b/spec/ruby/core/env/values_at_spec.rb
index ee970e5..87c8c7f 100644
--- a/spec/ruby/core/env/values_at_spec.rb
+++ b/spec/ruby/core/env/values_at_spec.rb
@@ -28,7 +28,8 @@ describe "ENV.values_at" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/core/env/values_at_spec.rb#L28
   end
 
   it "uses the locale encoding" do
-    ENV.values_at(ENV.keys.first).first.encoding.should == Encoding.find('locale')
+    encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale')
+    ENV.values_at(ENV.keys.first).first.encoding.should == encoding
   end
 
   it "raises TypeError when a key is not coercible to String" do
diff --git a/test/ruby/test_env.rb b/test/ruby/test_env.rb
index 7735b53..ddfce13 100644
--- a/test/ruby/test_env.rb
+++ b/test/ruby/test_env.rb
@@ -369,7 +369,8 @@ class TestEnv < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_env.rb#L369
       assert_equal("foo", v)
     end
     assert_invalid_env {|var| ENV.assoc(var)}
-    assert_equal(Encoding.find("locale"), v.encoding)
+    encoding = /mswin|mingw/ =~ RUBY_PLATFORM ? Encoding::UTF_8 : Encoding.find("locale")
+    assert_equal(encoding, v.encoding)
   end
 
   def test_has_value2
@@ -579,15 +580,13 @@ class TestEnv < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_env.rb#L580
       end;
     end
 
-    if Encoding.find("locale") == Encoding::UTF_8
-      def test_utf8
-        text = "testing \u{e5 e1 e2 e4 e3 101 3042}"
-        test = ENV["test"]
-        ENV["test"] = text
-        assert_equal text, ENV["test"]
-      ensure
-        ENV["test"] = test
-      end
+    def test_utf8
+      text = "testing \u{e5 e1 e2 e4 e3 101 3042} (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]