[前][次][番号順一覧][スレッド一覧]

ruby-changes:50923

From: nobu <ko1@a...>
Date: Tue, 10 Apr 2018 09:41:52 +0900 (JST)
Subject: [ruby-changes:50923] nobu:r63130 (trunk): symbol.c: non-ASCII constant names

nobu	2018-04-10 09:41:47 +0900 (Tue, 10 Apr 2018)

  New Revision: 63130

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=63130

  Log:
    symbol.c: non-ASCII constant names
    
    * symbol.c (rb_sym_constant_char_p): support for non-ASCII
      constant names.  [Feature #13770]
    
    * object.c (rb_mod_const_get, rb_mod_const_defined): support for
      non-ASCII constant names.

  Modified files:
    trunk/NEWS
    trunk/object.c
    trunk/symbol.c
    trunk/test/ruby/test_parse.rb
Index: object.c
===================================================================
--- object.c	(revision 63129)
+++ object.c	(revision 63130)
@@ -2480,7 +2480,7 @@ rb_mod_const_get(int argc, VALUE *argv, https://github.com/ruby/ruby/blob/trunk/object.c#L2480
 	if (!id) {
 	    part = rb_str_subseq(name, beglen, len);
 	    OBJ_FREEZE(part);
-	    if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) {
+	    if (!rb_is_const_name(part)) {
 		name = part;
 		goto wrong_name;
 	    }
@@ -2633,7 +2633,7 @@ rb_mod_const_defined(int argc, VALUE *ar https://github.com/ruby/ruby/blob/trunk/object.c#L2633
 	if (!id) {
 	    part = rb_str_subseq(name, beglen, len);
 	    OBJ_FREEZE(part);
-	    if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) {
+	    if (!rb_is_const_name(part)) {
 		name = part;
 		goto wrong_name;
 	    }
Index: NEWS
===================================================================
--- NEWS	(revision 63129)
+++ NEWS	(revision 63130)
@@ -20,6 +20,8 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/NEWS#L20
 
 * `else` without `rescue` now causes a syntax error.  [EXPERIMENTAL]
 
+* constant names may start with a non-ASCII capital letter. [Feature #13770]
+
 === Core classes updates (outstanding ones only)
 
 * Array
Index: test/ruby/test_parse.rb
===================================================================
--- test/ruby/test_parse.rb	(revision 63129)
+++ test/ruby/test_parse.rb	(revision 63130)
@@ -1157,6 +1157,45 @@ x = __ENCODING__ https://github.com/ruby/ruby/blob/trunk/test/ruby/test_parse.rb#L1157
     end;
   end
 
+  NONASCII_CONSTANTS = [
+    *%W"\u{00de} \u{00C0}".flat_map {|c| [c, c.encode("iso-8859-15")]},
+    "\u{1c4}", "\u{1f2}", "\u{1f88}", "\u{370}",
+    *%W"\u{391} \u{ff21}".flat_map {|c| [c, c.encode("cp932"), c.encode("euc-jp")]},
+  ]
+
+  def assert_nonascii_const
+    assert_all_assertions_foreach("NONASCII_CONSTANTS", *NONASCII_CONSTANTS) do |n|
+      m = Module.new
+      assert_not_operator(m, :const_defined?, n)
+      assert_raise_with_message(NameError, /uninitialized/) do
+        m.const_get(n)
+      end
+      assert_nil(eval("defined?(m::#{n})"))
+
+      v = yield m, n
+
+      assert_operator(m, :const_defined?, n)
+      assert_equal("constant", eval("defined?(m::#{n})"))
+      assert_same(v, m.const_get(n))
+
+      m.__send__(:remove_const, n)
+      assert_not_operator(m, :const_defined?, n)
+      assert_nil(eval("defined?(m::#{n})"))
+    end
+  end
+
+  def test_nonascii_const_set
+    assert_nonascii_const do |m, n|
+      m.const_set(n, 42)
+    end
+  end
+
+  def test_nonascii_constant
+    assert_nonascii_const do |m, n|
+      m.module_eval("class #{n}; self; end")
+    end
+  end
+
 =begin
   def test_past_scope_variable
     assert_warning(/past scope/) {catch {|tag| eval("BEGIN{throw tag}; tap {a = 1}; a")}}
Index: symbol.c
===================================================================
--- symbol.c	(revision 63129)
+++ symbol.c	(revision 63130)
@@ -199,6 +199,42 @@ rb_enc_symname_p(const char *name, rb_en https://github.com/ruby/ruby/blob/trunk/symbol.c#L199
     return rb_enc_symname2_p(name, strlen(name), enc);
 }
 
+static int
+rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
+{
+    int c, len;
+    const char *end = name + nlen;
+
+    if (nlen < 1) return FALSE;
+    if (ISASCII(*name)) return ISUPPER(*name);
+    c = rb_enc_precise_mbclen(name, end, enc);
+    if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
+    len = MBCLEN_CHARFOUND_LEN(c);
+    c = rb_enc_mbc_to_codepoint(name, end, enc);
+    if (ONIGENC_IS_UNICODE(enc)) {
+	static int ctype_titlecase = 0;
+	if (rb_enc_isupper(c, enc)) return TRUE;
+	if (rb_enc_islower(c, enc)) return FALSE;
+	if (!ctype_titlecase) {
+	    static const UChar cname[] = "titlecaseletter";
+	    static const UChar *const end = cname + sizeof(cname) - 1;
+	    ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
+	}
+	if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
+    }
+    else {
+	/* fallback to case-folding */
+	OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+	const OnigUChar *beg = (const OnigUChar *)name;
+	int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
+				   &beg, (const OnigUChar *)end,
+				   fold, enc);
+	if (r > 0 && (r != len || memcmp(fold, name, r)))
+	    return TRUE;
+    }
+    return FALSE;
+}
+
 #define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
 #define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
 
@@ -279,7 +315,7 @@ rb_enc_symname_type(const char *name, lo https://github.com/ruby/ruby/blob/trunk/symbol.c#L315
 	break;
 
       default:
-	type = ISUPPER(*m) ? ID_CONST : ID_LOCAL;
+	type = rb_sym_constant_char_p(m, e-m, enc) ? ID_CONST : ID_LOCAL;
       id:
 	if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
 	    if (len > 1 && *(e-1) == '=') {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]