ruby-changes:50923
From: nobu <ko1@a...>
Date: Tue, 10 Apr 2018 09:41:52 +0900 (JST)
Subject: [ruby-changes:50923] nobu:r63130 (trunk): symbol.c: non-ASCII constant names
nobu 2018-04-10 09:41:47 +0900 (Tue, 10 Apr 2018) New Revision: 63130 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=63130 Log: symbol.c: non-ASCII constant names * symbol.c (rb_sym_constant_char_p): support for non-ASCII constant names. [Feature #13770] * object.c (rb_mod_const_get, rb_mod_const_defined): support for non-ASCII constant names. Modified files: trunk/NEWS trunk/object.c trunk/symbol.c trunk/test/ruby/test_parse.rb Index: object.c =================================================================== --- object.c (revision 63129) +++ object.c (revision 63130) @@ -2480,7 +2480,7 @@ rb_mod_const_get(int argc, VALUE *argv, https://github.com/ruby/ruby/blob/trunk/object.c#L2480 if (!id) { part = rb_str_subseq(name, beglen, len); OBJ_FREEZE(part); - if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) { + if (!rb_is_const_name(part)) { name = part; goto wrong_name; } @@ -2633,7 +2633,7 @@ rb_mod_const_defined(int argc, VALUE *ar https://github.com/ruby/ruby/blob/trunk/object.c#L2633 if (!id) { part = rb_str_subseq(name, beglen, len); OBJ_FREEZE(part); - if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) { + if (!rb_is_const_name(part)) { name = part; goto wrong_name; } Index: NEWS =================================================================== --- NEWS (revision 63129) +++ NEWS (revision 63130) @@ -20,6 +20,8 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/NEWS#L20 * `else` without `rescue` is now causes a syntax error. [EXPERIMENTAL] +* constant names may start with a non-ASCII capital letter. [Feature #13770] + === Core classes updates (outstanding ones only) * Array Index: test/ruby/test_parse.rb =================================================================== --- test/ruby/test_parse.rb (revision 63129) +++ test/ruby/test_parse.rb (revision 63130) @@ -1157,6 +1157,45 @@ x = __ENCODING__ https://github.com/ruby/ruby/blob/trunk/test/ruby/test_parse.rb#L1157 end; end + NONASCII_CONSTANTS = [ + *%W"\u{00de} \u{00C0}".flat_map {|c| [c, c.encode("iso-8859-15")]}, + "\u{1c4}", "\u{1f2}", "\u{1f88}", "\u{370}", + *%W"\u{391} \u{ff21}".flat_map {|c| [c, c.encode("cp932"), c.encode("euc-jp")]}, + ] + + def assert_nonascii_const + assert_all_assertions_foreach("NONASCII_CONSTANTS", *NONASCII_CONSTANTS) do |n| + m = Module.new + assert_not_operator(m, :const_defined?, n) + assert_raise_with_message(NameError, /uninitialized/) do + m.const_get(n) + end + assert_nil(eval("defined?(m::#{n})")) + + v = yield m, n + + assert_operator(m, :const_defined?, n) + assert_equal("constant", eval("defined?(m::#{n})")) + assert_same(v, m.const_get(n)) + + m.__send__(:remove_const, n) + assert_not_operator(m, :const_defined?, n) + assert_nil(eval("defined?(m::#{n})")) + end + end + + def test_nonascii_const_set + assert_nonascii_const do |m, n| + m.const_set(n, 42) + end + end + + def test_nonascii_constant + assert_nonascii_const do |m, n| + m.module_eval("class #{n}; self; end") + end + end + =begin def test_past_scope_variable assert_warning(/past scope/) {catch {|tag| eval("BEGIN{throw tag}; tap {a = 1}; a")}} Index: symbol.c =================================================================== --- symbol.c (revision 63129) +++ symbol.c (revision 63130) @@ -199,6 +199,42 @@ rb_enc_symname_p(const char *name, rb_en https://github.com/ruby/ruby/blob/trunk/symbol.c#L199 return rb_enc_symname2_p(name, strlen(name), enc); } +static int +rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc) +{ + int c, len; + const char *end = name + nlen; + + if (nlen < 1) return FALSE; + if (ISASCII(*name)) return ISUPPER(*name); + c = rb_enc_precise_mbclen(name, end, enc); + if (!MBCLEN_CHARFOUND_P(c)) return FALSE; + len = MBCLEN_CHARFOUND_LEN(c); + c = rb_enc_mbc_to_codepoint(name, end, enc); + if (ONIGENC_IS_UNICODE(enc)) { + static int ctype_titlecase = 0; + if (rb_enc_isupper(c, enc)) return TRUE; + if (rb_enc_islower(c, enc)) return FALSE; + if (!ctype_titlecase) { + static const UChar cname[] = "titlecaseletter"; + static const UChar *const end = cname + sizeof(cname) - 1; + ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end); + } + if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE; + } + else { + /* fallback to case-folding */ + OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + const OnigUChar *beg = (const OnigUChar *)name; + int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD, + &beg, (const OnigUChar *)end, + fold, enc); + if (r > 0 && (r != len || memcmp(fold, name, r))) + return TRUE; + } + return FALSE; +} + #define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST)) #define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET)) @@ -279,7 +315,7 @@ rb_enc_symname_type(const char *name, lo https://github.com/ruby/ruby/blob/trunk/symbol.c#L315 break; default: - type = ISUPPER(*m) ? ID_CONST : ID_LOCAL; + type = rb_sym_constant_char_p(m, e-m, enc) ? ID_CONST : ID_LOCAL; id: if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) { if (len > 1 && *(e-1) == '=') { -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/