[前][次][番号順一覧][スレッド一覧]

ruby-changes:72199

From: Nobuyoshi <ko1@a...>
Date: Fri, 17 Jun 2022 01:49:07 +0900 (JST)
Subject: [ruby-changes:72199] cd5cafa4a3 (master): Respect the encoding of the source [Bug #18827]

https://git.ruby-lang.org/ruby.git/commit/?id=cd5cafa4a3

From cd5cafa4a380e2459862b6e99ff0c381362ef1be Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Tue, 14 Jun 2022 12:27:25 +0900
Subject: Respect the encoding of the source [Bug #18827]

Do not override the input string encoding at the time of preparation,
because the source encoding has not been determined from the input yet.
---
 parse.y                  | 26 ++++++++++++++++----------
 test/ruby/test_ast.rb    | 13 +++++++++++++
 test/ruby/test_syntax.rb |  9 +++++++++
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/parse.y b/parse.y
index 59eff0228a..be5af00d43 100644
--- a/parse.y
+++ b/parse.y
@@ -6462,12 +6462,6 @@ lex_getline(struct parser_params *p) https://github.com/ruby/ruby/blob/trunk/parse.y#L6462
     if (NIL_P(line)) return line;
     must_be_ascii_compatible(line);
     if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen
-#ifndef RIPPER
-    if (p->debug_lines) {
-	rb_enc_associate(line, p->enc);
-	rb_ary_push(p->debug_lines, line);
-    }
-#endif
     p->line_count++;
     return line;
 }
@@ -6614,7 +6608,7 @@ add_delayed_token(struct parser_params *p, const char *tok, const char *end) https://github.com/ruby/ruby/blob/trunk/parse.y#L6608
 #endif
 
 static int
-nextline(struct parser_params *p)
+nextline(struct parser_params *p, int set_encoding)
 {
     VALUE v = p->lex.nextline;
     p->lex.nextline = 0;
@@ -6632,6 +6626,12 @@ nextline(struct parser_params *p) https://github.com/ruby/ruby/blob/trunk/parse.y#L6626
 	    lex_goto_eol(p);
 	    return -1;
 	}
+#ifndef RIPPER
+	if (p->debug_lines) {
+	    if (set_encoding) rb_enc_associate(v, p->enc);
+	    rb_ary_push(p->debug_lines, v);
+	}
+#endif
 	p->cr_seen = FALSE;
     }
     else if (NIL_P(v)) {
@@ -6663,12 +6663,12 @@ parser_cr(struct parser_params *p, int c) https://github.com/ruby/ruby/blob/trunk/parse.y#L6663
 }
 
 static inline int
-nextc(struct parser_params *p)
+nextc0(struct parser_params *p, int set_encoding)
 {
     int c;
 
     if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
-	if (nextline(p)) return -1;
+	if (nextline(p, set_encoding)) return -1;
     }
     c = (unsigned char)*p->lex.pcur++;
     if (UNLIKELY(c == '\r')) {
@@ -6677,6 +6677,7 @@ nextc(struct parser_params *p) https://github.com/ruby/ruby/blob/trunk/parse.y#L6677
 
     return c;
 }
+#define nextc(p) nextc0(p, TRUE)
 
 static void
 pushback(struct parser_params *p, int c)
@@ -8467,7 +8468,7 @@ set_file_encoding(struct parser_params *p, const char *str, const char *send) https://github.com/ruby/ruby/blob/trunk/parse.y#L8468
 static void
 parser_prepare(struct parser_params *p)
 {
-    int c = nextc(p);
+    int c = nextc0(p, FALSE);
     p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose);
     switch (c) {
       case '#':
@@ -8479,6 +8480,11 @@ parser_prepare(struct parser_params *p) https://github.com/ruby/ruby/blob/trunk/parse.y#L8480
 	    (unsigned char)p->lex.pcur[1] == 0xbf) {
 	    p->enc = rb_utf8_encoding();
 	    p->lex.pcur += 2;
+#ifndef RIPPER
+	    if (p->debug_lines) {
+		rb_enc_associate(p->lex.lastline, p->enc);
+	    }
+#endif
 	    p->lex.pbeg = p->lex.pcur;
 	    return;
 	}
diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb
index cd96027654..5cf7a7f1b5 100644
--- a/test/ruby/test_ast.rb
+++ b/test/ruby/test_ast.rb
@@ -542,6 +542,19 @@ dummy https://github.com/ruby/ruby/blob/trunk/test/ruby/test_ast.rb#L542
     assert_equal("def test_keep_script_lines_for_of\n", node_method.source.lines.first)
   end
 
+  def test_encoding_with_keep_script_lines
+    enc = Encoding::EUC_JP
+    code = "__ENCODING__".encode(enc)
+
+    assert_equal(enc, eval(code))
+
+    node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: false)
+    assert_equal(enc, node.children[2].children[0])
+
+    node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: true)
+    assert_equal(enc, node.children[2].children[0])
+  end
+
   def test_e_option
     assert_in_out_err(["-e", "def foo; end; pp RubyVM::AbstractSyntaxTree.of(method(:foo)).type"],
                       "", [":SCOPE"], [])
diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb
index b1fa3e5227..1d7b89de57 100644
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@@ -66,6 +66,15 @@ class TestSyntax < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_syntax.rb#L66
     f&.close!
   end
 
+  def test_script_lines_encoding
+    require 'tmpdir'
+    Dir.mktmpdir do |dir|
+      File.write(File.join(dir, "script_lines.rb"), "SCRIPT_LINES__ = {}\n")
+      assert_in_out_err(%w"-r./script_lines -w -Ke", "puts __ENCODING__.name",
+                        %w"EUC-JP", /-K is specified/, chdir: dir)
+    end
+  end
+
   def test_anonymous_block_forwarding
     assert_syntax_error("def b; c(&); end", /no anonymous block parameter/)
     assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]