[前][次][番号順一覧][スレッド一覧]

ruby-changes:13373

From: naruse <ko1@a...>
Date: Tue, 29 Sep 2009 09:34:22 +0900 (JST)
Subject: [ruby-changes:13373] Ruby:r25143 (trunk): * string.c (rb_str_inspect): dump as \uXXXX when the string is in Unicode. [ruby-dev:39388]

naruse	2009-09-29 09:34:06 +0900 (Tue, 29 Sep 2009)

  New Revision: 25143

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=25143

  Log:
    * string.c (rb_str_inspect): dump as \uXXXX when the
      string is in Unicode. [ruby-dev:39388]

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_m17n.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 25142)
+++ ChangeLog	(revision 25143)
@@ -1,3 +1,8 @@
+Tue Sep 29 06:50:32 2009  NARUSE, Yui  <naruse@r...>
+
+	* string.c (rb_str_inspect): dump as \uXXXX when the
+	  string is in Unicode. [ruby-dev:39388]
+
 Tue Sep 29 06:49:16 2009  NARUSE, Yui  <naruse@r...>
 
 	* encoding.c (rb_enc_unicode_p): defined.
Index: string.c
===================================================================
--- string.c	(revision 25142)
+++ string.c	(revision 25143)
@@ -4061,6 +4061,7 @@
     char *p, *pend;
     VALUE result = rb_str_buf_new(0);
     rb_encoding *resenc = rb_default_internal_encoding();
+    int unicode_p = rb_enc_unicode_p(enc);
 
     if (resenc == NULL) resenc = rb_default_external_encoding();
     if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
@@ -4069,7 +4070,7 @@
 
     p = RSTRING_PTR(str); pend = RSTRING_END(str);
     while (p < pend) {
-	unsigned int c, cc;
+	unsigned int c = -1, cc;
 	int n;
 
         n = rb_enc_precise_mbclen(p, pend, enc);
@@ -4114,19 +4115,30 @@
 	else if (c == 033) {
 	    str_buf_cat2(result, "\\e");
 	}
-	else if ((enc == resenc && rb_enc_isprint(c, enc)) || rb_enc_isascii(c, enc)) {
+	else if ((enc == resenc && rb_enc_isprint(c, enc)) ||
+		(rb_enc_isascii(c, enc) && ISPRINT(c))) {
 	    str_buf_cat(result, p-n, n);
 	}
 	else {
-            char *q;
+	    char buf[11];
 	  escape_codepoint:
-            for (q = p-n; q < p; q++) {
-#define BACKESC_BUFSIZE 5
-		char buf[BACKESC_BUFSIZE];
-		sprintf(buf, "\\x%02X", *q & 0377);
-		str_buf_cat(result, buf, BACKESC_BUFSIZE - 1);
-#undef BACKESC_BUFSIZE
+
+	    if (unicode_p && c != -1) {
+		if (c > 0xFFFF) {
+		    sprintf(buf, "\\u{%X}", c);
+		}
+		else {
+		    sprintf(buf, "\\u%04X", c);
+		}
+		str_buf_cat(result, buf, strlen(buf));
 	    }
+	    else {
+		char *q;
+		for (q = p-n; q < p; q++) {
+		    sprintf(buf, "\\x%02X", *q & 0377);
+		    str_buf_cat(result, buf, strlen(buf));
+		}
+	    }
 	}
     }
     str_buf_cat2(result, "\"");
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb	(revision 25142)
+++ test/ruby/test_m17n.rb	(revision 25143)
@@ -2,6 +2,15 @@
 require 'stringio'
 
 class TestM17N < Test::Unit::TestCase
+  def inspect_encoding
+    Encoding.default_internal || Encoding.default_external
+  end
+
+  def setup
+    Encoding.default_internal = nil
+    Encoding.default_external = Encoding::UTF_8
+  end
+
   def assert_encoding(encname, actual, message=nil)
     assert_equal(Encoding.find(encname), actual, message)
   end
@@ -201,10 +210,10 @@
     assert_equal('"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ").inspect)
 
 
-    assert_equal(e("\"\\xA1\x8f\xA1\xA1\""), e("\xa1\x8f\xa1\xa1").inspect)
+    assert_equal("\"\\xA1\\x8F\\xA1\\xA1\"", e("\xa1\x8f\xa1\xa1").inspect)
 
     assert_equal('"\x81."', s("\x81.").inspect)
-    assert_equal(s("\"\x81@\""), s("\x81@").inspect)
+    assert_equal(s('"\x81\x40"'), s("\x81@").inspect)
 
     assert_equal('"\xFC"', u("\xfc").inspect)
   end
@@ -756,30 +765,30 @@
   end
 
   def test_sprintf_p
-    assert_strenc('""', 'ASCII-8BIT', a("%p") % a(""))
-    assert_strenc('""', 'EUC-JP', e("%p") % e(""))
-    assert_strenc('""', 'Windows-31J', s("%p") % s(""))
-    assert_strenc('""', 'UTF-8', u("%p") % u(""))
+    assert_strenc('""', inspect_encoding, a("%p") % a(""))
+    assert_strenc('""', inspect_encoding, e("%p") % e(""))
+    assert_strenc('""', inspect_encoding, s("%p") % s(""))
+    assert_strenc('""', inspect_encoding, u("%p") % u(""))
 
-    assert_strenc('"a"', 'ASCII-8BIT', a("%p") % a("a"))
-    assert_strenc('"a"', 'EUC-JP', e("%p") % e("a"))
-    assert_strenc('"a"', 'Windows-31J', s("%p") % s("a"))
-    assert_strenc('"a"', 'UTF-8', u("%p") % u("a"))
+    assert_strenc('"a"', inspect_encoding, a("%p") % a("a"))
+    assert_strenc('"a"', inspect_encoding, e("%p") % e("a"))
+    assert_strenc('"a"', inspect_encoding, s("%p") % s("a"))
+    assert_strenc('"a"', inspect_encoding, u("%p") % u("a"))
 
-    assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', a("%p") % a("\xc2\xa1"))
-    assert_strenc("\"\xC2\xA1\"", 'EUC-JP', e("%p") % e("\xc2\xa1"))
-    #assert_strenc("\"\xC2\xA1\"", 'Windows-31J', s("%p") % s("\xc2\xa1"))
-    assert_strenc("\"\xC2\xA1\"", 'UTF-8', u("%p") % u("\xc2\xa1"))
+    assert_strenc('"\xC2\xA1"', inspect_encoding, a("%p") % a("\xc2\xa1"))
+    assert_strenc('"\xC2\xA1"', inspect_encoding, e("%p") % e("\xc2\xa1"))
+    #assert_strenc("\"\xC2\xA1\"", inspect_encoding, s("%p") % s("\xc2\xa1"))
+    assert_strenc("\"\xC2\xA1\"", inspect_encoding, u("%p") % u("\xc2\xa1"))
 
-    assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', "%10p" % a("\xc2\xa1"))
-    assert_strenc("       \"\xC2\xA1\"", 'EUC-JP', "%10p" % e("\xc2\xa1"))
-    #assert_strenc("       \"\xC2\xA1\"", 'Windows-31J', "%10p" % s("\xc2\xa1"))
-    assert_strenc("       \"\xC2\xA1\"", 'UTF-8', "%10p" % u("\xc2\xa1"))
+    assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % a("\xc2\xa1"))
+    assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % e("\xc2\xa1"))
+    #assert_strenc("       \"\xC2\xA1\"", inspect_encoding, "%10p" % s("\xc2\xa1"))
+    assert_strenc("       \"\xC2\xA1\"", inspect_encoding, "%10p" % u("\xc2\xa1"))
 
-    assert_strenc('"\x00"', 'ASCII-8BIT', a("%p") % a("\x00"))
-    assert_strenc('"\x00"', 'EUC-JP', e("%p") % e("\x00"))
-    assert_strenc('"\x00"', 'Windows-31J', s("%p") % s("\x00"))
-    assert_strenc('"\x00"', 'UTF-8', u("%p") % u("\x00"))
+    assert_strenc('"\x00"', inspect_encoding, a("%p") % a("\x00"))
+    assert_strenc('"\x00"', inspect_encoding, e("%p") % e("\x00"))
+    assert_strenc('"\x00"', inspect_encoding, s("%p") % s("\x00"))
+    assert_strenc('"\u0000"', inspect_encoding, u("%p") % u("\x00"))
   end
 
   def test_sprintf_s
@@ -1176,8 +1185,8 @@
     assert_equal(Encoding::US_ASCII, [].to_s.encoding)
     assert_equal(Encoding::US_ASCII, [nil].to_s.encoding)
     assert_equal(Encoding::US_ASCII, [1].to_s.encoding)
-    assert_equal(Encoding::US_ASCII, [""].to_s.encoding)
-    assert_equal(Encoding::US_ASCII, ["a"].to_s.encoding)
+    assert_equal(inspect_encoding, [""].to_s.encoding)
+    assert_equal(inspect_encoding, ["a"].to_s.encoding)
     assert_equal(Encoding::US_ASCII, [nil,1,"","a","\x20",[]].to_s.encoding)
   end
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]