ruby-changes:4151

matz	2008-02-29 22:33:26 +0900 (Fri, 29 Feb 2008)

  New Revision: 15641

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_string.rb
    trunk/test/ruby/test_utf16.rb

  Log:
    * string.c (rb_str_chomp_bang): now works on UTF-16.
    
    * string.c (tr_setup_table): negation should work on
      non-ASCII-compatible strings as well.
    
    * string.c (rb_str_split_m): awk split should work on
      non-ASCII-compatible strings as well.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_string.rb?r1=15641&r2=15640&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15641&r2=15640&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15641&r2=15640&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_utf16.rb?r1=15641&r2=15640&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15640)
+++ ChangeLog	(revision 15641)
@@ -1,3 +1,13 @@
+Fri Feb 29 20:25:07 2008  Yukihiro Matsumoto  <matz@r...>
+
+	* string.c (rb_str_chomp_bang): now works on UTF-16.
+
+	* string.c (tr_setup_table): negation should work on non ASCII
+	  compatible strings as well.
+
+	* string.c (rb_str_split_m): awk split should work on non ASCII
+	  compatible strings as well.
+
 Fri Feb 29 18:08:43 2008  Yukihiro Matsumoto  <matz@r...>
 
 	* time.c (time_strftime): format should be ascii compatible.
Index: string.c
===================================================================
--- string.c	(revision 15640)
+++ string.c	(revision 15641)
@@ -4453,9 +4453,20 @@
     tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
     tr.gen = tr.now = tr.max = 0;
     
-    if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') {
-	cflag = 1;
-	tr.p++;
+    if (RSTRING_LEN(str) > 1) {
+	if (rb_enc_asciicompat(enc)) {
+	    if (RSTRING_PTR(str)[0] == '^') {
+		cflag = 1;
+		tr.p++;
+	    }
+	}
+	else {
+	    c = rb_enc_codepoint(RSTRING_PTR(str), RSTRING_END(str), enc);
+	    if (c == '^') {
+		cflag = 1;
+		tr.p+=rb_enc_codelen(c, enc);
+	    }
+	}
     }
     if (first) {
 	for (i=0; i<256; i++) {
@@ -4838,11 +4849,21 @@
     }
     else {
       fs_set:
-	if (TYPE(spat) == T_STRING && RSTRING_LEN(spat) == 1) {
-	    if (RSTRING_PTR(spat)[0] == ' ') {
-		awk_split = Qtrue;
+	if (TYPE(spat) == T_STRING) {
+	    rb_encoding *enc2 = STR_ENC_GET(spat);
+
+	    if (rb_enc_mbminlen(enc2) == 1) {
+		if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
+		    awk_split = Qtrue;
+		}
 	    }
 	    else {
+		if (str_strlen(spat, enc2) == 1 &&
+		    rb_enc_codepoint(RSTRING_PTR(spat), RSTRING_END(spat), enc2) == ' ') {
+		    awk_split = Qtrue;
+		}
+	    }
+	    if (!awk_split) {
 		spat = rb_reg_regcomp(rb_reg_quote(spat));
 	    }
 	}
@@ -5266,27 +5287,49 @@
     char *p, *pp, *e;
     long len, rslen;
 
+    len = RSTRING_LEN(str);
+    if (len == 0) return Qnil;
+    p = RSTRING_PTR(str);
+    e = p + len;
     if (rb_scan_args(argc, argv, "01", &rs) == 0) {
-	len = RSTRING_LEN(str);
-	if (len == 0) return Qnil;
-	p = RSTRING_PTR(str);
 	rs = rb_rs;
 	if (rs == rb_default_rs) {
 	  smart_chomp:
-	    rb_enc_check(str, rs);
 	    rb_str_modify(str);
-	    if (RSTRING_PTR(str)[len-1] == '\n') {
-		STR_DEC_LEN(str);
-		if (RSTRING_LEN(str) > 0 &&
-		    RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
-		    STR_DEC_LEN(str);
+	    enc = rb_enc_get(str);
+	    if (rb_enc_mbminlen(enc) > 1) {
+		len = str_strlen(str, enc);
+		pp = rb_enc_nth(p, e, len-1, enc);
+		if (rb_enc_is_newline(pp, e, enc)) {
+		    e = pp;
+		    len--;
 		}
+		if (len > 0) {
+		    p = rb_enc_nth(p, e, len-1, enc);
+		    if (rb_enc_codepoint(p, e, enc) == '\r') {
+			pp = e = p;
+		    }
+		}
+		if (e == RSTRING_END(str)) {
+		    return Qnil;
+		}
+		len = pp - RSTRING_PTR(str);
+		STR_SET_LEN(str, len);
 	    }
-	    else if (RSTRING_PTR(str)[len-1] == '\r') {
-		STR_DEC_LEN(str);
-	    }
 	    else {
-		return Qnil;
+		if (RSTRING_PTR(str)[len-1] == '\n') {
+		    STR_DEC_LEN(str);
+		    if (RSTRING_LEN(str) > 0 &&
+			RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
+			STR_DEC_LEN(str);
+		    }
+		}
+		else if (RSTRING_PTR(str)[len-1] == '\r') {
+		    STR_DEC_LEN(str);
+		}
+		else {
+		    return Qnil;
+		}
 	    }
 	    RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
 	    return str;
@@ -5294,10 +5337,6 @@
     }
     if (NIL_P(rs)) return Qnil;
     StringValue(rs);
-    enc = rb_enc_check(str, rs);
-    len = RSTRING_LEN(str);
-    if (len == 0) return Qnil;
-    p = RSTRING_PTR(str);
     rslen = RSTRING_LEN(rs);
     if (rslen == 0) {
 	while (len>0 && p[len-1] == '\n') {
@@ -5321,8 +5360,8 @@
     if (is_broken_string(rs)) {
 	return Qnil;
     }
-    e = p + len;
     pp = e - rslen;
+    enc = rb_enc_check(str, rs);
     if (p[len-1] == newline &&
 	(rslen <= 1 ||
 	 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
Index: test/ruby/test_utf16.rb
===================================================================
--- test/ruby/test_utf16.rb	(revision 15640)
+++ test/ruby/test_utf16.rb	(revision 15641)
@@ -232,9 +232,11 @@
 
   def test_chomp
     s = "\1\n".force_encoding("utf-16be")
-    assert_raise(ArgumentError, "#{encdump s}.chomp") {
-      s.chomp
-    }
+    assert_equal(s, s.chomp, "#{encdump s}.chomp")
+    s = "\0\n".force_encoding("utf-16be")
+    assert_equal("", s.chomp, "#{encdump s}.chomp")
+    s = "\0\r\0\n".force_encoding("utf-16be")
+    assert_equal("", s.chomp, "#{encdump s}.chomp")
   end
 
   def test_succ
Index: test/ruby/test_string.rb
===================================================================
--- test/ruby/test_string.rb	(revision 15640)
+++ test/ruby/test_string.rb	(revision 15641)
@@ -280,6 +280,7 @@
   def test_chomp
     assert_equal(S("hello"), S("hello").chomp("\n"))
     assert_equal(S("hello"), S("hello\n").chomp("\n"))
+    save = $/
 
     $/ = "\n"
 
@@ -289,7 +290,7 @@
     $/ = "!"
     assert_equal(S("hello"), S("hello").chomp)
     assert_equal(S("hello"), S("hello!").chomp)
-    $/ = "\n"
+    $/ = save
   end
 
   def test_chomp!
@@ -302,6 +303,7 @@
     a = S("hello\n")
     a.chomp!(S("\n"))
     assert_equal(S("hello"), a)
+    save = $/
 
     $/ = "\n"
     a = S("hello")
@@ -321,7 +323,7 @@
     a.chomp!
     assert_equal(S("hello"), a)
 
-    $/ = "\n"
+    $/ = save
 
     a = S("hello\n")
     b = a.dup
@@ -474,6 +476,7 @@
   end
 
   def test_each
+    save = $/
     $/ = "\n"
     res=[]
     S("hello\nworld").lines.each {|x| res << x}
@@ -490,7 +493,7 @@
     S("hello!world").lines.each {|x| res << x}
     assert_equal(S("hello!"), res[0])
     assert_equal(S("world"),  res[1])
-    $/ = "\n"
+    $/ = save
   end
 
   def test_each_byte
@@ -502,6 +505,7 @@
   end
 
   def test_each_line
+    save = $/
     $/ = "\n"
     res=[]
     S("hello\nworld").lines.each {|x| res << x}
@@ -520,7 +524,7 @@
     assert_equal(S("hello!"), res[0])
     assert_equal(S("world"),  res[1])
     
-    $/ = "\n"
+    $/ = save
   end
 
   def test_empty?

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/