ruby-changes:4151
From: ko1@a...
Date: Fri, 29 Feb 2008 22:33:47 +0900 (JST)
Subject: [ruby-changes:4151] matz - Ruby:r15641 (trunk): * string.c (rb_str_chomp_bang): now works on UTF-16.
matz 2008-02-29 22:33:26 +0900 (Fri, 29 Feb 2008)
New Revision: 15641
Modified files:
trunk/ChangeLog
trunk/string.c
trunk/test/ruby/test_string.rb
trunk/test/ruby/test_utf16.rb
Log:
* string.c (rb_str_chomp_bang): now works on UTF-16.
* string.c (tr_setup_table): negation should work on
non-ASCII-compatible strings as well.
* string.c (rb_str_split_m): awk split should work on
non-ASCII-compatible strings as well.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_string.rb?r1=15641&r2=15640&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15641&r2=15640&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15641&r2=15640&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_utf16.rb?r1=15641&r2=15640&diff_format=u
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15640)
+++ ChangeLog (revision 15641)
@@ -1,3 +1,13 @@
+Fri Feb 29 20:25:07 2008 Yukihiro Matsumoto <matz@r...>
+
+ * string.c (rb_str_chomp_bang): now works on UTF-16.
+
+ * string.c (tr_setup_table): negation should work on non ASCII
+ compatible strings as well.
+
+ * string.c (rb_str_split_m): awk split should work on non ASCII
+ compatible strings as well.
+
Fri Feb 29 18:08:43 2008 Yukihiro Matsumoto <matz@r...>
* time.c (time_strftime): format should be ascii compatible.
Index: string.c
===================================================================
--- string.c (revision 15640)
+++ string.c (revision 15641)
@@ -4453,9 +4453,20 @@
tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
tr.gen = tr.now = tr.max = 0;
- if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') {
- cflag = 1;
- tr.p++;
+ if (RSTRING_LEN(str) > 1) {
+ if (rb_enc_asciicompat(enc)) {
+ if (RSTRING_PTR(str)[0] == '^') {
+ cflag = 1;
+ tr.p++;
+ }
+ }
+ else {
+ c = rb_enc_codepoint(RSTRING_PTR(str), RSTRING_END(str), enc);
+ if (c == '^') {
+ cflag = 1;
+ tr.p+=rb_enc_codelen(c, enc);
+ }
+ }
}
if (first) {
for (i=0; i<256; i++) {
@@ -4838,11 +4849,21 @@
}
else {
fs_set:
- if (TYPE(spat) == T_STRING && RSTRING_LEN(spat) == 1) {
- if (RSTRING_PTR(spat)[0] == ' ') {
- awk_split = Qtrue;
+ if (TYPE(spat) == T_STRING) {
+ rb_encoding *enc2 = STR_ENC_GET(spat);
+
+ if (rb_enc_mbminlen(enc2) == 1) {
+ if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
+ awk_split = Qtrue;
+ }
}
else {
+ if (str_strlen(spat, enc2) == 1 &&
+ rb_enc_codepoint(RSTRING_PTR(spat), RSTRING_END(spat), enc2) == ' ') {
+ awk_split = Qtrue;
+ }
+ }
+ if (!awk_split) {
spat = rb_reg_regcomp(rb_reg_quote(spat));
}
}
@@ -5266,27 +5287,49 @@
char *p, *pp, *e;
long len, rslen;
+ len = RSTRING_LEN(str);
+ if (len == 0) return Qnil;
+ p = RSTRING_PTR(str);
+ e = p + len;
if (rb_scan_args(argc, argv, "01", &rs) == 0) {
- len = RSTRING_LEN(str);
- if (len == 0) return Qnil;
- p = RSTRING_PTR(str);
rs = rb_rs;
if (rs == rb_default_rs) {
smart_chomp:
- rb_enc_check(str, rs);
rb_str_modify(str);
- if (RSTRING_PTR(str)[len-1] == '\n') {
- STR_DEC_LEN(str);
- if (RSTRING_LEN(str) > 0 &&
- RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
- STR_DEC_LEN(str);
+ enc = rb_enc_get(str);
+ if (rb_enc_mbminlen(enc) > 1) {
+ len = str_strlen(str, enc);
+ pp = rb_enc_nth(p, e, len-1, enc);
+ if (rb_enc_is_newline(pp, e, enc)) {
+ e = pp;
+ len--;
}
+ if (len > 0) {
+ p = rb_enc_nth(p, e, len-1, enc);
+ if (rb_enc_codepoint(p, e, enc) == '\r') {
+ pp = e = p;
+ }
+ }
+ if (e == RSTRING_END(str)) {
+ return Qnil;
+ }
+ len = pp - RSTRING_PTR(str);
+ STR_SET_LEN(str, len);
}
- else if (RSTRING_PTR(str)[len-1] == '\r') {
- STR_DEC_LEN(str);
- }
else {
- return Qnil;
+ if (RSTRING_PTR(str)[len-1] == '\n') {
+ STR_DEC_LEN(str);
+ if (RSTRING_LEN(str) > 0 &&
+ RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
+ STR_DEC_LEN(str);
+ }
+ }
+ else if (RSTRING_PTR(str)[len-1] == '\r') {
+ STR_DEC_LEN(str);
+ }
+ else {
+ return Qnil;
+ }
}
RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
return str;
@@ -5294,10 +5337,6 @@
}
if (NIL_P(rs)) return Qnil;
StringValue(rs);
- enc = rb_enc_check(str, rs);
- len = RSTRING_LEN(str);
- if (len == 0) return Qnil;
- p = RSTRING_PTR(str);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
while (len>0 && p[len-1] == '\n') {
@@ -5321,8 +5360,8 @@
if (is_broken_string(rs)) {
return Qnil;
}
- e = p + len;
pp = e - rslen;
+ enc = rb_enc_check(str, rs);
if (p[len-1] == newline &&
(rslen <= 1 ||
memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
Index: test/ruby/test_utf16.rb
===================================================================
--- test/ruby/test_utf16.rb (revision 15640)
+++ test/ruby/test_utf16.rb (revision 15641)
@@ -232,9 +232,11 @@
def test_chomp
s = "\1\n".force_encoding("utf-16be")
- assert_raise(ArgumentError, "#{encdump s}.chomp") {
- s.chomp
- }
+ assert_equal(s, s.chomp, "#{encdump s}.chomp")
+ s = "\0\n".force_encoding("utf-16be")
+ assert_equal("", s.chomp, "#{encdump s}.chomp")
+ s = "\0\r\0\n".force_encoding("utf-16be")
+ assert_equal("", s.chomp, "#{encdump s}.chomp")
end
def test_succ
Index: test/ruby/test_string.rb
===================================================================
--- test/ruby/test_string.rb (revision 15640)
+++ test/ruby/test_string.rb (revision 15641)
@@ -280,6 +280,7 @@
def test_chomp
assert_equal(S("hello"), S("hello").chomp("\n"))
assert_equal(S("hello"), S("hello\n").chomp("\n"))
+ save = $/
$/ = "\n"
@@ -289,7 +290,7 @@
$/ = "!"
assert_equal(S("hello"), S("hello").chomp)
assert_equal(S("hello"), S("hello!").chomp)
- $/ = "\n"
+ $/ = save
end
def test_chomp!
@@ -302,6 +303,7 @@
a = S("hello\n")
a.chomp!(S("\n"))
assert_equal(S("hello"), a)
+ save = $/
$/ = "\n"
a = S("hello")
@@ -321,7 +323,7 @@
a.chomp!
assert_equal(S("hello"), a)
- $/ = "\n"
+ $/ = save
a = S("hello\n")
b = a.dup
@@ -474,6 +476,7 @@
end
def test_each
+ save = $/
$/ = "\n"
res=[]
S("hello\nworld").lines.each {|x| res << x}
@@ -490,7 +493,7 @@
S("hello!world").lines.each {|x| res << x}
assert_equal(S("hello!"), res[0])
assert_equal(S("world"), res[1])
- $/ = "\n"
+ $/ = save
end
def test_each_byte
@@ -502,6 +505,7 @@
end
def test_each_line
+ save = $/
$/ = "\n"
res=[]
S("hello\nworld").lines.each {|x| res << x}
@@ -520,7 +524,7 @@
assert_equal(S("hello!"), res[0])
assert_equal(S("world"), res[1])
- $/ = "\n"
+ $/ = save
end
def test_empty?
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/