[前][次][番号順一覧][スレッド一覧]

ruby-changes:7843

From: matz <ko1@a...>
Date: Mon, 15 Sep 2008 23:40:21 +0900 (JST)
Subject: [ruby-changes:7843] Ruby:r19364 (trunk): * string.c (rb_str_squeeze_bang): specialized for 7bit characters in

matz	2008-09-15 23:40:00 +0900 (Mon, 15 Sep 2008)

  New Revision: 19364

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19364

  Log:
    * string.c (rb_str_squeeze_bang): specialized for 7bit characters in
      ASCII compatible strings.
    
    * string.c (rb_str_count): ditto.
    
    * string.c (tr_trans): preserve 7bit/valid coderange flag.
    
    * string.c (rb_str_squeeze_bang): preserve previous coderange value.
    
    * string.c (rb_str_lstrip_bang): ditto.
    
    * string.c (rb_str_rstrip_bang): ditto.
    
    * encoding.c (rb_default_external_encoding): cache the default
      external encoding in a static variable.
    
    * string.c (single_byte_optimizable): check coderange first, to
      reduce the number of calls to rb_enc_from_index().

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/string.c

Index: encoding.c
===================================================================
--- encoding.c	(revision 19363)
+++ encoding.c	(revision 19364)
@@ -997,11 +997,15 @@
 }
 
 static int default_external_index;
+static rb_encoding *default_external;
 
 rb_encoding *
 rb_default_external_encoding(void)
 {
-    return rb_enc_from_index(default_external_index);
+    if (!default_external) {
+	default_external = rb_enc_from_index(default_external_index);
+    }
+    return default_external;
 }
 
 VALUE
@@ -1028,6 +1032,7 @@
 rb_enc_set_default_external(VALUE encoding)
 {
     default_external_index = rb_enc_to_index(rb_to_encoding(encoding));
+    default_external = 0;
 }
 
 /*
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19363)
+++ ChangeLog	(revision 19364)
@@ -9,6 +9,27 @@
 	* io.c (rb_scan_open_args): call FilePathValue before encoding
 	  conversion.
 
+Mon Sep 15 22:11:07 2008  Yukihiro Matsumoto  <matz@r...>
+
+	* string.c (rb_str_squeeze_bang): specialized for 7bit characters in
+	  ASCII compatible strings.
+
+	* string.c (rb_str_count): ditto.
+
+	* string.c (tr_trans): preserve 7bit/valid coderange flag.
+
+	* string.c (rb_str_squeeze_bang): preserve previous coderange value.
+
+	* string.c (rb_str_lstrip_bang): ditto.
+
+	* string.c (rb_str_rstrip_bang): ditto.
+
+	* encoding.c (rb_default_external_encoding): preserve
+	  default_external_encoding in a static variable.
+
+	* string.c (single_byte_optimizable): check coderange first, to
+	  reduce number of calling rb_enc_from_index().
+
 Mon Sep 15 20:57:00 2008  Yuki Sonoda (Yugui)  <yugui@y...>
 
 	* lib/matrix.rb (Matrix#eql?): fixed [ruby-dev:36298].
Index: string.c
===================================================================
--- string.c	(revision 19363)
+++ string.c	(revision 19364)
@@ -115,15 +115,16 @@
 static int
 single_byte_optimizable(VALUE str)
 {
-    rb_encoding *enc = STR_ENC_GET(str);
+    rb_encoding *enc;
 
-    if (rb_enc_mbmaxlen(enc) == 1)
-        return 1;
-
     /* Conservative.  It may be ENC_CODERANGE_UNKNOWN. */
     if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
         return 1;
 
+    enc = STR_ENC_GET(str);
+    if (rb_enc_mbmaxlen(enc) == 1)
+        return 1;
+
     /* Conservative.  Possibly single byte.
      * "\xa1" in Shift_JIS for example. */
     return 0;
@@ -4325,6 +4326,7 @@
     char *s, *send;
     VALUE hash = 0;
     int singlebyte = single_byte_optimizable(str);
+    int cr, cr1, cr2;
 
     StringValue(src);
     StringValue(repl);
@@ -4333,6 +4335,12 @@
 	return rb_str_delete_bang(1, &src, str);
     }
 
+    cr = ENC_CODERANGE(str);
+    cr1 = ENC_CODERANGE(src);
+    cr2 = ENC_CODERANGE(repl);
+    if (cr != cr1 || cr1 != cr2) {
+	cr = ENC_CODERANGE_UNKNOWN;
+    }
     e1 = rb_enc_check(str, src);
     e2 = rb_enc_check(str, repl);
     if (e1 == e2) {
@@ -4517,6 +4525,7 @@
 	RSTRING(str)->as.heap.aux.capa = max;
     }
     
+    ENC_CODERANGE_SET(str, cr);
     if (modify) {
 	rb_enc_associate(str, enc);
 	return str;
@@ -4738,6 +4747,8 @@
     char *s, *send, *t;
     int save, modify = 0;
     int i;
+    int ascompat, singlebyte = single_byte_optimizable(str);
+    int cr = ENC_CODERANGE(str);
 
     if (argc == 0) {
 	enc = STR_ENC_GET(str);
@@ -4757,29 +4768,53 @@
     if (!s || RSTRING_LEN(str) == 0) return Qnil;
     send = RSTRING_END(str);
     save = -1;
-    while (s < send) {
-	unsigned int c = rb_enc_codepoint(s, send, enc);
-	int clen = rb_enc_codelen(c, enc);
+    ascompat = rb_enc_asciicompat(enc);
 
-	if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
-	    if (t != s) rb_enc_mbcput(c, t, enc);
-	    save = c;
-	    t += clen;
+    if (singlebyte) {
+        while (s < send) {
+	    unsigned int c = *(unsigned char*)s++;
+	    if (c != save || (argc > 0 && !squeez[c])) {
+	        *t++ = save = c;
+	    }
 	}
-	s += clen;
+    } else {
+	while (s < send) {
+	    unsigned int c;
+	    int clen;
+
+	    if (ascompat && (c = *(unsigned char*)s) < 0x80) {
+		if (c != save || (argc > 0 && !squeez[c])) {
+		    *t++ = save = c;
+		}
+		s++;
+	    }
+	    else {
+		c = rb_enc_codepoint(s, send, enc);
+		clen = rb_enc_codelen(c, enc);
+
+		if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
+		    if (t != s) rb_enc_mbcput(c, t, enc);
+		    save = c;
+		    t += clen;
+		}
+		s += clen;
+	    }
+	}
     }
+
     *t = '\0';
     if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
 	STR_SET_LEN(str, t - RSTRING_PTR(str));
 	modify = 1;
     }
 
+    ENC_CODERANGE_SET(str, cr);
     if (modify) return str;
     return Qnil;
 }
 
 
-/*
+/* 
  *  call-seq:
  *     str.squeeze([other_str]*)    => new_str
  *  
@@ -4864,6 +4899,7 @@
     VALUE del = 0, nodel = 0;
     char *s, *send;
     int i;
+    int ascompat;
 
     if (argc < 1) {
 	rb_raise(rb_eArgError, "wrong number of arguments");
@@ -4873,22 +4909,36 @@
 
 	StringValue(s);
 	enc = rb_enc_check(str, s);
-	tr_setup_table(s, table,i==0, &del, &nodel, enc);
+	tr_setup_table(s, table, i==0, &del, &nodel, enc);
     }
 
     s = RSTRING_PTR(str);
     if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
     send = RSTRING_END(str);
     i = 0;
+    ascompat = rb_enc_asciicompat(enc);
+
     while (s < send) {
-	unsigned int c = rb_enc_codepoint(s, send, enc);
-	int clen = rb_enc_codelen(c, enc);
+	unsigned int c;
+	int clen;
 
-	if (tr_find(c, table, del, nodel)) {
-	    i++;
+	if (ascompat && (c = *(unsigned char*)s) < 0x80) {
+	    clen = 1;
+	    if (table[c]) {
+		i++;
+	    }
+	    s++;
 	}
-	s += clen;
+	else {
+	    c = rb_enc_codepoint(s, send, enc);
+	    clen = rb_enc_codelen(c, enc);
+	    if (tr_find(c, table, del, nodel)) {
+		i++;
+	    }
+	    s += clen;
+	}
     }
+
     return INT2NUM(i);
 }
 
@@ -5549,8 +5599,10 @@
 {
     rb_encoding *enc;
     char *s, *t, *e;
+    int cr = ENC_CODERANGE(str);
 
     rb_str_modify(str);
+    ENC_CODERANGE_SET(str, cr);
     enc = STR_ENC_GET(str);
     s = RSTRING_PTR(str);
     if (!s || RSTRING_LEN(str) == 0) return Qnil;
@@ -5612,27 +5664,38 @@
     rb_encoding *enc;
     char *s, *t, *e;
     int space_seen = Qfalse;
+    int cr = ENC_CODERANGE(str);
 
     rb_str_modify(str);
+    ENC_CODERANGE_SET(str, cr);
     enc = STR_ENC_GET(str);
     s = RSTRING_PTR(str);
     if (!s || RSTRING_LEN(str) == 0) return Qnil;
     t = e = RSTRING_END(str);
-    while (s < e) {
-	unsigned int cc = rb_enc_codepoint(s, e, enc);
 
-	if (!cc || rb_enc_isspace(cc, enc)) {
-	    if (!space_seen) t = s;
-	    space_seen = Qtrue;
+    if (single_byte_optimizable(str)) {
+        /* remove trailing '\0's */
+        while (s < t && t[-1] == '\0') t--;
+      
+	/* remove trailing spaces */
+	while (s < t && rb_enc_isspace(*(t-1), enc)) t--;
+    } else {
+        while (s < e) {
+	    unsigned int cc = rb_enc_codepoint(s, e, enc);
+
+	    if (!cc || rb_enc_isspace(cc, enc)) {
+	        if (!space_seen) t = s;
+		space_seen = Qtrue;
+	    }
+	    else {
+	        space_seen = Qfalse;
+	    }
+	    s += rb_enc_codelen(cc, enc);
 	}
-	else {
-	    space_seen = Qfalse;
-	}
-	s += rb_enc_codelen(cc, enc);
+	if (!space_seen) t = s;
     }
-    if (!space_seen) t = s;
+
     if (t < e) {
-	rb_str_modify(str);
 	STR_SET_LEN(str, t-RSTRING_PTR(str));
 	RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
 	return str;

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]