ruby-changes:36196
From: nobu <ko1@a...>
Date: Wed, 5 Nov 2014 12:14:04 +0900 (JST)
Subject: [ruby-changes:36196] nobu:r48277 (trunk): string.c: reduce memory copy
nobu 2014-11-05 12:13:55 +0900 (Wed, 05 Nov 2014) New Revision: 48277 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=48277 Log: string.c: reduce memory copy * string.c (rb_str_lstrip, rb_str_strip): reduce memory copy by copying necessary part only. * string.c (rb_str_strip_bang, rb_str_strip): ditto. Modified files: trunk/ChangeLog trunk/string.c Index: ChangeLog =================================================================== --- ChangeLog (revision 48276) +++ ChangeLog (revision 48277) @@ -1,3 +1,17 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Wed Nov 5 12:13:54 2014 Nobuyoshi Nakada <nobu@r...> + + * string.c (rb_str_lstrip, rb_str_strip): reduce memory copy by + copying necessary part only. + + * string.c (rb_str_strip_bang, rb_str_strip): ditto. + +Wed Nov 5 12:13:48 2014 Nobuyoshi Nakada <nobu@r...> + + * string.c (rb_str_lstrip, rb_str_strip): reduce memory copy by + copying necessary part only. + + * string.c (rb_str_strip_bang, rb_str_strip): ditto. + Wed Nov 5 10:54:19 2014 Nobuyoshi Nakada <nobu@r...> * string.c (rb_str_lstrip_bang, rb_str_rstrip_bang): terminate Index: string.c =================================================================== --- string.c (revision 48276) +++ string.c (revision 48277) @@ -7182,6 +7182,23 @@ rb_str_chomp(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7182 return str; } +static long +lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) +{ + const char *const start = s; + + if (!s || s >= e) return 0; + /* remove spaces at head */ + while (s < e) { + int n; + unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc); + + if (!rb_isspace(cc)) break; + s += n; + } + return s - start; +} + /* * call-seq: * str.lstrip! -> self or nil @@ -7198,24 +7215,16 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L7215 rb_str_lstrip_bang(VALUE str) { rb_encoding *enc; - char *start, *s, *t, *e; + char *start, *s; + long olen, loffset; str_modify_keep_cr(str); enc = STR_ENC_GET(str); - start = s = RSTRING_PTR(str); - if (!s || RSTRING_LEN(str) == 0) return Qnil; - e = t = RSTRING_END(str); - /* remove spaces at head */ - while (s < e) { - int n; - unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc); - - if (!rb_isspace(cc)) break; - s += n; - } - - if (s > RSTRING_PTR(str)) { - long len = t - s; + RSTRING_GETMEM(str, start, olen); + loffset = lstrip_offset(str, start, start+olen, enc); + if (loffset > 0) { + long len = olen-loffset; + s = start + loffset; memmove(start, s, len); STR_SET_LEN(str, len); TERM_FILL(start+len, rb_enc_mbminlen(enc)); @@ -7239,11 +7248,39 @@ rb_str_lstrip_bang(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L7248 static VALUE rb_str_lstrip(VALUE str) { - str = rb_str_dup(str); - rb_str_lstrip_bang(str); - return str; + char *start; + long len, loffset; + RSTRING_GETMEM(str, start, len); + loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str)); + if (loffset <= 0) return rb_str_dup(str); + return rb_str_subseq(str, loffset, len - loffset); } +static long +rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) +{ + const char *t; + + rb_str_check_dummy_enc(enc); + if (!s || s >= e) return 0; + t = e; + + /* remove trailing spaces or '\0's */ + if (single_byte_optimizable(str)) { + unsigned char c; + while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--; + } + else { + char *tp; + + while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) { + unsigned int c = rb_enc_codepoint(tp, e, enc); + if (c && !rb_isspace(c)) break; + t = tp; + } + } + return e - t; +} /* * call-seq: @@ -7261,31 +7298,15 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L7298 rb_str_rstrip_bang(VALUE str) { rb_encoding *enc; - char *start, *s, *t, *e; + char *start; + long olen, roffset; str_modify_keep_cr(str); enc = STR_ENC_GET(str); - rb_str_check_dummy_enc(enc); - start = s = RSTRING_PTR(str); - if (!s || RSTRING_LEN(str) == 0) return Qnil; - t = e = RSTRING_END(str); - - /* remove trailing spaces or '\0's */ - if (single_byte_optimizable(str)) { - unsigned char c; - while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--; - } - else { - char *tp; - - while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) { - unsigned int c = rb_enc_codepoint(tp, e, enc); - if (c && !rb_isspace(c)) break; - t = tp; - } - } - if (t < e) { - long len = t-start; + RSTRING_GETMEM(str, start, olen); + roffset = rstrip_offset(str, start, start+olen, enc); + if (roffset > 0) { + long len = olen - roffset; STR_SET_LEN(str, len); TERM_FILL(start+len, rb_enc_mbminlen(enc)); @@ -7309,9 +7330,16 @@ rb_str_rstrip_bang(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L7330 static VALUE rb_str_rstrip(VALUE str) { - str = rb_str_dup(str); - rb_str_rstrip_bang(str); - return str; + rb_encoding *enc; + char *start; + long olen, roffset; + + enc = STR_ENC_GET(str); + RSTRING_GETMEM(str, start, olen); + roffset = rstrip_offset(str, start, start+olen, enc); + + if (roffset <= 0) return rb_str_dup(str); + return rb_str_subseq(str, 0, olen-roffset); } @@ -7326,11 +7354,27 @@ rb_str_rstrip(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L7354 static VALUE rb_str_strip_bang(VALUE str) { - VALUE l = rb_str_lstrip_bang(str); - VALUE r = rb_str_rstrip_bang(str); + char *start; + long olen, loffset, roffset; + rb_encoding *enc; - if (NIL_P(l) && NIL_P(r)) return Qnil; - return str; + str_modify_keep_cr(str); + enc = STR_ENC_GET(str); + RSTRING_GETMEM(str, start, olen); + loffset = lstrip_offset(str, start, start+olen, enc); + roffset = rstrip_offset(str, start+loffset, start+olen, enc); + + if (loffset > 0 || roffset > 0) { + long len = olen-roffset; + if (loffset > 0) { + len -= loffset; + memmove(start, start + loffset, len); + } + STR_SET_LEN(str, len); + TERM_FILL(start+len, rb_enc_mbminlen(enc)); + return str; + } + return Qnil; } @@ -7347,9 +7391,16 @@ rb_str_strip_bang(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L7391 static VALUE rb_str_strip(VALUE str) { - str = rb_str_dup(str); - rb_str_strip_bang(str); - return str; + char *start; + long olen, loffset, roffset; + rb_encoding *enc = STR_ENC_GET(str); + + RSTRING_GETMEM(str, start, olen); + loffset = lstrip_offset(str, start, start+olen, enc); + roffset = rstrip_offset(str, start+loffset, start+olen, enc); + + if (loffset <= 0 && roffset <= 0) return rb_str_dup(str); + return rb_str_subseq(str, loffset, olen-loffset-roffset); } static VALUE -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/