ruby-changes:43255
From: duerst <ko1@a...>
Date: Wed, 8 Jun 2016 21:28:47 +0900 (JST)
Subject: [ruby-changes:43255] duerst:r55329 (trunk): * string.c: New static function rb_str_ascii_casemap; special-casing :ascii option in rb_str_upcase_bang and rb_str_downcase_bang.
duerst 2016-06-08 21:28:42 +0900 (Wed, 08 Jun 2016) New Revision: 55329 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=55329 Log: * string.c: New static function rb_str_ascii_casemap; special-casing :ascii option in rb_str_upcase_bang and rb_str_downcase_bang. * regenc.c: Fix a bug (wrong use of unnecessary slack at end of string). * regenc.h -> include/ruby/oniguruma.h: Move declaration of onigenc_ascii_only_case_map so that it is visible in string.c. Modified files: trunk/ChangeLog trunk/include/ruby/oniguruma.h trunk/regenc.c trunk/regenc.h trunk/string.c Index: include/ruby/oniguruma.h =================================================================== --- include/ruby/oniguruma.h (revision 55328) +++ include/ruby/oniguruma.h (revision 55329) @@ -229,6 +229,14 @@ ONIG_EXTERN const OnigEncodingType OnigE https://github.com/ruby/ruby/blob/trunk/include/ruby/oniguruma.h#L229 #define ONIG_ENCODING_UNDEF ((OnigEncoding )0) +#ifdef ONIG_CASE_MAPPING + /* this declaration needs to be here because it is used in string.c */ + ONIG_EXTERN int onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP, + const OnigUChar** pp, const OnigUChar* end, + OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc)); +#endif /* ONIG_CASE_MAPPING */ + /* work size */ #define ONIGENC_CODE_TO_MBC_MAXLEN 7 Index: regenc.c =================================================================== --- regenc.c (revision 55328) +++ regenc.c (revision 55329) @@ -968,9 +968,7 @@ onigenc_ascii_only_case_map (OnigCaseFol https://github.com/ruby/ruby/blob/trunk/regenc.c#L968 OnigCaseFoldType flags = *flagP; int codepoint_length; - to_end -= 4; /* longest possible length of a single character */ - - while (*pp<end && to<=to_end) { + while (*pp<end && to<to_end) { codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end); if (codepoint_length < 0) return codepoint_length; /* encoding invalid */ Index: ChangeLog 
=================================================================== --- ChangeLog (revision 55328) +++ ChangeLog (revision 55329) @@ -1,3 +1,13 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Wed Jun 8 21:28:36 2016 Martin Duerst <duerst@i...> + + * string.c: New static function rb_str_ascii_casemap; special-casing + :ascii option in rb_str_upcase_bang and rb_str_downcase_bang. + + * regenc.c: Fix a bug (wrong use of unnecessary slack at end of string). + + * regenc.h -> include/ruby/oniguruma.h: Move declaration of + onigenc_ascii_only_case_map so that it is visible in string.c. + Wed Jun 8 20:33:44 2016 Naohisa Goto <ngotogenome@g...> * include/ruby/intern.h: Remove excess semicolons in PUREFUNC(). Index: regenc.h =================================================================== --- regenc.h (revision 55328) +++ regenc.h (revision 55329) @@ -133,7 +133,6 @@ CONSTFUNC(ONIG_EXTERN int onigenc_not_su https://github.com/ruby/ruby/blob/trunk/regenc.h#L133 PUREFUNC(ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc))); #ifdef ONIG_CASE_MAPPING - ONIG_EXTERN int onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)); ONIG_EXTERN int onigenc_single_byte_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)); #endif /* ONIG_CASE_MAPPING */ Index: string.c =================================================================== --- string.c (revision 55328) +++ string.c (revision 55329) @@ -5833,6 +5833,29 @@ rb_str_casemap(VALUE source, OnigCaseFol https://github.com/ruby/ruby/blob/trunk/string.c#L5833 return target; } +static void +rb_str_ascii_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) +{ + OnigUChar *source_current, *source_end; + int old_length = RSTRING_LEN(source); 
+ int length_or_invalid; + + if (old_length == 0) return; + + source_current = (OnigUChar*)RSTRING_PTR(source); + source_end = (OnigUChar*)RSTRING_END(source); + + length_or_invalid = onigenc_ascii_only_case_map(flags, + (const OnigUChar**)&source_current, source_end, + source_current, source_end, enc); + if (length_or_invalid < 0) + rb_raise(rb_eArgError, "input string invalid"); +/* if (length_or_invalid != old_length) +printf("problem with rb_str_ascii_casemap; old_length=%d, new_length=%d\n", old_length, length_or_invalid), + rb_raise(rb_eArgError, "internal problem with rb_str_ascii_casemap"); +*/ +} + /* * call-seq: * str.upcase! -> str or nil @@ -5855,7 +5878,8 @@ rb_str_upcase_bang(int argc, VALUE *argv https://github.com/ruby/ruby/blob/trunk/string.c#L5878 str_modify_keep_cr(str); enc = STR_ENC_GET(str); rb_str_check_dummy_enc(enc); - if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) { + if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1) + || (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) { char *s = RSTRING_PTR(str), *send = RSTRING_END(str); while (s < send) { @@ -5914,14 +5938,14 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L5938 rb_str_downcase_bang(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; - int modify = 0; OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE; flags = check_case_options(argc, argv, flags); str_modify_keep_cr(str); enc = STR_ENC_GET(str); rb_str_check_dummy_enc(enc); - if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) { + if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1) + || (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) { char *s = RSTRING_PTR(str), *send = RSTRING_END(str); while (s < send) { @@ -5929,17 +5953,17 @@ rb_str_downcase_bang(int argc, VALUE *ar 
https://github.com/ruby/ruby/blob/trunk/string.c#L5953 if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { *s = 'a' + (c - 'A'); - modify = 1; + flags |= ONIGENC_CASE_MODIFIED; } s++; } } - else { + else if (flags&ONIGENC_CASE_ASCII_ONLY) + rb_str_ascii_casemap(str, &flags, enc); + else str_shared_replace(str, rb_str_casemap(str, &flags, enc)); - modify = ONIGENC_CASE_MODIFIED & flags; - } - if (modify) return str; + if (ONIGENC_CASE_MODIFIED&flags) return str; return Qnil; } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/