ruby-changes:43677
From: duerst <ko1@a...>
Date: Tue, 26 Jul 2016 15:54:24 +0900 (JST)
Subject: [ruby-changes:43677] duerst:r55750 (trunk): * enc/windows_1251.c, test/ruby/enc/test_case_comprehensive.rb:
duerst 2016-07-26 15:54:18 +0900 (Tue, 26 Jul 2016) New Revision: 55750 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=55750 Log: * enc/windows_1251.c, test/ruby/enc/test_case_comprehensive.rb: Implement non-ASCII case conversion for Windows-1251, by Shunsuke Sato. Modified files: trunk/ChangeLog trunk/enc/windows_1253.c trunk/test/ruby/enc/test_case_comprehensive.rb Index: enc/windows_1253.c =================================================================== --- enc/windows_1253.c (revision 55749) +++ enc/windows_1253.c (revision 55750) @@ -62,9 +62,9 @@ static const UChar EncCP1253_ToLowerCase https://github.com/ruby/ruby/blob/trunk/enc/windows_1253.c#L62 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\240', '\241', '\334', '\243', '\244', '\245', '\246', '\247', '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', + '\260', '\261', '\262', '\263', '\264', '\354', '\334', '\267', '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', @@ -97,9 +97,9 @@ static const unsigned short EncCP1253_Ct https://github.com/ruby/ruby/blob/trunk/enc/windows_1253.c#L97 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, + 0x0284, 0x01a0, 0x34a2, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2, 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, @@ -198,8 +198,8 @@ apply_all_case_fold(OnigCaseFoldType fla https://github.com/ruby/ruby/blob/trunk/enc/windows_1253.c#L198 OnigEncoding enc ARG_UNUSED) { return onigenc_apply_all_case_fold_with_map( - numberof(CaseFoldMap), CaseFoldMap, 0, - flag, f, arg); + numberof(CaseFoldMap), CaseFoldMap, 0, + flag, f, arg); } static int @@ -213,6 +213,66 @@ get_case_fold_codes_by_str(OnigCaseFoldT https://github.com/ruby/ruby/blob/trunk/enc/windows_1253.c#L213 flag, p, end, items); } +static int +case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) +{ + OnigCodePoint code; + OnigUChar *to_start = to; + OnigCaseFoldType flags = *flagP; + + while (*pp<end && to<to_end) { + code = *(*pp)++; + if (code==0xF2) { + if (flags&ONIGENC_CASE_UPCASE) { + flags |= ONIGENC_CASE_MODIFIED; + code = 0xD3; + } + else if (flags&ONIGENC_CASE_FOLD) { + flags |= ONIGENC_CASE_MODIFIED; + code = 0xF3; + } + } + else if (code==0xB5) { + if (flags&ONIGENC_CASE_UPCASE) { + flags |= ONIGENC_CASE_MODIFIED; + code = 0xCC; + } + else if (flags&ONIGENC_CASE_FOLD) { + flags |= ONIGENC_CASE_MODIFIED; + code = 0xEC; + } + } + else if (code==0xC0 || code==0xE0 || code==0xB6) ; + else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_UPPER) + && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + flags |= ONIGENC_CASE_MODIFIED; + code = ENC_CP1253_TO_LOWER_CASE(code); + } + else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags&ONIGENC_CASE_UPCASE)) { + flags |= ONIGENC_CASE_MODIFIED; + if (code==0xB5) + code = 0xCC; + else if (code==0xDC) + code = 0xA2; + else if (code>=0xDD && code<=0xDF) + code -= 0x25; + else if (code==0xFC) + code = 0xBC; + else if (code==0xFD || code==0xFE) + code -= 0x3F; + else + code -= 0x20; + } + *to++ = code; + if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + } + *flagP = flags; + return (int)(to-to_start); +} OnigEncodingDefine(windows_1253, Windows_1253) = { onigenc_single_byte_mbc_enc_len, @@ -233,6 +293,6 @@ OnigEncodingDefine(windows_1253, Windows https://github.com/ruby/ruby/blob/trunk/enc/windows_1253.c#L293 onigenc_always_true_is_allowed_reverse_match, 0, ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, + case_map, }; ENC_ALIAS("CP1253", "Windows-1253") Index: test/ruby/enc/test_case_comprehensive.rb =================================================================== --- test/ruby/enc/test_case_comprehensive.rb (revision 55749) +++ test/ruby/enc/test_case_comprehensive.rb (revision 55750) @@ -289,7 +289,7 @@ TestComprehensiveCaseFold.data_files_ava https://github.com/ruby/ruby/blob/trunk/test/ruby/enc/test_case_comprehensive.rb#L289 generate_ascii_only_case_mapping_tests 'Windows-1250' generate_case_mapping_tests 'Windows-1251' generate_case_mapping_tests 'Windows-1252' - generate_ascii_only_case_mapping_tests 'Windows-1253' + generate_case_mapping_tests 'Windows-1253' generate_ascii_only_case_mapping_tests 'Windows-1254' generate_case_mapping_tests 'Windows-1255' generate_ascii_only_case_mapping_tests 'Windows-1256' Index: ChangeLog =================================================================== --- ChangeLog (revision 55749) +++ ChangeLog (revision 55750) @@ -1,8 +1,13 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 -Tue Jul 26 15:30:37 2016 Martin Duerst <duerst@i...> +Tue Jul 26 15:54:17 2016 Martin Duerst <duerst@i...> * enc/windows_1251.c, test/ruby/enc/test_case_comprehensive.rb: Implement non-ASCII case conversion for Windows-1251, by Shunsuke Sato. +Tue Jul 26 15:30:37 2016 Martin Duerst <duerst@i...> + + * enc/windows_1253.c, test/ruby/enc/test_case_comprehensive.rb: + Implement non-ASCII case conversion for Windows-1253, by Takumi Koyama. + Tue Jul 26 13:04:59 2016 Martin Duerst <duerst@i...> * test/ruby/enc/test_case_comprehensive.rb: Add explicit skip test for -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/