ruby-changes:41832
From: duerst <ko1@a...>
Date: Tue, 23 Feb 2016 21:52:33 +0900 (JST)
Subject: [ruby-changes:41832] duerst:r53906 (trunk): * enc/unicode/case-folding.rb, casefold.h: Outputting actual titlecase
duerst 2016-02-23 21:53:10 +0900 (Tue, 23 Feb 2016) New Revision: 53906 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=53906 Log: * enc/unicode/case-folding.rb, casefold.h: Outputting actual titlecase data (new table, with indices from other tables). * enc/unicode.c: Ignoring titlecase data indices for the moment. (with Kimihito Matsui) Modified files: trunk/ChangeLog trunk/enc/unicode/case-folding.rb trunk/enc/unicode/casefold.h trunk/enc/unicode.c Index: enc/unicode/casefold.h =================================================================== --- enc/unicode/casefold.h (revision 53905) +++ enc/unicode/casefold.h (revision 53906) @@ -59,7 +59,7 @@ static const CaseFold_11_Type CaseFold_1 https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L59 {0x00dc, {1|F|D, {0x00fc}}}, {0x00dd, {1|F|D, {0x00fd}}}, {0x00de, {1|F|D, {0x00fe}}}, - {0x00df, {2|F|T, {0x0073, 0x0073}}}, + {0x00df, {2|F|T(0), {0x0073, 0x0073}}}, {0x0100, {1|F|D, {0x0101}}}, {0x0102, {1|F|D, {0x0103}}}, {0x0104, {1|F|D, {0x0105}}}, @@ -160,12 +160,12 @@ static const CaseFold_11_Type CaseFold_1 https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L160 {0x01b7, {1|F|D, {0x0292}}}, {0x01b8, {1|F|D, {0x01b9}}}, {0x01bc, {1|F|D, {0x01bd}}}, - {0x01c4, {1|F|D|T, {0x01c6}}}, - {0x01c5, {1|F|D|T, {0x01c6}}}, - {0x01c7, {1|F|D|T, {0x01c9}}}, - {0x01c8, {1|F|D|T, {0x01c9}}}, - {0x01ca, {1|F|D|T, {0x01cc}}}, - {0x01cb, {1|F|D|T, {0x01cc}}}, + {0x01c4, {1|F|D|T(1), {0x01c6}}}, + {0x01c5, {1|F|D|T(2), {0x01c6}}}, + {0x01c7, {1|F|D|T(3), {0x01c9}}}, + {0x01c8, {1|F|D|T(4), {0x01c9}}}, + {0x01ca, {1|F|D|T(5), {0x01cc}}}, + {0x01cb, {1|F|D|T(6), {0x01cc}}}, {0x01cd, {1|F|D, {0x01ce}}}, {0x01cf, {1|F|D, {0x01d0}}}, {0x01d1, {1|F|D, {0x01d2}}}, @@ -184,8 +184,8 @@ static const CaseFold_11_Type CaseFold_1 https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L184 {0x01ec, {1|F|D, {0x01ed}}}, {0x01ee, {1|F|D, {0x01ef}}}, {0x01f0, {2|F, {0x006a, 0x030c}}}, - {0x01f1, {1|F|D|T, {0x01f3}}}, - {0x01f2, {1|F|D|T, {0x01f3}}}, + {0x01f1, {1|F|D|T(7), {0x01f3}}}, + {0x01f2, {1|F|D|T(8), {0x01f3}}}, {0x01f4, {1|F|D, {0x01f5}}}, {0x01f6, {1|F|D, {0x0195}}}, {0x01f7, {1|F|D, {0x01bf}}}, @@ -486,7 +486,7 @@ static const CaseFold_11_Type CaseFold_1 https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L486 {0x0554, {1|F|D, {0x0584}}}, {0x0555, {1|F|D, {0x0585}}}, {0x0556, {1|F|D, {0x0586}}}, - {0x0587, {2|F|T, {0x0565, 0x0582}}}, + {0x0587, {2|F|T(9), {0x0565, 0x0582}}}, {0x10a0, {1|F|D, {0x2d00}}}, {0x10a1, {1|F|D, {0x2d01}}}, {0x10a2, {1|F|D, {0x2d02}}}, @@ -715,75 +715,75 @@ static const CaseFold_11_Type CaseFold_1 https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L715 {0x1f6d, {1|F|D, {0x1f65}}}, {0x1f6e, {1|F|D, {0x1f66}}}, {0x1f6f, {1|F|D, {0x1f67}}}, - {0x1f80, {2|F|T, {0x1f00, 0x03b9}}}, - {0x1f81, {2|F|T, {0x1f01, 0x03b9}}}, - {0x1f82, {2|F|T, {0x1f02, 0x03b9}}}, - {0x1f83, {2|F|T, {0x1f03, 0x03b9}}}, - {0x1f84, {2|F|T, {0x1f04, 0x03b9}}}, - {0x1f85, {2|F|T, {0x1f05, 0x03b9}}}, - {0x1f86, {2|F|T, {0x1f06, 0x03b9}}}, - {0x1f87, {2|F|T, {0x1f07, 0x03b9}}}, - {0x1f88, {2|F|T, {0x1f00, 0x03b9}}}, - {0x1f89, {2|F|T, {0x1f01, 0x03b9}}}, - {0x1f8a, {2|F|T, {0x1f02, 0x03b9}}}, - {0x1f8b, {2|F|T, {0x1f03, 0x03b9}}}, - {0x1f8c, {2|F|T, {0x1f04, 0x03b9}}}, - {0x1f8d, {2|F|T, {0x1f05, 0x03b9}}}, - {0x1f8e, {2|F|T, {0x1f06, 0x03b9}}}, - {0x1f8f, {2|F|T, {0x1f07, 0x03b9}}}, - {0x1f90, {2|F|T, {0x1f20, 0x03b9}}}, - {0x1f91, {2|F|T, {0x1f21, 0x03b9}}}, - {0x1f92, {2|F|T, {0x1f22, 0x03b9}}}, - {0x1f93, {2|F|T, {0x1f23, 0x03b9}}}, - {0x1f94, {2|F|T, {0x1f24, 0x03b9}}}, - {0x1f95, {2|F|T, {0x1f25, 0x03b9}}}, - {0x1f96, {2|F|T, {0x1f26, 0x03b9}}}, - {0x1f97, {2|F|T, {0x1f27, 0x03b9}}}, - {0x1f98, {2|F|T, {0x1f20, 0x03b9}}}, - {0x1f99, {2|F|T, {0x1f21, 0x03b9}}}, - {0x1f9a, {2|F|T, {0x1f22, 0x03b9}}}, - {0x1f9b, {2|F|T, {0x1f23, 0x03b9}}}, - {0x1f9c, {2|F|T, {0x1f24, 0x03b9}}}, - {0x1f9d, {2|F|T, {0x1f25, 0x03b9}}}, - {0x1f9e, {2|F|T, {0x1f26, 0x03b9}}}, - {0x1f9f, {2|F|T, {0x1f27, 0x03b9}}}, - {0x1fa0, {2|F|T, {0x1f60, 0x03b9}}}, - {0x1fa1, {2|F|T, {0x1f61, 0x03b9}}}, - {0x1fa2, {2|F|T, {0x1f62, 0x03b9}}}, - {0x1fa3, {2|F|T, {0x1f63, 0x03b9}}}, - {0x1fa4, {2|F|T, {0x1f64, 0x03b9}}}, - {0x1fa5, {2|F|T, {0x1f65, 0x03b9}}}, - {0x1fa6, {2|F|T, {0x1f66, 0x03b9}}}, - {0x1fa7, {2|F|T, {0x1f67, 0x03b9}}}, - {0x1fa8, {2|F|T, {0x1f60, 0x03b9}}}, - {0x1fa9, {2|F|T, {0x1f61, 0x03b9}}}, - {0x1faa, {2|F|T, {0x1f62, 0x03b9}}}, - {0x1fab, {2|F|T, {0x1f63, 0x03b9}}}, - {0x1fac, {2|F|T, {0x1f64, 0x03b9}}}, - {0x1fad, {2|F|T, {0x1f65, 0x03b9}}}, - {0x1fae, {2|F|T, {0x1f66, 0x03b9}}}, - {0x1faf, {2|F|T, {0x1f67, 0x03b9}}}, - {0x1fb2, {2|F|T, {0x1f70, 0x03b9}}}, - {0x1fb3, {2|F|T, {0x03b1, 0x03b9}}}, - {0x1fb4, {2|F|T, {0x03ac, 0x03b9}}}, + {0x1f80, {2|F|T(10), {0x1f00, 0x03b9}}}, + {0x1f81, {2|F|T(11), {0x1f01, 0x03b9}}}, + {0x1f82, {2|F|T(12), {0x1f02, 0x03b9}}}, + {0x1f83, {2|F|T(13), {0x1f03, 0x03b9}}}, + {0x1f84, {2|F|T(14), {0x1f04, 0x03b9}}}, + {0x1f85, {2|F|T(15), {0x1f05, 0x03b9}}}, + {0x1f86, {2|F|T(16), {0x1f06, 0x03b9}}}, + {0x1f87, {2|F|T(17), {0x1f07, 0x03b9}}}, + {0x1f88, {2|F|T(18), {0x1f00, 0x03b9}}}, + {0x1f89, {2|F|T(19), {0x1f01, 0x03b9}}}, + {0x1f8a, {2|F|T(20), {0x1f02, 0x03b9}}}, + {0x1f8b, {2|F|T(21), {0x1f03, 0x03b9}}}, + {0x1f8c, {2|F|T(22), {0x1f04, 0x03b9}}}, + {0x1f8d, {2|F|T(23), {0x1f05, 0x03b9}}}, + {0x1f8e, {2|F|T(24), {0x1f06, 0x03b9}}}, + {0x1f8f, {2|F|T(25), {0x1f07, 0x03b9}}}, + {0x1f90, {2|F|T(26), {0x1f20, 0x03b9}}}, + {0x1f91, {2|F|T(27), {0x1f21, 0x03b9}}}, + {0x1f92, {2|F|T(28), {0x1f22, 0x03b9}}}, + {0x1f93, {2|F|T(29), {0x1f23, 0x03b9}}}, + {0x1f94, {2|F|T(30), {0x1f24, 0x03b9}}}, + {0x1f95, {2|F|T(31), {0x1f25, 0x03b9}}}, + {0x1f96, {2|F|T(32), {0x1f26, 0x03b9}}}, + {0x1f97, {2|F|T(33), {0x1f27, 0x03b9}}}, + {0x1f98, {2|F|T(34), {0x1f20, 0x03b9}}}, + {0x1f99, {2|F|T(35), {0x1f21, 0x03b9}}}, + {0x1f9a, {2|F|T(36), {0x1f22, 0x03b9}}}, + {0x1f9b, {2|F|T(37), {0x1f23, 0x03b9}}}, + {0x1f9c, {2|F|T(38), {0x1f24, 0x03b9}}}, + {0x1f9d, {2|F|T(39), {0x1f25, 0x03b9}}}, + {0x1f9e, {2|F|T(40), {0x1f26, 0x03b9}}}, + {0x1f9f, {2|F|T(41), {0x1f27, 0x03b9}}}, + {0x1fa0, {2|F|T(42), {0x1f60, 0x03b9}}}, + {0x1fa1, {2|F|T(43), {0x1f61, 0x03b9}}}, + {0x1fa2, {2|F|T(44), {0x1f62, 0x03b9}}}, + {0x1fa3, {2|F|T(45), {0x1f63, 0x03b9}}}, + {0x1fa4, {2|F|T(46), {0x1f64, 0x03b9}}}, + {0x1fa5, {2|F|T(47), {0x1f65, 0x03b9}}}, + {0x1fa6, {2|F|T(48), {0x1f66, 0x03b9}}}, + {0x1fa7, {2|F|T(49), {0x1f67, 0x03b9}}}, + {0x1fa8, {2|F|T(50), {0x1f60, 0x03b9}}}, + {0x1fa9, {2|F|T(51), {0x1f61, 0x03b9}}}, + {0x1faa, {2|F|T(52), {0x1f62, 0x03b9}}}, + {0x1fab, {2|F|T(53), {0x1f63, 0x03b9}}}, + {0x1fac, {2|F|T(54), {0x1f64, 0x03b9}}}, + {0x1fad, {2|F|T(55), {0x1f65, 0x03b9}}}, + {0x1fae, {2|F|T(56), {0x1f66, 0x03b9}}}, + {0x1faf, {2|F|T(57), {0x1f67, 0x03b9}}}, + {0x1fb2, {2|F|T(58), {0x1f70, 0x03b9}}}, + {0x1fb3, {2|F|T(59), {0x03b1, 0x03b9}}}, + {0x1fb4, {2|F|T(60), {0x03ac, 0x03b9}}}, {0x1fb6, {2|F, {0x03b1, 0x0342}}}, - {0x1fb7, {3|F|T, {0x03b1, 0x0342, 0x03b9}}}, + {0x1fb7, {3|F|T(61), {0x03b1, 0x0342, 0x03b9}}}, {0x1fb8, {1|F|D, {0x1fb0}}}, {0x1fb9, {1|F|D, {0x1fb1}}}, {0x1fba, {1|F|D, {0x1f70}}}, {0x1fbb, {1|F|D, {0x1f71}}}, - {0x1fbc, {2|F|T, {0x03b1, 0x03b9}}}, + {0x1fbc, {2|F|T(62), {0x03b1, 0x03b9}}}, {0x1fbe, {1|F, {0x03b9}}}, - {0x1fc2, {2|F|T, {0x1f74, 0x03b9}}}, - {0x1fc3, {2|F|T, {0x03b7, 0x03b9}}}, - {0x1fc4, {2|F|T, {0x03ae, 0x03b9}}}, + {0x1fc2, {2|F|T(63), {0x1f74, 0x03b9}}}, + {0x1fc3, {2|F|T(64), {0x03b7, 0x03b9}}}, + {0x1fc4, {2|F|T(65), {0x03ae, 0x03b9}}}, {0x1fc6, {2|F, {0x03b7, 0x0342}}}, - {0x1fc7, {3|F|T, {0x03b7, 0x0342, 0x03b9}}}, + {0x1fc7, {3|F|T(66), {0x03b7, 0x0342, 0x03b9}}}, {0x1fc8, {1|F|D, {0x1f72}}}, {0x1fc9, {1|F|D, {0x1f73}}}, {0x1fca, {1|F|D, {0x1f74}}}, {0x1fcb, {1|F|D, {0x1f75}}}, - {0x1fcc, {2|F|T, {0x03b7, 0x03b9}}}, + {0x1fcc, {2|F|T(67), {0x03b7, 0x03b9}}}, {0x1fd2, {3|F, {0x03b9, 0x0308, 0x0300}}}, {0x1fd3, {3|F, {0x03b9, 0x0308, 0x0301}}}, {0x1fd6, {2|F, {0x03b9, 0x0342}}}, @@ -802,16 +802,16 @@ static const CaseFold_11_Type CaseFold_1 https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L802 {0x1fea, {1|F|D, {0x1f7a}}}, {0x1feb, {1|F|D, {0x1f7b}}}, {0x1fec, {1|F|D, {0x1fe5}}}, - {0x1ff2, {2|F|T, {0x1f7c, 0x03b9}}}, - {0x1ff3, {2|F|T, {0x03c9, 0x03b9}}}, - {0x1ff4, {2|F|T, {0x03ce, 0x03b9}}}, + {0x1ff2, {2|F|T(68), {0x1f7c, 0x03b9}}}, + {0x1ff3, {2|F|T(69), {0x03c9, 0x03b9}}}, + {0x1ff4, {2|F|T(70), {0x03ce, 0x03b9}}}, {0x1ff6, {2|F, {0x03c9, 0x0342}}}, - {0x1ff7, {3|F|T, {0x03c9, 0x0342, 0x03b9}}}, + {0x1ff7, {3|F|T(71), {0x03c9, 0x0342, 0x03b9}}}, {0x1ff8, {1|F|D, {0x1f78}}}, {0x1ff9, {1|F|D, {0x1f79}}}, {0x1ffa, {1|F|D, {0x1f7c}}}, {0x1ffb, {1|F|D, {0x1f7d}}}, - {0x1ffc, {2|F|T, {0x03c9, 0x03b9}}}, + {0x1ffc, {2|F|T(72), {0x03c9, 0x03b9}}}, {0x2126, {1|F|D, {0x03c9}}}, {0x212a, {1|F|D, {0x006b}}}, {0x212b, {1|F|D, {0x00e5}}}, @@ -1161,18 +1161,18 @@ static const CaseFold_11_Type CaseFold_1 https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L1161 {0xabbd, {1|F|U, {0x13ed}}}, {0xabbe, {1|F|U, {0x13ee}}}, {0xabbf, {1|F|U, {0x13ef}}}, - {0xfb00, {2|F|T, {0x0066, 0x0066}}}, - {0xfb01, {2|F|T, {0x0066, 0x0069}}}, - {0xfb02, {2|F|T, {0x0066, 0x006c}}}, - {0xfb03, {3|F|T, {0x0066, 0x0066, 0x0069}}}, - {0xfb04, {3|F|T, {0x0066, 0x0066, 0x006c}}}, - {0xfb05, {2|F|T, {0x0073, 0x0074}}}, - {0xfb06, {2|F|T, {0x0073, 0x0074}}}, - {0xfb13, {2|F|T, {0x0574, 0x0576}}}, - {0xfb14, {2|F|T, {0x0574, 0x0565}}}, - {0xfb15, {2|F|T, {0x0574, 0x056b}}}, - {0xfb16, {2|F|T, {0x057e, 0x0576}}}, - {0xfb17, {2|F|T, {0x0574, 0x056d}}}, + {0xfb00, {2|F|T(73), {0x0066, 0x0066}}}, + {0xfb01, {2|F|T(74), {0x0066, 0x0069}}}, + {0xfb02, {2|F|T(75), {0x0066, 0x006c}}}, + {0xfb03, {3|F|T(76), {0x0066, 0x0066, 0x0069}}}, + {0xfb04, {3|F|T(77), {0x0066, 0x0066, 0x006c}}}, + {0xfb05, {2|F|T(78), {0x0073, 0x0074}}}, + {0xfb06, {2|F|T(79), {0x0073, 0x0074}}}, + {0xfb13, {2|F|T(80), {0x0574, 0x0576}}}, + {0xfb14, {2|F|T(81), {0x0574, 0x0565}}}, + {0xfb15, {2|F|T(82), {0x0574, 0x056b}}}, + {0xfb16, {2|F|T(83), {0x057e, 0x0576}}}, + {0xfb17, {2|F|T(84), {0x0574, 0x056d}}}, {0xff21, {1|F|D, {0xff41}}}, {0xff22, {1|F|D, {0xff42}}}, {0xff23, {1|F|D, {0xff43}}}, @@ -3298,9 +3298,9 @@ static const CaseUnfold_11_Type CaseUnfo https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L3298 {0x01b9, {1|U, {0x01b8}}}, {0x01bd, {1|U, {0x01bc}}}, {0x01bf, {1|U, {0x01f7}}}, - {0x01c6, {2|U|T, {0x01c4, 0x01c5}}}, - {0x01c9, {2|U|T, {0x01c7, 0x01c8}}}, - {0x01cc, {2|U|T, {0x01ca, 0x01cb}}}, + {0x01c6, {2|U|T(85), {0x01c4, 0x01c5}}}, + {0x01c9, {2|U|T(86), {0x01c7, 0x01c8}}}, + {0x01cc, {2|U|T(87), {0x01ca, 0x01cb}}}, {0x01ce, {1|U, {0x01cd}}}, {0x01d0, {1|U, {0x01cf}}}, {0x01d2, {1|U, {0x01d1}}}, @@ -3319,7 +3319,7 @@ static const CaseUnfold_11_Type CaseUnfo https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L3319 {0x01eb, {1|U, {0x01ea}}}, {0x01ed, {1|U, {0x01ec}}}, {0x01ef, {1|U, {0x01ee}}}, - {0x01f3, {2|U|T, {0x01f1, 0x01f2}}}, + {0x01f3, {2|U|T(88), {0x01f1, 0x01f2}}}, {0x01f5, {1|U, {0x01f4}}}, {0x01f9, {1|U, {0x01f8}}}, {0x01fb, {1|U, {0x01fa}}}, @@ -6246,3 +6246,94 @@ onigenc_unicode_CaseUnfold_13_lookup(con https://github.com/ruby/ruby/blob/trunk/enc/unicode/casefold.h#L6246 return 0; } +CodePointList3 TitleCase[] = { + {2, {0x0053, 0x0073}}, + {1, {0x01C5}}, + {1, {0x01C5}}, + {1, {0x01C8}}, + {1, {0x01C8}}, + {1, {0x01CB}}, + {1, {0x01CB}}, + {1, {0x01F2}}, + {1, {0x01F2}}, + {2, {0x0535, 0x0582}}, + {1, {0x1F88}}, + {1, {0x1F89}}, + {1, {0x1F8A}}, + {1, {0x1F8B}}, + {1, {0x1F8C}}, + {1, {0x1F8D}}, + {1, {0x1F8E}}, + {1, {0x1F8F}}, + {1, {0x1F88}}, + {1, {0x1F89}}, + {1, {0x1F8A}}, + {1, {0x1F8B}}, + {1, {0x1F8C}}, + {1, {0x1F8D}}, + {1, {0x1F8E}}, + {1, {0x1F8F}}, + {1, {0x1F98}}, + {1, {0x1F99}}, + {1, {0x1F9A}}, + {1, {0x1F9B}}, + {1, {0x1F9C}}, + {1, {0x1F9D}}, + {1, {0x1F9E}}, + {1, {0x1F9F}}, + {1, {0x1F98}}, + {1, {0x1F99}}, + {1, {0x1F9A}}, + {1, {0x1F9B}}, + {1, {0x1F9C}}, + {1, {0x1F9D}}, + {1, {0x1F9E}}, + {1, {0x1F9F}}, + {1, {0x1FA8}}, + {1, {0x1FA9}}, + {1, {0x1FAA}}, + {1, {0x1FAB}}, + {1, {0x1FAC}}, + {1, {0x1FAD}}, + {1, {0x1FAE}}, + {1, {0x1FAF}}, + {1, {0x1FA8}}, + {1, {0x1FA9}}, + {1, {0x1FAA}}, + {1, {0x1FAB}}, + {1, {0x1FAC}}, + {1, {0x1FAD}}, + {1, {0x1FAE}}, + {1, {0x1FAF}}, + {2, {0x1FBA, 0x0345}}, + {1, {0x1FBC}}, + {2, {0x0386, 0x0345}}, + {3, {0x0391, 0x0342, 0x0345}}, + {1, {0x1FBC}}, + {2, {0x1FCA, 0x0345}}, + {1, {0x1FCC}}, + {2, {0x0389, 0x0345}}, + {3, {0x0397, 0x0342, 0x0345}}, + {1, {0x1FCC}}, + {2, {0x1FFA, 0x0345}}, + {1, {0x1FFC}}, + {2, {0x038F, 0x0345}}, + {3, {0x03A9, 0x0342, 0x0345}}, + {1, {0x1FFC}}, + {2, {0x0046, 0x0066}}, + {2, {0x0046, 0x0069}}, + {2, {0x0046, 0x006C}}, + {3, {0x0046, 0x0066, 0x0069}}, + {3, {0x0046, 0x0066, 0x006C}}, + {2, {0x0053, 0x0074}}, + {2, {0x0053, 0x0074}}, + {2, {0x0544, 0x0576}}, + {2, {0x0544, 0x0565}}, + {2, {0x0544, 0x056B}}, + {2, {0x054E, 0x0576}}, + {2, {0x0544, 0x056D}}, + {1, {0x01C5}}, + {1, {0x01C8}}, + {1, {0x01CB}}, + {1, {0x01F2}}, +}; Index: enc/unicode/case-folding.rb =================================================================== --- enc/unicode/case-folding.rb (revision 53905) +++ enc/unicode/case-folding.rb (revision 53906) @@ -175,6 +175,9 @@ class CaseFolding https://github.com/ruby/ruby/blob/trunk/enc/unicode/case-folding.rb#L175 name = "CaseUnfold_13" data = print_table(dest, name, mapping_data, name=>unfold[2]) dest.print lookup_hash(name, "CodePointList2", data) + + # TitleCase + dest.print mapping_data.titlecase_output end def debug! @@ -195,15 +198,12 @@ class MapItem https://github.com/ruby/ruby/blob/trunk/enc/unicode/case-folding.rb#L198 @lower = lower unless lower == '' @title = title unless title == '' end - - def flags - "" # preliminary implementation - end end class CaseMapping def initialize (mapping_directory) @mappings = {} + @titlecase = [] IO.readlines(File.expand_path('UnicodeData.txt', mapping_directory), encoding: Encoding::ASCII_8BIT).each do |line| next if line =~ /^</ code, _1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11, upper, lower, title = line.chomp.split ';' @@ -237,11 +237,22 @@ class CaseMapping https://github.com/ruby/ruby/blob/trunk/enc/unicode/case-folding.rb#L237 if item flags += '|U' if to==item.upper flags += '|D' if to==item.lower - flags += '|T' unless item.upper==item.title + unless item.upper == item.title + flags += "|T(#{@titlecase.length})" + @titlecase << item + end end flags end + def titlecase_output + "CodePointList3 TitleCase[] = {\n" + + @titlecase.map do |item| + chars = item.title.split(/ /) + " {#{chars.length}, {" + chars.map {|c| "0x"+c }.join(', ') + "}},\n" + end.join + "};\n" + end + def self.load(*args) new(*args) end @@ -251,6 +262,8 @@ class CaseMappingDummy https://github.com/ruby/ruby/blob/trunk/enc/unicode/case-folding.rb#L262 def flags(from, type, to) "" end + + def titlecase_output() '' end end if $0 == __FILE__ Index: enc/unicode.c =================================================================== --- enc/unicode.c (revision 53905) +++ enc/unicode.c (revision 53906) @@ -143,15 +143,15 @@ code3_equal(const OnigCodePoint *x, cons https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L143 #define U ONIGENC_CASE_UPCASE #define D ONIGENC_CASE_DOWNCASE -#define T ONIGENC_CASE_TITLECASE #define F ONIGENC_CASE_FOLD +#define T(n) ONIGENC_CASE_TITLECASE #include "enc/unicode/casefold.h" #undef U #undef D -#undef T #undef F +#undef T #include "enc/unicode/name2ctype.h" Index: ChangeLog =================================================================== --- ChangeLog (revision 53905) +++ ChangeLog (revision 53906) @@ -1,3 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Tue Feb 23 21:52:24 2016 Martin Duerst <duerst@i...> + + * enc/unicode/case-folding.rb, casefold.h: Outputting actual titlecase + data (new table, with indices from other tables). + * enc/unicode.c: Ignoring titlecase data indices for the moment. + (with Kimihito Matsui) + Tue Feb 23 15:21:14 2016 Martin Duerst <duerst@i...> * enc/unicode/case-folding.rb, casefold.h: Reading casing data from -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/