[前][次][番号順一覧][スレッド一覧]

ruby-changes:13484

From: naruse <ko1@a...>
Date: Thu, 8 Oct 2009 11:49:30 +0900 (JST)
Subject: [ruby-changes:13484] Ruby:r25260 (trunk): * tool/enc-unicode.rb: parse range notation of UnicodeData.txt.

naruse	2009-10-08 11:49:11 +0900 (Thu, 08 Oct 2009)

  New Revision: 25260

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=25260

  Log:
    * tool/enc-unicode.rb: parse range notation of UnicodeData.txt.
    
    * enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
      enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
      follow above change. [ruby-dev:39444]

  Modified files:
    trunk/ChangeLog
    trunk/enc/unicode/name2ctype.h
    trunk/enc/unicode/name2ctype.h.blt
    trunk/enc/unicode/name2ctype.kwd
    trunk/enc/unicode/name2ctype.src
    trunk/test/ruby/test_regexp.rb
    trunk/tool/enc-unicode.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 25259)
+++ ChangeLog	(revision 25260)
@@ -1,3 +1,11 @@
+Thu Oct  8 05:45:14 2009  NARUSE, Yui  <naruse@r...>
+
+	* tool/enc-unicode.rb: parse range notation of UnicodeData.txt.
+
+	* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
+	  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
+	  follow above change. [ruby-dev:39444]
+
 Thu Oct  8 02:46:24 2009  Nobuyoshi Nakada  <nobu@r...>
 
 	* string.c (rb_str_inspect): copy by chunks.
Index: enc/unicode/name2ctype.kwd
===================================================================
--- enc/unicode/name2ctype.kwd	(revision 25259)
+++ enc/unicode/name2ctype.kwd	(revision 25260)
@@ -8,7 +8,7 @@
 
 /* 'Alpha': [[:Alpha:]] */
 static const OnigCodePoint CR_Alpha[] = {
-	482,
+	477,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -342,10 +342,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -378,8 +376,7 @@
 	0xaadb, 0xaadd,
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -485,10 +482,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alpha */
@@ -558,7 +553,7 @@
 
 /* 'Graph': [[:Graph:]] */
 static const OnigCodePoint CR_Graph[] = {
-	492,
+	487,
 	0x0021, 0x007e,
 	0x00a1, 0x00ac,
 	0x00ae, 0x0377,
@@ -879,10 +874,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -910,8 +903,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -1045,10 +1037,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Graph */
@@ -1659,7 +1649,7 @@
 
 /* 'Print': [[:Print:]] */
 static const OnigCodePoint CR_Print[] = {
-	494,
+	489,
 	0x0009, 0x000d,
 	0x0020, 0x007e,
 	0x0085, 0x0085,
@@ -1982,10 +1972,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -2013,8 +2001,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -2148,10 +2135,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Print */
@@ -2915,7 +2900,7 @@
 
 /* 'Word': [[:Word:]] */
 static const OnigCodePoint CR_Word[] = {
-	508,
+	503,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x005f, 0x005f,
@@ -3267,10 +3252,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3305,8 +3288,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3418,17 +3400,15 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Word */
 
 /* 'Alnum': [[:Alnum:]] */
 static const OnigCodePoint CR_Alnum[] = {
-	502,
+	497,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
@@ -3777,10 +3757,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3815,8 +3793,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3925,10 +3902,8 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alnum */
@@ -3948,7 +3923,7 @@
 
 /* 'Assigned': - */
 static const OnigCodePoint CR_Assigned[] = {
-	495,
+	484,
 	0x0000, 0x0377,
 	0x037a, 0x037e,
 	0x0384, 0x038a,
@@ -4264,10 +4239,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -4295,15 +4268,10 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xfa2d,
+	0xd800, 0xfa2d,
 	0xfa30, 0xfa6d,
 	0xfa70, 0xfad9,
 	0xfb00, 0xfb06,
@@ -4433,22 +4401,18 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
 	0xe0100, 0xe01ef,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
+	0xf0000, 0xffffd,
 }; /* CR_Assigned */
 
 /* 'C': Major Category */
 static const OnigCodePoint CR_C[] = {
-	26,
+	20,
 	0x0000, 0x001f,
 	0x007f, 0x009f,
 	0x00ad, 0x00ad,
@@ -4460,21 +4424,15 @@
 	0x202a, 0x202e,
 	0x2060, 0x2064,
 	0x206a, 0x206f,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xf8ff,
+	0xd800, 0xf8ff,
 	0xfeff, 0xfeff,
 	0xfff9, 0xfffb,
 	0x110bd, 0x110bd,
 	0x1d173, 0x1d17a,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10ffff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10ffff,
 }; /* CR_C */
 
 /* 'Cc': General Category */
@@ -4506,7 +4464,7 @@
 
 /* 'Cn': General Category */
 static const OnigCodePoint CR_Cn[] = {
-	495,
+	484,
 	0x0378, 0x0379,
 	0x037f, 0x0383,
 	0x038b, 0x038b,
@@ -4822,9 +4780,7 @@
 	0x31e4, 0x31ef,
 	0x321f, 0x321f,
 	0x32ff, 0x32ff,
-	0x3401, 0x4db4,
 	0x4db6, 0x4dbf,
-	0x4e01, 0x9fca,
 	0x9fcc, 0x9fff,
 	0xa48d, 0xa48f,
 	0xa4c7, 0xa4cf,
@@ -4853,14 +4809,9 @@
 	0xaae0, 0xabbf,
 	0xabee, 0xabef,
 	0xabfa, 0xabff,
-	0xac01, 0xd7a2,
 	0xd7a4, 0xd7af,
 	0xd7c7, 0xd7ca,
 	0xd7fc, 0xd7ff,
-	0xd801, 0xdb7e,
-	0xdb81, 0xdbfe,
-	0xdc01, 0xdffe,
-	0xe001, 0xf8fe,
 	0xfa2e, 0xfa2f,
 	0xfa6e, 0xfa6f,
 	0xfada, 0xfaff,
@@ -4991,42 +4942,32 @@
 	0x1f201, 0x1f20f,
 	0x1f232, 0x1f23f,
 	0x1f249, 0x1ffff,
-	0x20001, 0x2a6d5,
 	0x2a6d7, 0x2a6ff,
-	0x2a701, 0x2b733,
 	0x2b735, 0x2f7ff,
 	0x2fa1e, 0xe0000,
 	0xe0002, 0xe001f,
 	0xe0080, 0xe00ff,
 	0xe01f0, 0xeffff,
-	0xf0001, 0xffffc,
-	0xffffe, 0xfffff,
-	0x100001, 0x10ffff,
+	0xffffe, 0x10ffff,
 }; /* CR_Cn */
 
 /* 'Co': General Category */
 static const OnigCodePoint CR_Co[] = {
-	6,
-	0xe000, 0xe000,
-	0xf8ff, 0xf8ff,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10fffd,
+	3,
+	0xe000, 0xf8ff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10fffd,
 }; /* CR_Co */
 
 /* 'Cs': General Category */
 static const OnigCodePoint CR_Cs[] = {
-	4,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xdfff,
+	1,
+	0xd800, 0xdfff,
 }; /* CR_Cs */
 
 /* 'L': Major Category */
 static const OnigCodePoint CR_L[] = {
-	427,
+	422,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -5311,10 +5252,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -5353,8 +5292,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadd,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -5449,10 +5387,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_L */
 
@@ -6116,7 +6052,7 @@
 
 /* 'Lo': General Category */
 static const OnigCodePoint CR_Lo[] = {
-	316,
+	311,
 	0x01bb, 0x01bb,
 	0x01c0, 0x01c3,
 	0x0294, 0x0294,
@@ -6326,10 +6262,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa014,
 	0xa016, 0xa48c,
 	0xa4d0, 0xa4f7,
@@ -6364,8 +6298,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadc,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -6428,10 +6361,8 @@
 	0x11083, 0x110af,
 	0x12000, 0x1236e,
 	0x13000, 0x1342e,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_Lo */
 
Index: enc/unicode/name2ctype.h
===================================================================
--- enc/unicode/name2ctype.h	(revision 25259)
+++ enc/unicode/name2ctype.h	(revision 25260)
@@ -43,7 +43,7 @@
 
 /* 'Alpha': [[:Alpha:]] */
 static const OnigCodePoint CR_Alpha[] = {
-	482,
+	477,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -377,10 +377,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -413,8 +411,7 @@
 	0xaadb, 0xaadd,
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -520,10 +517,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alpha */
@@ -593,7 +588,7 @@
 
 /* 'Graph': [[:Graph:]] */
 static const OnigCodePoint CR_Graph[] = {
-	492,
+	487,
 	0x0021, 0x007e,
 	0x00a1, 0x00ac,
 	0x00ae, 0x0377,
@@ -914,10 +909,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -945,8 +938,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -1080,10 +1072,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Graph */
@@ -1694,7 +1684,7 @@
 
 /* 'Print': [[:Print:]] */
 static const OnigCodePoint CR_Print[] = {
-	494,
+	489,
 	0x0009, 0x000d,
 	0x0020, 0x007e,
 	0x0085, 0x0085,
@@ -2017,10 +2007,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -2048,8 +2036,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -2183,10 +2170,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Print */
@@ -2950,7 +2935,7 @@
 
 /* 'Word': [[:Word:]] */
 static const OnigCodePoint CR_Word[] = {
-	508,
+	503,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x005f, 0x005f,
@@ -3302,10 +3287,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3340,8 +3323,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3453,17 +3435,15 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Word */
 
 /* 'Alnum': [[:Alnum:]] */
 static const OnigCodePoint CR_Alnum[] = {
-	502,
+	497,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
@@ -3812,10 +3792,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3850,8 +3828,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3960,10 +3937,8 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alnum */
@@ -3984,7 +3959,7 @@
 
 /* 'Assigned': - */
 static const OnigCodePoint CR_Assigned[] = {
-	495,
+	484,
 	0x0000, 0x0377,
 	0x037a, 0x037e,
 	0x0384, 0x038a,
@@ -4300,10 +4275,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -4331,15 +4304,10 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xfa2d,
+	0xd800, 0xfa2d,
 	0xfa30, 0xfa6d,
 	0xfa70, 0xfad9,
 	0xfb00, 0xfb06,
@@ -4469,22 +4437,18 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
 	0xe0100, 0xe01ef,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
+	0xf0000, 0xffffd,
 }; /* CR_Assigned */
 
 /* 'C': Major Category */
 static const OnigCodePoint CR_C[] = {
-	26,
+	20,
 	0x0000, 0x001f,
 	0x007f, 0x009f,
 	0x00ad, 0x00ad,
@@ -4496,21 +4460,15 @@
 	0x202a, 0x202e,
 	0x2060, 0x2064,
 	0x206a, 0x206f,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xf8ff,
+	0xd800, 0xf8ff,
 	0xfeff, 0xfeff,
 	0xfff9, 0xfffb,
 	0x110bd, 0x110bd,
 	0x1d173, 0x1d17a,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10ffff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10ffff,
 }; /* CR_C */
 
 /* 'Cc': General Category */
@@ -4542,7 +4500,7 @@
 
 /* 'Cn': General Category */
 static const OnigCodePoint CR_Cn[] = {
-	495,
+	484,
 	0x0378, 0x0379,
 	0x037f, 0x0383,
 	0x038b, 0x038b,
@@ -4858,9 +4816,7 @@
 	0x31e4, 0x31ef,
 	0x321f, 0x321f,
 	0x32ff, 0x32ff,
-	0x3401, 0x4db4,
 	0x4db6, 0x4dbf,
-	0x4e01, 0x9fca,
 	0x9fcc, 0x9fff,
 	0xa48d, 0xa48f,
 	0xa4c7, 0xa4cf,
@@ -4889,14 +4845,9 @@
 	0xaae0, 0xabbf,
 	0xabee, 0xabef,
 	0xabfa, 0xabff,
-	0xac01, 0xd7a2,
 	0xd7a4, 0xd7af,
 	0xd7c7, 0xd7ca,
 	0xd7fc, 0xd7ff,
-	0xd801, 0xdb7e,
-	0xdb81, 0xdbfe,
-	0xdc01, 0xdffe,
-	0xe001, 0xf8fe,
 	0xfa2e, 0xfa2f,
 	0xfa6e, 0xfa6f,
 	0xfada, 0xfaff,
@@ -5027,42 +4978,32 @@
 	0x1f201, 0x1f20f,
 	0x1f232, 0x1f23f,
 	0x1f249, 0x1ffff,
-	0x20001, 0x2a6d5,
 	0x2a6d7, 0x2a6ff,
-	0x2a701, 0x2b733,
 	0x2b735, 0x2f7ff,
 	0x2fa1e, 0xe0000,
 	0xe0002, 0xe001f,
 	0xe0080, 0xe00ff,
 	0xe01f0, 0xeffff,
-	0xf0001, 0xffffc,
-	0xffffe, 0xfffff,
-	0x100001, 0x10ffff,
+	0xffffe, 0x10ffff,
 }; /* CR_Cn */
 
 /* 'Co': General Category */
 static const OnigCodePoint CR_Co[] = {
-	6,
-	0xe000, 0xe000,
-	0xf8ff, 0xf8ff,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10fffd,
+	3,
+	0xe000, 0xf8ff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10fffd,
 }; /* CR_Co */
 
 /* 'Cs': General Category */
 static const OnigCodePoint CR_Cs[] = {
-	4,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xdfff,
+	1,
+	0xd800, 0xdfff,
 }; /* CR_Cs */
 
 /* 'L': Major Category */
 static const OnigCodePoint CR_L[] = {
-	427,
+	422,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -5347,10 +5288,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -5389,8 +5328,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadd,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -5485,10 +5423,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_L */
 
@@ -6152,7 +6088,7 @@
 
 /* 'Lo': General Category */
 static const OnigCodePoint CR_Lo[] = {
-	316,
+	311,
 	0x01bb, 0x01bb,
 	0x01c0, 0x01c3,
 	0x0294, 0x0294,
@@ -6362,10 +6298,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa014,
 	0xa016, 0xa48c,
 	0xa4d0, 0xa4f7,
@@ -6400,8 +6334,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadc,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -6464,10 +6397,8 @@
 	0x11083, 0x110af,
 	0x12000, 0x1236e,
 	0x13000, 0x1342e,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_Lo */
 
Index: enc/unicode/name2ctype.h.blt
===================================================================
--- enc/unicode/name2ctype.h.blt	(revision 25259)
+++ enc/unicode/name2ctype.h.blt	(revision 25260)
@@ -43,7 +43,7 @@
 
 /* 'Alpha': [[:Alpha:]] */
 static const OnigCodePoint CR_Alpha[] = {
-	482,
+	477,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -377,10 +377,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -413,8 +411,7 @@
 	0xaadb, 0xaadd,
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -520,10 +517,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alpha */
@@ -593,7 +588,7 @@
 
 /* 'Graph': [[:Graph:]] */
 static const OnigCodePoint CR_Graph[] = {
-	492,
+	487,
 	0x0021, 0x007e,
 	0x00a1, 0x00ac,
 	0x00ae, 0x0377,
@@ -914,10 +909,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -945,8 +938,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -1080,10 +1072,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Graph */
@@ -1694,7 +1684,7 @@
 
 /* 'Print': [[:Print:]] */
 static const OnigCodePoint CR_Print[] = {
-	494,
+	489,
 	0x0009, 0x000d,
 	0x0020, 0x007e,
 	0x0085, 0x0085,
@@ -2017,10 +2007,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -2048,8 +2036,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -2183,10 +2170,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Print */
@@ -2950,7 +2935,7 @@
 
 /* 'Word': [[:Word:]] */
 static const OnigCodePoint CR_Word[] = {
-	508,
+	503,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x005f, 0x005f,
@@ -3302,10 +3287,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3340,8 +3323,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3453,17 +3435,15 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Word */
 
 /* 'Alnum': [[:Alnum:]] */
 static const OnigCodePoint CR_Alnum[] = {
-	502,
+	497,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
@@ -3812,10 +3792,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3850,8 +3828,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3960,10 +3937,8 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alnum */
@@ -3984,7 +3959,7 @@
 
 /* 'Assigned': - */
 static const OnigCodePoint CR_Assigned[] = {
-	495,
+	484,
 	0x0000, 0x0377,
 	0x037a, 0x037e,
 	0x0384, 0x038a,
@@ -4300,10 +4275,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -4331,15 +4304,10 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xfa2d,
+	0xd800, 0xfa2d,
 	0xfa30, 0xfa6d,
 	0xfa70, 0xfad9,
 	0xfb00, 0xfb06,
@@ -4469,22 +4437,18 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
 	0xe0100, 0xe01ef,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
+	0xf0000, 0xffffd,
 }; /* CR_Assigned */
 
 /* 'C': Major Category */
 static const OnigCodePoint CR_C[] = {
-	26,
+	20,
 	0x0000, 0x001f,
 	0x007f, 0x009f,
 	0x00ad, 0x00ad,
@@ -4496,21 +4460,15 @@
 	0x202a, 0x202e,
 	0x2060, 0x2064,
 	0x206a, 0x206f,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xf8ff,
+	0xd800, 0xf8ff,
 	0xfeff, 0xfeff,
 	0xfff9, 0xfffb,
 	0x110bd, 0x110bd,
 	0x1d173, 0x1d17a,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10ffff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10ffff,
 }; /* CR_C */
 
 /* 'Cc': General Category */
@@ -4542,7 +4500,7 @@
 
 /* 'Cn': General Category */
 static const OnigCodePoint CR_Cn[] = {
-	495,
+	484,
 	0x0378, 0x0379,
 	0x037f, 0x0383,
 	0x038b, 0x038b,
@@ -4858,9 +4816,7 @@
 	0x31e4, 0x31ef,
 	0x321f, 0x321f,
 	0x32ff, 0x32ff,
-	0x3401, 0x4db4,
 	0x4db6, 0x4dbf,
-	0x4e01, 0x9fca,
 	0x9fcc, 0x9fff,
 	0xa48d, 0xa48f,
 	0xa4c7, 0xa4cf,
@@ -4889,14 +4845,9 @@
 	0xaae0, 0xabbf,
 	0xabee, 0xabef,
 	0xabfa, 0xabff,
-	0xac01, 0xd7a2,
 	0xd7a4, 0xd7af,
 	0xd7c7, 0xd7ca,
 	0xd7fc, 0xd7ff,
-	0xd801, 0xdb7e,
-	0xdb81, 0xdbfe,
-	0xdc01, 0xdffe,
-	0xe001, 0xf8fe,
 	0xfa2e, 0xfa2f,
 	0xfa6e, 0xfa6f,
 	0xfada, 0xfaff,
@@ -5027,42 +4978,32 @@
 	0x1f201, 0x1f20f,
 	0x1f232, 0x1f23f,
 	0x1f249, 0x1ffff,
-	0x20001, 0x2a6d5,
 	0x2a6d7, 0x2a6ff,
-	0x2a701, 0x2b733,
 	0x2b735, 0x2f7ff,
 	0x2fa1e, 0xe0000,
 	0xe0002, 0xe001f,
 	0xe0080, 0xe00ff,
 	0xe01f0, 0xeffff,
-	0xf0001, 0xffffc,
-	0xffffe, 0xfffff,
-	0x100001, 0x10ffff,
+	0xffffe, 0x10ffff,
 }; /* CR_Cn */
 
 /* 'Co': General Category */
 static const OnigCodePoint CR_Co[] = {
-	6,
-	0xe000, 0xe000,
-	0xf8ff, 0xf8ff,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10fffd,
+	3,
+	0xe000, 0xf8ff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10fffd,
 }; /* CR_Co */
 
 /* 'Cs': General Category */
 static const OnigCodePoint CR_Cs[] = {
-	4,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xdfff,
+	1,
+	0xd800, 0xdfff,
 }; /* CR_Cs */
 
 /* 'L': Major Category */
 static const OnigCodePoint CR_L[] = {
-	427,
+	422,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -5347,10 +5288,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -5389,8 +5328,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadd,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -5485,10 +5423,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_L */
 
@@ -6152,7 +6088,7 @@
 
 /* 'Lo': General Category */
 static const OnigCodePoint CR_Lo[] = {
-	316,
+	311,
 	0x01bb, 0x01bb,
 	0x01c0, 0x01c3,
 	0x0294, 0x0294,
@@ -6362,10 +6298,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa014,
 	0xa016, 0xa48c,
 	0xa4d0, 0xa4f7,
@@ -6400,8 +6334,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadc,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -6464,10 +6397,8 @@
 	0x11083, 0x110af,
 	0x12000, 0x1236e,
 	0x13000, 0x1342e,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_Lo */
 
Index: enc/unicode/name2ctype.src
===================================================================
--- enc/unicode/name2ctype.src	(revision 25259)
+++ enc/unicode/name2ctype.src	(revision 25260)
@@ -8,7 +8,7 @@
 
 /* 'Alpha': [[:Alpha:]] */
 static const OnigCodePoint CR_Alpha[] = {
-	482,
+	477,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -342,10 +342,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -378,8 +376,7 @@
 	0xaadb, 0xaadd,
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -485,10 +482,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alpha */
@@ -558,7 +553,7 @@
 
 /* 'Graph': [[:Graph:]] */
 static const OnigCodePoint CR_Graph[] = {
-	492,
+	487,
 	0x0021, 0x007e,
 	0x00a1, 0x00ac,
 	0x00ae, 0x0377,
@@ -879,10 +874,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -910,8 +903,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -1045,10 +1037,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Graph */
@@ -1659,7 +1649,7 @@
 
 /* 'Print': [[:Print:]] */
 static const OnigCodePoint CR_Print[] = {
-	494,
+	489,
 	0x0009, 0x000d,
 	0x0020, 0x007e,
 	0x0085, 0x0085,
@@ -1982,10 +1972,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -2013,8 +2001,7 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -2148,10 +2135,8 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Print */
@@ -2915,7 +2900,7 @@
 
 /* 'Word': [[:Word:]] */
 static const OnigCodePoint CR_Word[] = {
-	508,
+	503,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x005f, 0x005f,
@@ -3267,10 +3252,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3305,8 +3288,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3418,17 +3400,15 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Word */
 
 /* 'Alnum': [[:Alnum:]] */
 static const OnigCodePoint CR_Alnum[] = {
-	502,
+	497,
 	0x0030, 0x0039,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
@@ -3777,10 +3757,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -3815,8 +3793,7 @@
 	0xabc0, 0xabea,
 	0xabec, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -3925,10 +3902,8 @@
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
 	0x1d7ce, 0x1d7ff,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0100, 0xe01ef,
 }; /* CR_Alnum */
@@ -3948,7 +3923,7 @@
 
 /* 'Assigned': - */
 static const OnigCodePoint CR_Assigned[] = {
-	495,
+	484,
 	0x0000, 0x0377,
 	0x037a, 0x037e,
 	0x0384, 0x038a,
@@ -4264,10 +4239,8 @@
 	0x31c0, 0x31e3,
 	0x31f0, 0x321e,
 	0x3220, 0x32fe,
-	0x3300, 0x3400,
-	0x4db5, 0x4db5,
-	0x4dc0, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3300, 0x4db5,
+	0x4dc0, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -4295,15 +4268,10 @@
 	0xaadb, 0xaadf,
 	0xabc0, 0xabed,
 	0xabf0, 0xabf9,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xfa2d,
+	0xd800, 0xfa2d,
 	0xfa30, 0xfa6d,
 	0xfa70, 0xfad9,
 	0xfb00, 0xfb06,
@@ -4433,22 +4401,18 @@
 	0x1f200, 0x1f200,
 	0x1f210, 0x1f231,
 	0x1f240, 0x1f248,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
 	0xe0100, 0xe01ef,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
+	0xf0000, 0xffffd,
 }; /* CR_Assigned */
 
 /* 'C': Major Category */
 static const OnigCodePoint CR_C[] = {
-	26,
+	20,
 	0x0000, 0x001f,
 	0x007f, 0x009f,
 	0x00ad, 0x00ad,
@@ -4460,21 +4424,15 @@
 	0x202a, 0x202e,
 	0x2060, 0x2064,
 	0x206a, 0x206f,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xe000,
-	0xf8ff, 0xf8ff,
+	0xd800, 0xf8ff,
 	0xfeff, 0xfeff,
 	0xfff9, 0xfffb,
 	0x110bd, 0x110bd,
 	0x1d173, 0x1d17a,
 	0xe0001, 0xe0001,
 	0xe0020, 0xe007f,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10ffff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10ffff,
 }; /* CR_C */
 
 /* 'Cc': General Category */
@@ -4506,7 +4464,7 @@
 
 /* 'Cn': General Category */
 static const OnigCodePoint CR_Cn[] = {
-	495,
+	484,
 	0x0378, 0x0379,
 	0x037f, 0x0383,
 	0x038b, 0x038b,
@@ -4822,9 +4780,7 @@
 	0x31e4, 0x31ef,
 	0x321f, 0x321f,
 	0x32ff, 0x32ff,
-	0x3401, 0x4db4,
 	0x4db6, 0x4dbf,
-	0x4e01, 0x9fca,
 	0x9fcc, 0x9fff,
 	0xa48d, 0xa48f,
 	0xa4c7, 0xa4cf,
@@ -4853,14 +4809,9 @@
 	0xaae0, 0xabbf,
 	0xabee, 0xabef,
 	0xabfa, 0xabff,
-	0xac01, 0xd7a2,
 	0xd7a4, 0xd7af,
 	0xd7c7, 0xd7ca,
 	0xd7fc, 0xd7ff,
-	0xd801, 0xdb7e,
-	0xdb81, 0xdbfe,
-	0xdc01, 0xdffe,
-	0xe001, 0xf8fe,
 	0xfa2e, 0xfa2f,
 	0xfa6e, 0xfa6f,
 	0xfada, 0xfaff,
@@ -4991,42 +4942,32 @@
 	0x1f201, 0x1f20f,
 	0x1f232, 0x1f23f,
 	0x1f249, 0x1ffff,
-	0x20001, 0x2a6d5,
 	0x2a6d7, 0x2a6ff,
-	0x2a701, 0x2b733,
 	0x2b735, 0x2f7ff,
 	0x2fa1e, 0xe0000,
 	0xe0002, 0xe001f,
 	0xe0080, 0xe00ff,
 	0xe01f0, 0xeffff,
-	0xf0001, 0xffffc,
-	0xffffe, 0xfffff,
-	0x100001, 0x10ffff,
+	0xffffe, 0x10ffff,
 }; /* CR_Cn */
 
 /* 'Co': General Category */
 static const OnigCodePoint CR_Co[] = {
-	6,
-	0xe000, 0xe000,
-	0xf8ff, 0xf8ff,
-	0xf0000, 0xf0000,
-	0xffffd, 0xffffd,
-	0x100000, 0x100000,
-	0x10fffd, 0x10fffd,
+	3,
+	0xe000, 0xf8ff,
+	0xf0000, 0xffffd,
+	0x100000, 0x10fffd,
 }; /* CR_Co */
 
 /* 'Cs': General Category */
 static const OnigCodePoint CR_Cs[] = {
-	4,
-	0xd800, 0xd800,
-	0xdb7f, 0xdb80,
-	0xdbff, 0xdc00,
-	0xdfff, 0xdfff,
+	1,
+	0xd800, 0xdfff,
 }; /* CR_Cs */
 
 /* 'L': Major Category */
 static const OnigCodePoint CR_L[] = {
-	427,
+	422,
 	0x0041, 0x005a,
 	0x0061, 0x007a,
 	0x00aa, 0x00aa,
@@ -5311,10 +5252,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa48c,
 	0xa4d0, 0xa4fd,
 	0xa500, 0xa60c,
@@ -5353,8 +5292,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadd,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -5449,10 +5387,8 @@
 	0x1d78a, 0x1d7a8,
 	0x1d7aa, 0x1d7c2,
 	0x1d7c4, 0x1d7cb,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_L */
 
@@ -6116,7 +6052,7 @@
 
 /* 'Lo': General Category */
 static const OnigCodePoint CR_Lo[] = {
-	316,
+	311,
 	0x01bb, 0x01bb,
 	0x01c0, 0x01c3,
 	0x0294, 0x0294,
@@ -6326,10 +6262,8 @@
 	0x3131, 0x318e,
 	0x31a0, 0x31b7,
 	0x31f0, 0x31ff,
-	0x3400, 0x3400,
-	0x4db5, 0x4db5,
-	0x4e00, 0x4e00,
-	0x9fcb, 0x9fcb,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fcb,
 	0xa000, 0xa014,
 	0xa016, 0xa48c,
 	0xa4d0, 0xa4f7,
@@ -6364,8 +6298,7 @@
 	0xaac2, 0xaac2,
 	0xaadb, 0xaadc,
 	0xabc0, 0xabe2,
-	0xac00, 0xac00,
-	0xd7a3, 0xd7a3,
+	0xac00, 0xd7a3,
 	0xd7b0, 0xd7c6,
 	0xd7cb, 0xd7fb,
 	0xf900, 0xfa2d,
@@ -6428,10 +6361,8 @@
 	0x11083, 0x110af,
 	0x12000, 0x1236e,
 	0x13000, 0x1342e,
-	0x20000, 0x20000,
-	0x2a6d6, 0x2a6d6,
-	0x2a700, 0x2a700,
-	0x2b734, 0x2b734,
+	0x20000, 0x2a6d6,
+	0x2a700, 0x2b734,
 	0x2f800, 0x2fa1d,
 }; /* CR_Lo */
 
Index: tool/enc-unicode.rb
===================================================================
--- tool/enc-unicode.rb	(revision 25259)
+++ tool/enc-unicode.rb	(revision 25260)
@@ -40,26 +40,36 @@
 def parse_unicode_data(file)
   last_cp = 0
   data = {'Cn' => []}
+  beg_cp = nil
   IO.foreach(file) do |line|
     fields = line.split(';')
     cp = fields[0].to_i(16)
 
+    case fields[1]
+    when /\A<(.*),\s*First>\z/
+      beg_cp = cp
+      next
+    when /\A<(.*),\s*Last>\z/
+      cps = (beg_cp..cp).to_a
+    else
+      beg_cp = cp
+      cps = [cp]
+    end
+
     # The Cn category represents unassigned characters. These are not listed in
     # UnicodeData.txt so we must derive them by looking for 'holes' in the range
     # of listed codepoints. We increment the last codepoint seen and compare it
     # with the current codepoint. If the current codepoint is less than
     # last_cp.next we have found a hole, so we add the missing codepoint to the
     # Cn category.
-    while ((last_cp = last_cp.next) < cp)
-      data['Cn'] << last_cp
-    end
+    data['Cn'].concat((last_cp.next...beg_cp).to_a)
 
     # The third field denotes the 'General' category, e.g. Lu
-    (data[fields[2]] ||= []) << cp
+    (data[fields[2]] ||= []).concat(cps)
 
     # The 'Major' category is the first letter of the 'General' category, e.g.
     # 'Lu' -> 'L'
-    (data[fields[2][0,1]] ||= []) << cp
+    (data[fields[2][0,1]] ||= []).concat(cps)
     last_cp = cp
   end
 
Index: test/ruby/test_regexp.rb
===================================================================
--- test/ruby/test_regexp.rb	(revision 25259)
+++ test/ruby/test_regexp.rb	(revision 25260)
@@ -745,6 +745,7 @@
     assert_match(/^\u3042{0}\p{Any}$/, "a")
     assert_match(/^\u3042{0}\p{Any}$/, "\u3041")
     assert_match(/^\u3042{0}\p{Any}$/, "\0")
+    assert_match(/^\p{Lo}{4}$/u, "\u3401\u4E01\u{20001}\u{2A701}")
     assert_no_match(/^\u3042{0}\p{Any}$/, "\0\0")
     assert_no_match(/^\u3042{0}\p{Any}$/, "")
     assert_raise(SyntaxError) { eval('/^\u3042{0}\p{' + "\u3042" + '}$/') }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]