[前][次][番号順一覧][スレッド一覧]

ruby-changes:42038

From: duerst <ko1@a...>
Date: Tue, 15 Mar 2016 13:49:29 +0900 (JST)
Subject: [ruby-changes:42038] duerst:r54112 (trunk): * enc/unicode.c: Additional macros and code to use mapping data in

duerst	2016-03-15 13:49:24 +0900 (Tue, 15 Mar 2016)

  New Revision: 54112

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=54112

  Log:
    * enc/unicode.c: Additional macros and code to use mapping data in
      CaseMappingSpecials array.
      (with Kimihito Matsui)

  Modified files:
    trunk/ChangeLog
    trunk/enc/unicode.c
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 54111)
+++ ChangeLog	(revision 54112)
@@ -1,3 +1,9 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Tue Mar 15 13:49:23 2016  Martin Duerst  <duerst@i...>
+
+	* enc/unicode.c: Additional macros and code to use mapping data in
+	  CaseMappingSpecials array.
+	  (with Kimihito Matsui)
+
 Tue Mar 15 13:41:22 2016  Nobuyoshi Nakada  <nobu@r...>
 
 	* internal.h (rb_gc_mark_global_tbl): should be private,
Index: enc/unicode.c
===================================================================
--- enc/unicode.c	(revision 54111)
+++ enc/unicode.c	(revision 54112)
@@ -137,15 +137,29 @@ code3_equal(const OnigCodePoint *x, cons https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L137
   return 1;
 }
 
+/* macros related to ONIGENC_CASE flags */
+/* defined here because not used in other files */
+#define ONIGENC_CASE_SPECIALS       (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL)
+
+/* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */
+#define SpecialsLengthOffset 25  /* needs to be higher than the 22 bits used for Unicode codepoints */
+#define SpecialsLengthExtract(n)    ((n)>>SpecialsLengthOffset)
+#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1))
+#define SpecialsLengthEncode(n)     ((n)<<SpecialsLengthOffset)
+
+#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexWidth)
+#define OnigSpecialIndexEncode(n)   ((n)<<OnigSpecialIndexShift)
+#define OnigSpecialIndexDecode(n)   (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
+
 /* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
 #define U ONIGENC_CASE_UPCASE
 #define D ONIGENC_CASE_DOWNCASE
 #define F ONIGENC_CASE_FOLD
-#define ST 0
-#define SU 0
-#define SL 0
+#define ST ONIGENC_CASE_TITLECASE
+#define SU ONIGENC_CASE_UP_SPECIAL
+#define SL ONIGENC_CASE_DOWN_SPECIAL
 #define I(n) 0
-#define L(n) 0
+#define L(n) SpecialsLengthEncode(n)
 
 #include "enc/unicode/casefold.h"
 
@@ -158,12 +172,6 @@ code3_equal(const OnigCodePoint *x, cons https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L172
 #undef I
 #undef L
 
-/* macros related to ONIGENC_CASE flags */
-/* defined here because not used in other files */
-#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexWidth)
-#define OnigSpecialIndexEncode(n) (((n)<<OnigSpecialIndexShift)&OnigSpecialIndexMask)
-#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
-
 #include "enc/unicode/name2ctype.h"
 
 #define CODE_RANGES_NUM numberof(CodeRanges)
@@ -654,6 +662,7 @@ onigenc_unicode_case_map(OnigCaseFoldTyp https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L662
     OnigUChar *to_start = to;
     OnigCaseFoldType flags = *flagP;
     to_end -= CASE_MAPPING_SLACK;
+    flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;
 
     /* hopelessly preliminary implementation, just dealing with ASCII and Turkic */
     while (*pp<end && to<=to_end) {
@@ -701,19 +710,56 @@ onigenc_unicode_case_map(OnigCaseFoldTyp https://github.com/ruby/ruby/blob/trunk/enc/unicode.c#L710
 	    }
 	    else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) {
 		if (flags&OnigCaseFoldFlags(folded->n)) {
-		    int count = OnigCodePointCount(folded->n);
-		    const OnigCodePoint *next = folded->code;
+		    const OnigCodePoint *next;
+		    int count;
+
 		    MODIFIED;
-		    if (count==1)
-		        code = *next;
-		    else if (count==2) {
-			to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
-			code = *next;
+		    if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) {
+			OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
+			int count;
+			
+			if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) {
+			    if (flags&ONIGENC_CASE_TITLECASE)
+				goto SpecialsCopy;
+			    else
+				SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
+			}
+			if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) {
+			    if (flags&ONIGENC_CASE_DOWN_SPECIAL)
+				goto SpecialsCopy;
+			    else
+				SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
+			}
+			/* if we pass here, we know we use special upcasing, and are at the right position */
+		      SpecialsCopy:
+		        count = SpecialsLengthExtract(*SpecialsStart);
+			next = SpecialsStart;
+			if (count==1)
+			    code = SpecialsCodepointExtract(*next);
+			else if (count==2) {
+			    to += ONIGENC_CODE_TO_MBC(enc, SpecialsCodepointExtract(*next++), to);
+			    code = *next;
+			}
+			else { /* count == 3 */
+			    to += ONIGENC_CODE_TO_MBC(enc, SpecialsCodepointExtract(*next++), to);
+			    to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+			    code = *next;
+			}
 		    }
-		    else { /* count == 3 */
-			to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
-			to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
-			code = *next;
+		    else { /* no specials */
+			count = OnigCodePointCount(folded->n);
+			next = folded->code;
+			if (count==1)
+			    code = *next;
+			else if (count==2) {
+			    to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+			    code = *next;
+			}
+			else { /* count == 3 */
+			    to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+			    to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+			    code = *next;
+			}
 		    }
 		}
 	    }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]