[前][次][番号順一覧][スレッド一覧]

ruby-changes:43255

From: duerst <ko1@a...>
Date: Wed, 8 Jun 2016 21:28:47 +0900 (JST)
Subject: [ruby-changes:43255] duerst:r55329 (trunk): * string.c: New static function rb_str_ascii_casemap; special-casing

duerst	2016-06-08 21:28:42 +0900 (Wed, 08 Jun 2016)

  New Revision: 55329

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=55329

  Log:
    * string.c: New static function rb_str_ascii_casemap; special-casing
      :ascii option in rb_str_upcase_bang and rb_str_downcase_bang.
    * regenc.c: Fix a bug (wrong use of unnecessary slack at end of string).
    * regenc.h -> include/ruby/oniguruma.h: Move declaration of
      onigenc_ascii_only_case_map so that it is visible in string.c.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/oniguruma.h
    trunk/regenc.c
    trunk/regenc.h
    trunk/string.c
Index: include/ruby/oniguruma.h
===================================================================
--- include/ruby/oniguruma.h	(revision 55328)
+++ include/ruby/oniguruma.h	(revision 55329)
@@ -229,6 +229,14 @@ ONIG_EXTERN const OnigEncodingType OnigE https://github.com/ruby/ruby/blob/trunk/include/ruby/oniguruma.h#L229
 
 #define ONIG_ENCODING_UNDEF    ((OnigEncoding )0)
 
+#ifdef ONIG_CASE_MAPPING
+  /* this declaration needs to be here because it is used in string.c */
+  ONIG_EXTERN int    onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP,
+			  const OnigUChar** pp, const OnigUChar* end,
+			  OnigUChar* to, OnigUChar* to_end,
+			  const struct OnigEncodingTypeST* enc));
+#endif   /* ONIG_CASE_MAPPING */
+
 
 /* work size */
 #define ONIGENC_CODE_TO_MBC_MAXLEN       7
Index: regenc.c
===================================================================
--- regenc.c	(revision 55328)
+++ regenc.c	(revision 55329)
@@ -968,9 +968,7 @@ onigenc_ascii_only_case_map (OnigCaseFol https://github.com/ruby/ruby/blob/trunk/regenc.c#L968
   OnigCaseFoldType flags = *flagP;
   int codepoint_length;
 
-  to_end -= 4; /* longest possible length of a single character */
-
-  while (*pp<end && to<=to_end) {
+  while (*pp<end && to<to_end) {
     codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
     if (codepoint_length < 0)
       return codepoint_length; /* encoding invalid */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 55328)
+++ ChangeLog	(revision 55329)
@@ -1,3 +1,13 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Wed Jun  8 21:28:36 2016  Martin Duerst  <duerst@i...>
+
+	* string.c: New static function rb_str_ascii_casemap; special-casing
+	  :ascii option in rb_str_upcase_bang and rb_str_downcase_bang.
+
+	* regenc.c: Fix a bug (wrong use of unnecessary slack at end of string).
+
+	* regenc.h -> include/ruby/oniguruma.h: Move declaration of
+	  onigenc_ascii_only_case_map so that it is visible in string.c.
+
 Wed Jun  8 20:33:44 2016  Naohisa Goto  <ngotogenome@g...>
 
 	* include/ruby/intern.h: Remove excess semicolons in PUREFUNC().
Index: regenc.h
===================================================================
--- regenc.h	(revision 55328)
+++ regenc.h	(revision 55329)
@@ -133,7 +133,6 @@ CONSTFUNC(ONIG_EXTERN int onigenc_not_su https://github.com/ruby/ruby/blob/trunk/regenc.h#L133
 PUREFUNC(ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc)));
 
 #ifdef ONIG_CASE_MAPPING
-  ONIG_EXTERN int    onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
   ONIG_EXTERN int    onigenc_single_byte_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
 #endif   /* ONIG_CASE_MAPPING */
 
Index: string.c
===================================================================
--- string.c	(revision 55328)
+++ string.c	(revision 55329)
@@ -5833,6 +5833,29 @@ rb_str_casemap(VALUE source, OnigCaseFol https://github.com/ruby/ruby/blob/trunk/string.c#L5833
     return target;
 }
 
+/* Case-maps source in place: the string's own buffer is handed to
+ * onigenc_ascii_only_case_map as both input and output.  flags is passed by
+ * pointer so callers can test ONIGENC_CASE_MODIFIED in it afterwards.
+ * Raises ArgumentError when the mapper returns a negative (invalid) value. */
+static void
+rb_str_ascii_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
+{
+    OnigUChar *source_current, *source_end;
+    long old_length = RSTRING_LEN(source); /* long: avoid narrowing to int */
+    int length_or_invalid;
+
+    if (old_length == 0) return;
+
+    source_current = (OnigUChar*)RSTRING_PTR(source);
+    source_end = (OnigUChar*)RSTRING_END(source);
+
+    length_or_invalid = onigenc_ascii_only_case_map(flags,
+			       (const OnigUChar**)&source_current, source_end,
+			       source_current, source_end, enc);
+    if (length_or_invalid < 0)
+        rb_raise(rb_eArgError, "input string invalid");
+}
+
 /*
  *  call-seq:
  *     str.upcase!              -> str or nil
@@ -5855,7 +5878,8 @@ rb_str_upcase_bang(int argc, VALUE *argv https://github.com/ruby/ruby/blob/trunk/string.c#L5878
     str_modify_keep_cr(str);
     enc = STR_ENC_GET(str);
     rb_str_check_dummy_enc(enc);
-    if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) {
+    if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1)
+	|| (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) {
         char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
 
 	while (s < send) {
@@ -5914,14 +5938,14 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L5938
 rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
 {
     rb_encoding *enc;
-    int modify = 0;
     OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
 
     flags = check_case_options(argc, argv, flags);
     str_modify_keep_cr(str);
     enc = STR_ENC_GET(str);
     rb_str_check_dummy_enc(enc);
-    if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) {
+    if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1)
+	|| (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) {
         char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
 
 	while (s < send) {
@@ -5929,17 +5953,17 @@ rb_str_downcase_bang(int argc, VALUE *ar https://github.com/ruby/ruby/blob/trunk/string.c#L5953
 
 	    if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
 		*s = 'a' + (c - 'A');
-		modify = 1;
+		flags |= ONIGENC_CASE_MODIFIED;
 	    }
 	    s++;
 	}
     }
-    else {
+    else if (flags&ONIGENC_CASE_ASCII_ONLY)
+        rb_str_ascii_casemap(str, &flags, enc);
+    else
 	str_shared_replace(str, rb_str_casemap(str, &flags, enc));
-	modify = ONIGENC_CASE_MODIFIED & flags;
-    }
 
-    if (modify) return str;
+    if (ONIGENC_CASE_MODIFIED&flags) return str;
     return Qnil;
 }
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]