[前][次][番号順一覧][スレッド一覧]

ruby-changes:1856

From: ko1@a...
Date: 6 Sep 2007 12:42:22 +0900
Subject: [ruby-changes:1856] nobu - Ruby:r13347 (trunk): * string.c (rb_str_succ, rb_str_chop_bang, rb_str_chop): m17n support.

nobu	2007-09-06 12:42:12 +0900 (Thu, 06 Sep 2007)

  New Revision: 13347

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/string.c
    trunk/version.h

  Log:
    * string.c (rb_str_succ, rb_str_chop_bang, rb_str_chop): m17n support.
      [ruby-dev:31734]


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/version.h?r1=13347&r2=13346
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13347&r2=13346
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13347&r2=13346
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=13347&r2=13346

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 13346)
+++ include/ruby/encoding.h	(revision 13347)
@@ -62,7 +62,7 @@
 #define rb_enc_codepoint(p,e,enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)p,(UChar*)e) 
 
 /* ptr, ptr, encoding -> prev_char */
-#define rb_enc_prev_char(s,p,enc) onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
+#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
 
 #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
 #define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 13346)
+++ ChangeLog	(revision 13347)
@@ -1,3 +1,8 @@
+Thu Sep  6 12:42:10 2007  Nobuyoshi Nakada  <nobu@r...>
+
+	* string.c (rb_str_succ, rb_str_chop_bang, rb_str_chop): m17n support.
+	  [ruby-dev:31734]
+
 Wed Sep  5 22:02:27 2007  Yukihiro Matsumoto  <matz@r...>
 
 	* array.c (rb_ary_subseq): need integer overflow check.
Index: string.c
===================================================================
--- string.c	(revision 13346)
+++ string.c	(revision 13347)
@@ -1584,7 +1584,53 @@
     return 0;
 }
 
+/*
+  Overwrites +s+ with the successor of letter +c+ in +enc+ and returns
+  the carried-out letter.  Assumes each character range is contiguous
+  and that mbclen never changes within a range.
+ */
+static int
+enc_succ_char(unsigned int c, char *s, rb_encoding *enc)
+{
+    unsigned int cs;
 
+    /* numerics */
+    if (rb_enc_isdigit(c, enc)) {
+	cs = c++;
+	if (rb_enc_isdigit(c, enc)) {
+	    rb_enc_mbcput(c, s, enc);
+	    return 0;
+	}
+	do c = cs--; while (rb_enc_isdigit(cs, enc));
+	rb_enc_mbcput(c, s, enc);
+	return ++c;
+    }
+    /* small alphabets */
+    if (rb_enc_islower(c, enc)) {
+	cs = c++;
+	if (rb_enc_islower(c, enc)) {
+	    rb_enc_mbcput(c, s, enc);
+	    return 0;
+	}
+	do c = cs--; while (rb_enc_islower(cs, enc));
+	rb_enc_mbcput(c, s, enc);
+	return c;
+    }
+    /* capital alphabets */
+    if (rb_enc_isupper(c, enc)) {
+	cs = c++;
+	if (rb_enc_isupper(c, enc)) {
+	    rb_enc_mbcput(c, s, enc);
+	    return 0;
+	}
+	do c = cs--; while (rb_enc_isupper(cs, enc));
+	rb_enc_mbcput(c, s, enc);
+	return c;
+    }
+    return -1;
+}
+
+
 /*
  *  call-seq:
  *     str.succ   => new_str
@@ -1617,38 +1663,51 @@
     VALUE str;
     char *sbeg, *s, *e;
     int c = -1;
-    long n = 0;
+    long n = 0, o = 0, l;
+    char carry[ONIGENC_CODE_TO_MBC_MAXLEN];
 
     str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
+    rb_enc_copy(str, orig);
     OBJ_INFECT(str, orig);
     if (RSTRING_LEN(str) == 0) return str;
 
     enc = rb_enc_get(orig);
-    sbeg = RSTRING_PTR(str); s = sbeg + RSTRING_LEN(str) - 1;
-    e = RSTRING_END(str);
+    sbeg = RSTRING_PTR(str);
+    s = e = sbeg + RSTRING_LEN(str);
 
-    while (sbeg <= s) {
+    while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) {
 	unsigned int cc = rb_enc_codepoint(s, e, enc);
 	if (rb_enc_isalnum(cc, enc)) {
-	    if ((c = succ_char(s)) == 0) break;
+	    if (isascii(cc)) {
+		if ((c = succ_char(s)) == 0) break;
+	    }
+	    else {
+		if ((c = enc_succ_char(cc, s, enc)) == 0) break;
+	    }
 	    n = s - sbeg;
 	}
-	s--;
     }
     if (c == -1) {		/* str contains no alnum */
-	sbeg = RSTRING_PTR(str); s = sbeg + RSTRING_LEN(str) - 1;
 	c = '\001';
-	while (sbeg <= s) {
-	    if ((*s += 1) != 0) break;
-	    s--;
+	s = e;
+	while ((s = rb_enc_prev_char(sbeg, e, enc)) != 0) {
+	    unsigned int cc = rb_enc_codepoint(s, e, enc) + 1;
+	    l = rb_enc_mbcput(cc, carry, enc);
+	    if (l > 0) {
+		if (l == (o = e - s)) goto overlay;
+		n = s - sbeg;
+		goto insert;
+	    }
 	}
     }
-    if (s < sbeg) {
-	RESIZE_CAPA(str, RSTRING_LEN(str) + 1);
+    if (!s && (l = rb_enc_mbcput(c, carry, enc)) > 0) {
+      insert:
+	RESIZE_CAPA(str, RSTRING_LEN(str) + l - o);
 	s = RSTRING_PTR(str) + n;
-	memmove(s+1, s, RSTRING_LEN(str) - n);
-	*s = c;
-	STR_SET_LEN(str, RSTRING_LEN(str) + 1);
+	memmove(s + l, s + o, RSTRING_LEN(str) - n - o);
+      overlay:
+	memmove(s, carry, l);
+	STR_SET_LEN(str, RSTRING_LEN(str) + l - o);
 	RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
     }
 
@@ -4040,6 +4099,24 @@
     return str;
 }
 
+static long
+chopped_length(VALUE str)
+{
+    rb_encoding *enc = rb_enc_get(str);
+    const char *p, *p2, *beg, *end;
+
+    beg = RSTRING_PTR(str);
+    end = beg + RSTRING_LEN(str);
+    if (beg > end) return 0;
+    p = rb_enc_prev_char(beg, end, enc);
+    if (!p) return 0;
+    if (p > beg && rb_enc_codepoint(p, end, enc) == '\n') {
+	p2 = rb_enc_prev_char(beg, p, enc);
+	if (p2 && rb_enc_codepoint(p2, end, enc) == '\r') p = p2;
+    }
+    return p - beg;
+}
+
 /*
  *  call-seq:
  *     str.chop!   => str or nil
@@ -4053,15 +4130,11 @@
 rb_str_chop_bang(VALUE str)
 {
     if (RSTRING_LEN(str) > 0) {
+	long len;
 	rb_str_modify(str);
-	STR_DEC_LEN(str);
-	if (RSTRING_PTR(str)[RSTRING_LEN(str)] == '\n') {
-	    if (RSTRING_LEN(str) > 0 &&
-		RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
-		STR_DEC_LEN(str);
-	    }
-	}
-	RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
+	len = chopped_length(str);
+	STR_SET_LEN(str, len);
+	RSTRING_PTR(str)[len] = '\0';
 	return str;
     }
     return Qnil;
@@ -4088,9 +4161,10 @@
 static VALUE
 rb_str_chop(VALUE str)
 {
-    str = rb_str_dup(str);
-    rb_str_chop_bang(str);
-    return str;
+    VALUE str2 = rb_str_new5(str, RSTRING_PTR(str), chopped_length(str));
+    rb_enc_copy(str2, str);
+    OBJ_INFECT(str2, str);
+    return str2;
 }
 
 
Index: version.h
===================================================================
--- version.h	(revision 13346)
+++ version.h	(revision 13347)
@@ -1,7 +1,7 @@
 #define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2007-09-05"
+#define RUBY_RELEASE_DATE "2007-09-06"
 #define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20070905
+#define RUBY_RELEASE_CODE 20070906
 #define RUBY_PATCHLEVEL 0
 
 #define RUBY_VERSION_MAJOR 1
@@ -9,7 +9,7 @@
 #define RUBY_VERSION_TEENY 0
 #define RUBY_RELEASE_YEAR 2007
 #define RUBY_RELEASE_MONTH 9
-#define RUBY_RELEASE_DAY 5
+#define RUBY_RELEASE_DAY 6
 
 #ifdef RUBY_EXTERN
 RUBY_EXTERN const char ruby_version[];

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]