[前][次][番号順一覧][スレッド一覧]

ruby-changes:2065

From: ko1@a...
Date: 29 Sep 2007 04:27:23 +0900
Subject: [ruby-changes:2065] nobu - Ruby:r13556 (trunk): * encoding.c (rb_enc_alias): allow encodings multiple aliases.

nobu	2007-09-29 04:27:10 +0900 (Sat, 29 Sep 2007)

  New Revision: 13556

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/include/ruby/st.h
    trunk/st.c
    trunk/string.c

  Log:
    * encoding.c (rb_enc_alias): allow encodings multiple aliases.
    
    * encoding.c (rb_enc_find_index): search the encoding which has the
      given name and return its index if found, or -1.
    
    * st.c (type_strcasehash): case-insensitive string hash type.
    
    * string.c (rb_str_force_encoding): force encoding of self.  this name
      comes from [ruby-dev:31894] by Martin Duerst.  [ruby-dev:31744]
    
    * include/ruby/encoding.h (rb_enc_find_index, rb_enc_associate_index):
      prototyped.
    
    * include/ruby/encoding.h (rb_enc_isctype): direct interface to ctype.
    
    * include/ruby/st.h (st_init_strcasetable): prototyped.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13556&r2=13555
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/st.h?r1=13556&r2=13555
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13556&r2=13555
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=13556&r2=13555
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=13556&r2=13555
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/st.c?r1=13556&r2=13555

Index: encoding.c
===================================================================
--- encoding.c	(revision 13555)
+++ encoding.c	(revision 13556)
@@ -23,6 +23,7 @@
 
 static struct rb_encoding_entry *enc_table;
 static int enc_table_size;
+static st_table *enc_table_alias;
 
 void
 rb_enc_register(const char *name, rb_encoding *encoding)
@@ -43,12 +44,25 @@
 }
 
 void
+rb_enc_alias(const char *alias, const char *orig)
+{
+    if (!enc_table_alias) {
+	enc_table_alias = st_init_strcasetable();
+    }
+    st_insert(enc_table_alias, (st_data_t)alias, (st_data_t)orig);
+}
+
+void
 rb_enc_init(void)
 {
-    rb_enc_register("ascii", ONIG_ENCODING_ASCII);
-    rb_enc_register("sjis", ONIG_ENCODING_SJIS);
-    rb_enc_register("euc-jp", ONIG_ENCODING_EUC_JP);
-    rb_enc_register("utf-8", ONIG_ENCODING_UTF8);
+#define ENC_REGISTER(enc) rb_enc_register(rb_enc_name(enc), enc)
+    ENC_REGISTER(ONIG_ENCODING_ASCII);
+    ENC_REGISTER(ONIG_ENCODING_SJIS);
+    ENC_REGISTER(ONIG_ENCODING_EUC_JP);
+    ENC_REGISTER(ONIG_ENCODING_UTF8);
+#undef ENC_REGISTER
+    rb_enc_alias("binary", "ascii");
+    rb_enc_alias("sjis", "shift_jis");
 }
 
 rb_encoding *
@@ -63,22 +77,39 @@
     return enc_table[index].enc;
 }
 
-rb_encoding *
-rb_enc_find(const char *name)
+int
+rb_enc_find_index(const char *name)
 {
     int i;
+    st_data_t alias = 0;
 
+    if (!name) return -1;
     if (!enc_table) {
 	rb_enc_init();
     }
+  find:
     for (i=0; i<enc_table_size; i++) {
-	if (strcmp(name, enc_table[i].name) == 0) {
-	    return enc_table[i].enc;
+	if (strcasecmp(name, enc_table[i].name) == 0) {
+	    return i;
 	}
     }
-    return ONIG_ENCODING_ASCII;
+    if (!alias && enc_table_alias) {
+	if (st_lookup(enc_table_alias, (st_data_t)name, &alias)) {
+	    name = (const char *)alias;
+	    goto find;
+	}
+    }
+    return -1;
 }
 
+rb_encoding *
+rb_enc_find(const char *name)
+{
+    rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(name));
+    if (!enc) enc = ONIG_ENCODING_ASCII;
+    return enc;
+}
+
 static int
 enc_capable(VALUE obj)
 {
@@ -163,7 +194,7 @@
 {
     int i;
 
-    enc_check_capable(obj);
+    if (!enc_capable(obj)) return -1;
     i = ENCODING_GET(obj);
     if (i == ENCODING_INLINE_MAX) {
 	VALUE iv;
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 13555)
+++ include/ruby/encoding.h	(revision 13556)
@@ -39,8 +39,10 @@
 
 int rb_enc_to_index(rb_encoding*);
 int rb_enc_get_index(VALUE obj);
+int rb_enc_find_index(const char *name);
 rb_encoding* rb_enc_get(VALUE);
 rb_encoding* rb_enc_check(VALUE,VALUE);
+void rb_enc_associate_index(VALUE, int);
 void rb_enc_associate(VALUE, rb_encoding*);
 void rb_enc_copy(VALUE, VALUE);
 
@@ -76,6 +78,7 @@
 /* ptr, ptr, encoding -> prev_char */
 #define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
 
+#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
 #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
 #define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
 #define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c)
Index: include/ruby/st.h
===================================================================
--- include/ruby/st.h	(revision 13555)
+++ include/ruby/st.h	(revision 13556)
@@ -71,6 +71,8 @@
 st_table *st_init_numtable_with_size(int);
 st_table *st_init_strtable(void);
 st_table *st_init_strtable_with_size(int);
+st_table *st_init_strcasetable(void);
+st_table *st_init_strcasetable_with_size(int);
 int st_delete(st_table *, st_data_t *, st_data_t *);
 int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t);
 int st_insert(st_table *, st_data_t, st_data_t);
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 13555)
+++ ChangeLog	(revision 13556)
@@ -1,3 +1,22 @@
+Sat Sep 29 04:27:08 2007  Nobuyoshi Nakada  <nobu@r...>
+
+	* encoding.c (rb_enc_alias): allow encodings multiple aliases.
+
+	* encoding.c (rb_enc_find_index): search the encoding which has the
+	  given name and return its index if found, or -1.
+
+	* st.c (type_strcasehash): case-insensitive string hash type.
+
+	* string.c (rb_str_force_encoding): force encoding of self.  this name
+	  comes from [ruby-dev:31894] by Martin Duerst.  [ruby-dev:31744]
+
+	* include/ruby/encoding.h (rb_enc_find_index, rb_enc_associate_index):
+	  prototyped.
+
+	* include/ruby/encoding.h (rb_enc_isctype): direct interface to ctype.
+
+	* include/ruby/st.h (st_init_strcasetable): prototyped.
+
 Sat Sep 29 03:53:26 2007  Koichi Sasada  <ko1@a...>
 
 	* cont.c: Thread local storage should be fiber local.
Index: string.c
===================================================================
--- string.c	(revision 13555)
+++ string.c	(revision 13556)
@@ -228,7 +228,7 @@
 }
 
 static VALUE
-str_new3(VALUE klass, VALUE str)
+str_new_shared(VALUE klass, VALUE str)
 {
     VALUE str2 = str_alloc(klass);
 
@@ -244,11 +244,19 @@
 	RSTRING(str2)->as.heap.aux.shared = str;
 	FL_SET(str2, ELTS_SHARED);
     }
-    rb_enc_copy((VALUE)str2, str);
 
     return str2;
 }
 
+static VALUE
+str_new3(VALUE klass, VALUE str)
+{
+    VALUE str2 = str_new_shared(klass, str);
+
+    rb_enc_copy(str2, str);
+    return str2;
+}
+
 VALUE
 rb_str_new3(VALUE str)
 {
@@ -5108,6 +5116,21 @@
 }
 
 
+/*
+ *  call-seq:
+ *     str.force_encoding(encoding)   => str
+ *
+ *  Changes the encoding to +encoding+ and returns self.
+ */
+
+static VALUE
+rb_str_force_encoding(VALUE str, VALUE encname)
+{
+    str_modifiable(str);
+    rb_enc_associate(str, rb_enc_find(StringValueCStr(encname)));
+    return str;
+}
+
 /**********************************************************************
  * Document-class: Symbol
  *
@@ -5519,6 +5542,7 @@
     rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
 
     rb_define_method(rb_cString, "encoding", str_encoding, 0);
+    rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
 
     id_to_s = rb_intern("to_s");
 
Index: st.c
===================================================================
--- st.c	(revision 13555)
+++ st.c	(revision 13556)
@@ -52,6 +52,12 @@
     strhash,
 };
 
+static int strcasehash(const char *);
+static const struct st_hash_type type_strcasehash = {
+    strcasecmp,
+    strcasehash,
+};
+
 static void rehash(st_table *);
 
 #ifdef RUBY
@@ -202,6 +208,18 @@
     return st_init_table_with_size(&type_strhash, size);
 }
 
+st_table*
+st_init_strcasetable(void)
+{
+    return st_init_table(&type_strcasehash);
+}
+
+st_table*
+st_init_strcasetable_with_size(int size)
+{
+    return st_init_table_with_size(&type_strcasehash, size);
+}
+
 void
 st_clear(st_table *table)
 {
@@ -814,6 +832,25 @@
     return hval;
 }
 
+static int
+strcasehash(register const char *string)
+{
+    register unsigned int hval = FNV1_32A_INIT;
+
+    /* FNV-1a over each octet, with ASCII 'A'-'Z' folded to lower case so
+     * that keys differing only in case hash alike, matching the
+     * strcasecmp comparator paired with this hash in type_strcasehash. */
+    while (*string) {
+	unsigned int c = (unsigned char)*string++;
+	if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
+	hval ^= c;
+
+	/* multiply by the 32 bit FNV magic prime mod 2^32 */
+	hval *= FNV_32_PRIME;
+    }
+    return hval;
+}
+
 int
 st_numcmp(st_data_t x, st_data_t y)
 {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]