[前][次][番号順一覧][スレッド一覧]

ruby-changes:3597

From: ko1@a...
Date: Thu, 17 Jan 2008 23:56:52 +0900 (JST)
Subject: [ruby-changes:3597] nobu - Ruby:r15086 (trunk): * common.mk (encdb.h): give output file name to make_encdb.rb.

nobu	2008-01-17 23:56:22 +0900 (Thu, 17 Jan 2008)

  New Revision: 15086

  Added files:
    trunk/enc/iso_2022_jp.h
  Modified files:
    trunk/ChangeLog
    trunk/common.mk
    trunk/enc/make_encdb.rb
    trunk/encoding.c
    trunk/regenc.h

  Log:
    * common.mk (encdb.h): give output file name to make_encdb.rb.
    
    * encoding.c (enc_table): simplified.
    
    * encoding.c (enc_register_at): lazy loading.  [ruby-dev:33013]
    
    * regenc.h (ENC_DUMMY): added.
    
    * enc/make_encdb.rb: now emits macros only.
    
    * enc/iso_2022_jp.h: split from encoding.c.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/make_encdb.rb?r1=15086&r2=15085&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/iso_2022_jp.h?revision=15086&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/iso_2022_jp.h?r1=15086&r2=15085&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15086&r2=15085&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=15086&r2=15085&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/regenc.h?r1=15086&r2=15085&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/common.mk?r1=15086&r2=15085&diff_format=u

Index: encoding.c
===================================================================
--- encoding.c	(revision 15085)
+++ encoding.c	(revision 15086)
@@ -29,35 +29,62 @@
     struct rb_encoding_entry *list;
     int count;
     int size;
-    st_table *alias;
-    st_table *replica_name;
-    st_table *alias_name;
+    st_table *names;
 } enc_table;
 
+void rb_enc_init(void);
+
 #undef ENC_REPLICATE
 #undef ENC_ALIAS
-#define ENC_REPLICATE(name, orig) st_insert(enc_table.replica_name, (st_data_t)(name), (st_data_t)(orig))
-#define ENC_ALIAS(name, orig) st_insert(enc_table.alias_name, (st_data_t)(name), (st_data_t)(orig))
-#define enc_name_list_size (sizeof(enc_name_list)/sizeof(enc_name_list[0]))
+#undef ENC_DUMMY
+static int encdb_replicate(const char *alias, const char *orig);
+static int encdb_alias(const char *alias, const char *orig);
+static int encdb_dummy(const char *name);
+static void enc_declare(const char *name);
+#define ENC_REPLICATE(name, orig) encdb_replicate(name, orig)
+#define ENC_ALIAS(name, orig) encdb_alias(name, orig)
+#define ENC_DUMMY(name) encdb_dummy(name)
+#define ENC_DEFINE(name) enc_declare(name)
 
+static void
+enc_init_db(void)
+{
 #include "encdb.h"
+}
 
+#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
+
 #define ENC_UNINITIALIZED (&rb_cEncoding)
 #define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
 #define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
 
-#define ENC_DUMMY FL_USER2
-#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY)
+#define ENC_DUMMY_FLAG FL_USER2
+#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
+#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
 
+static int load_encoding(const char *name);
+static VALUE enc_base_encoding(VALUE self);
+
 static void
 enc_mark(void *ptr)
 {
 }
 
+static void
+enc_free(void *ptr)
+{
+    rb_encoding *enc = ptr;
+    struct rb_encoding_entry *ent = &enc_table.list[enc->ruby_encoding_index];
+    xfree((char *)ent->name);
+    ent->name = 0;
+    ent->enc = 0;
+    xfree(ptr);
+}
+
 static VALUE
 enc_new(rb_encoding *encoding)
 {
-    VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, -1, encoding);
+    VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, enc_free, encoding);
     encoding->auxiliary_data = (void *)enc;
     return enc;
 }
@@ -75,13 +102,19 @@
 enc_check_encoding(VALUE obj)
 {
     int index;
+    rb_encoding *enc;
+
     if (SPECIAL_CONST_P(obj) || BUILTIN_TYPE(obj) != T_DATA ||
 	RDATA(obj)->dmark != enc_mark) {
 	return -1;
     }
-    index = rb_enc_to_index((rb_encoding*)RDATA(obj)->data);
-    if (rb_enc_from_index(index) != RDATA(obj)->data)
+    enc = (rb_encoding*)RDATA(obj)->data;
+    index = rb_enc_to_index(enc);
+    if (rb_enc_from_index(index) != enc)
 	return -1;
+    if (enc_autoload_p(enc)) {
+	index = load_encoding(enc->name);
+    }
     return index;
 }
 
@@ -119,7 +152,7 @@
 rb_gc_mark_encodings(void)
 {
     int i;
-    for (i = 0; i < enc_table.size; ++i) {
+    for (i = 0; i < enc_table.count; ++i) {
 	rb_encoding *enc = enc_table.list[i].enc;
 	if (enc && enc_initialized_p(enc)) {
 	    rb_gc_mark(ENC_FROM_ENCODING(enc));
@@ -131,29 +164,50 @@
 enc_table_expand(int newsize)
 {
     struct rb_encoding_entry *ent;
+    int count = newsize;
 
     if (enc_table.size >= newsize) return newsize;
+    newsize = (newsize + 7) / 8 * 8;
     ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
     if (!ent) return -1;
     memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
     enc_table.list = ent;
     enc_table.size = newsize;
-    return newsize;
+    return count;
 }
 
 static int
 enc_register_at(int index, const char *name, rb_encoding *encoding)
 {
     struct rb_encoding_entry *ent = &enc_table.list[index];
+    void *obj = ENC_UNINITIALIZED;
 
-    name = strdup(name);
-    ent->name = name;
-    if (!ent->enc) ent->enc = malloc(sizeof(rb_encoding));
-    *ent->enc = *encoding;
+    if (!ent->name) {
+	ent->name = name = strdup(name);
+    }
+    else if (STRCASECMP(name, ent->name)) {
+	return -1;
+    }
+    if (!ent->enc) {
+	ent->enc = malloc(sizeof(rb_encoding));
+    }
+    else {
+	obj = ent->enc->auxiliary_data;
+    }
+    if (encoding) {
+	*ent->enc = *encoding;
+    }
+    else {
+	memset(ent->enc, 0, sizeof(*ent->enc));
+    }
     encoding = ent->enc;
     encoding->name = name;
     encoding->ruby_encoding_index = index;
-    if (rb_cEncoding) {
+    st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
+    if (obj != ENC_UNINITIALIZED) {
+	encoding->auxiliary_data = obj;
+    }
+    else if (rb_cEncoding) {
 	/* initialize encoding data */
 	enc_new(encoding);
     }
@@ -168,7 +222,6 @@
 {
     int index = enc_table.count;
 
-    if (index >= ENCODING_INLINE_MAX) index = enc_table.size;
     if ((index = enc_table_expand(index + 1)) < 0) return -1;
     enc_table.count = index;
     return enc_register_at(index - 1, name, encoding);
@@ -185,12 +238,11 @@
     if (index >= 0) {
 	rb_encoding *oldenc = rb_enc_from_index(index);
 	if (STRCASECMP(name, rb_enc_name(oldenc))) {
-	    st_data_t key = (st_data_t)name, alias;
-	    st_delete(enc_table.alias, &key, &alias);
 	    index = enc_register(name, encoding);
 	}
-	else if (enc_initialized_p(oldenc) &&
-		 !ENC_DUMMY_P(ENC_FROM_ENCODING(oldenc))) {
+	else if (!enc_autoload_p(oldenc) ||
+		 (enc_initialized_p(oldenc) &&
+		  !ENC_DUMMY_P(ENC_FROM_ENCODING(oldenc)))) {
 	    enc_register_at(index, name, encoding);
 	}
 	else {
@@ -199,12 +251,22 @@
     }
     else {
 	index = enc_register(name, encoding);
+	set_encoding_const(name, rb_enc_from_index(index));
     }
-    set_encoding_const(name, rb_enc_from_index(index));
     return index;
 }
 
 static void
+enc_declare(const char *name)
+{
+    int idx = rb_enc_registered(name);
+    if (idx < 0) {
+	idx = enc_register(name, 0);
+    }
+    set_encoding_const(name, rb_enc_from_index(idx));
+}
+
+static void
 enc_check_duplication(const char *name)
 {
     if (rb_enc_registered(name) >= 0) {
@@ -218,39 +280,71 @@
     VALUE enc = rb_enc_from_encoding(enc_table.list[index].enc);
 
     rb_ivar_set(enc, id_base_encoding, rb_enc_from_encoding(base));
-    if (rb_enc_dummy_p(base)) FL_SET(enc, ENC_DUMMY);
+    if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
     return enc;
 }
 
 int
 rb_enc_replicate(const char *name, rb_encoding *encoding)
 {
-    int index = enc_table.size;
+    int idx;
 
     enc_check_duplication(name);
-    if (enc_table_expand(index + 1) < 0) return -1;
-    enc_register_at(index, name, encoding);
-    set_base_encoding(index, encoding);
-    return index;
+    idx = enc_register(name, encoding);
+    set_base_encoding(idx, encoding);
+    set_encoding_const(name, rb_enc_from_index(idx));
+    return idx;
 }
 
+static int
+enc_replicate(int idx, const char *name, rb_encoding *origenc)
+{
+    if (idx < 0) {
+	idx = enc_register(name, origenc);
+    }
+    else {
+	idx = enc_register_at(idx, name, origenc);
+    }
+    if (idx >= 0) {
+	set_base_encoding(idx, origenc);
+	set_encoding_const(name, rb_enc_from_index(idx));
+    }
+    return idx;
+}
+
+static int
+encdb_replicate(const char *name, const char *orig)
+{
+    int origidx = rb_enc_registered(orig);
+    int idx = rb_enc_registered(name);
+
+    if (origidx < 0) {
+	origidx = enc_register(orig, 0);
+    }
+    return enc_replicate(idx, name, rb_enc_from_index(origidx));
+}
+
 int
 rb_define_dummy_encoding(const char *name)
 {
-    int index = enc_table.size;
-    rb_encoding *encoding;
-    VALUE enc;
+    int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
+    VALUE enc = rb_enc_from_encoding(enc_table.list[index].enc);
 
-    enc_check_duplication(name);
-    if (index < ENCODING_INLINE_MAX) index = ENCODING_INLINE_MAX;
-    if (enc_table_expand(index + 1) < 0) return -1;
-    encoding = rb_ascii8bit_encoding();
-    enc_register_at(index, name, encoding);
-    enc = set_base_encoding(index, encoding);
-    FL_SET(enc, ENC_DUMMY);
+    ENC_SET_DUMMY(enc);
     return index;
 }
 
+static int
+encdb_dummy(const char *name)
+{
+    int index = enc_replicate(rb_enc_registered(name), name,
+			      rb_ascii8bit_encoding());
+    VALUE enc = rb_enc_from_encoding(enc_table.list[index].enc);
+
+    ENC_SET_DUMMY(enc);
+    return index;
+}
+
 int
 rb_enc_dummy_p(rb_encoding *enc)
 {
@@ -278,28 +372,38 @@
 }
 
 static int
-enc_alias(const char *alias, const char *orig)
+enc_alias(const char *alias, int idx)
 {
-    st_data_t data;
+    alias = strdup(alias);
+    st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx);
+    set_encoding_const(alias, rb_enc_from_index(idx));
+    return idx;
+}
+
+int
+rb_enc_alias(const char *alias, const char *orig)
+{
     int idx;
 
-    if (!enc_table.alias) {
-	enc_table.alias = st_init_strcasetable();
+    enc_check_duplication(alias);
+    if (!enc_table.list) {
+	rb_enc_init();
     }
     if ((idx = rb_enc_find_index(orig)) < 0) {
-	if (!st_lookup(enc_table.alias, (st_data_t)orig, &data))
-	    return -1;
-	idx = (int)data;
+	return -1;
     }
-    st_insert(enc_table.alias, (st_data_t)alias, (st_data_t)idx);
-    return idx;
+    return enc_alias(alias, idx);
 }
 
-int
-rb_enc_alias(const char *alias, const char *orig)
+static int
+encdb_alias(const char *alias, const char *orig)
 {
-    enc_check_duplication(alias);
-    return enc_alias(alias, orig);
+    int idx = rb_enc_registered(orig);
+
+    if (idx < 0) {
+	idx = enc_register(orig, 0);
+    }
+    return enc_alias(alias, idx);
 }
 
 enum {
@@ -315,12 +419,16 @@
 void
 rb_enc_init(void)
 {
-    enc_table.count = enc_table_expand(ENCINDEX_BUILTIN_MAX);
+    enc_table_expand(ENCODING_COUNT + 1);
+    if (!enc_table.names) {
+	enc_table.names = st_init_strcasetable();
+    }
 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
     ENC_REGISTER(ASCII);
     ENC_REGISTER(UTF_8);
     ENC_REGISTER(US_ASCII);
 #undef ENC_REGISTER
+    enc_table.count = ENCINDEX_BUILTIN_MAX;
 }
 
 rb_encoding *
@@ -329,7 +437,7 @@
     if (!enc_table.list) {
 	rb_enc_init();
     }
-    if (index < 0 || enc_table.size <= index) {
+    if (index < 0 || enc_table.count <= index) {
 	return 0;
     }
     return enc_table.list[index].enc;
@@ -338,27 +446,13 @@
 int
 rb_enc_registered(const char *name)
 {
-    int i;
-    st_data_t alias = 0;
+    st_data_t idx = 0;
 
     if (!name) return -1;
-    if (!enc_table.list) {
-	rb_enc_init();
+    if (!enc_table.list) return -1;
+    if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
+	return (int)idx;
     }
-    for (i=0; i<enc_table.size; i++) {
-	if (!enc_table.list[i].name) {
-	    if (i < ENCODING_INLINE_MAX - 1) i = ENCODING_INLINE_MAX - 1;
-	    continue;
-	}
-	if (STRCASECMP(name, enc_table.list[i].name) == 0) {
-	    return i;
-	}
-    }
-    if (!alias && enc_table.alias) {
-	if (st_lookup(enc_table.alias, (st_data_t)name, &alias)) {
-	    return (int)alias;
-	}
-    }
     return -1;
 }
 
@@ -368,39 +462,53 @@
     return rb_require_safe(enclib, rb_safe_level());
 }
 
+static int
+load_encoding(const char *name)
+{
+    VALUE enclib = rb_sprintf("enc/%s", name);
+    VALUE verbose = ruby_verbose;
+    VALUE debug = ruby_debug;
+    char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib);
+    while (s < e) {
+	if (!ISALNUM(*s)) *s = '_';
+	else if (ISUPPER(*s)) *s = TOLOWER(*s);
+	++s;
+    }
+    OBJ_FREEZE(enclib);
+    ruby_verbose = Qfalse;
+    ruby_debug = Qfalse;
+    rb_protect(require_enc, enclib, 0);
+    ruby_verbose = verbose;
+    ruby_debug = debug;
+    rb_set_errinfo(Qnil);
+    return rb_enc_registered(name);
+}
+
 int
 rb_enc_find_index(const char *name)
 {
-    int i = rb_enc_registered(name);
+    int i = rb_enc_registered(name), b;
+    rb_encoding *enc;
+    VALUE base;
+
     if (i < 0) {
-	VALUE enclib = rb_sprintf("enc/%s", name);
-	char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib);
-	while (s < e) {
-	    if (!ISALNUM(*s)) *s = '_';
-	    else if (ISUPPER(*s)) *s = TOLOWER(*s);
-	    ++s;
-	}
-	OBJ_FREEZE(enclib);
-	if (RTEST(rb_protect(require_enc, enclib, 0)))
-	    i = rb_enc_registered(name);
-	rb_set_errinfo(Qnil);
+	i = load_encoding(name);
     }
-    if (i < 0) {
-	st_data_t key = (st_data_t)name, orig;
-	if (st_lookup(enc_table.replica_name, key, &orig)) {
-	    i = rb_enc_find_index((char *)orig);
-	    if (i < 0) {
-		rb_raise(rb_eRuntimeError, "unknown original encoding name - '%s' for replica '%s'", (char *)orig, name);
+    else if (enc_autoload_p(enc = rb_enc_from_index(i))) {
+	if (enc_initialized_p(enc) &&
+	    (base = enc_base_encoding(ENC_FROM_ENCODING(enc)), !NIL_P(base))) {
+	    if ((b = enc_check_encoding(base)) < 0) {
+		st_data_t key, val;
+		key = (st_data_t)name;
+		if (st_delete(enc_table.names, &key, &val)) {
+		    if (enc->name != (char *)key) xfree((char *)key);
+		}
+		return -1;
 	    }
-	    i = rb_enc_replicate(name, rb_enc_from_index(i));
-	    st_delete(enc_table.replica_name, &key, &orig);
+	    enc_register_at(i, name, rb_enc_from_index(b));
 	}
-	else if (st_lookup(enc_table.alias_name, key, &orig)) {
-	    i = rb_enc_alias(name, (char *)orig);
-	    if (i < 0) {
-		rb_raise(rb_eRuntimeError, "unknown original encoding name - '%s' for alias '%s'", (char *)orig, name);
-	    }
-	    st_delete(enc_table.alias_name, &key, &orig);
+	else {
+	    i = load_encoding(name);
 	}
     }
     return i;
@@ -777,9 +885,9 @@
 static VALUE
 enc_list(VALUE klass)
 {
-    VALUE ary = rb_ary_new2(enc_table.size);
+    VALUE ary = rb_ary_new2(enc_table.count);
     int i;
-    for (i = 0; i < enc_table.size; ++i) {
+    for (i = 0; i < enc_table.count; ++i) {
 	rb_encoding *enc = enc_table.list[i].enc;
 	if (enc) {
 	    rb_ary_push(ary, rb_enc_from_encoding(enc));
@@ -963,9 +1071,11 @@
 {
     VALUE encoding = rb_enc_from_encoding(enc);
     char *s = (char *)name;
-    int haslower = 0, valid = 0;
+    int haslower = 0, hasupper = 0, valid = 0;
 
+    if (ISDIGIT(*s)) return;
     if (ISUPPER(*s)) {
+	hasupper = 1;
 	while (*++s && (ISALNUM(*s) || *s == '_')) {
 	    if (ISLOWER(*s)) haslower = 1;
 	}
@@ -976,9 +1086,11 @@
     }
     if (!valid || haslower) {
 	int len = strlen(name) + 1;
-	if (!haslower) {
-	    while (!ISLOWER(*s) && *++s);
-	    if (*s) haslower = 1;
+	if (!haslower || !hasupper) {
+	    do {
+		if (ISLOWER(*s)) haslower = 1;
+		if (ISUPPER(*s)) hasupper = 1;
+	    } while (*++s && (!haslower || !hasupper));
 	}
 	MEMCPY(s = ALLOCA_N(char, len), name, char, len);
 	name = s;
@@ -987,7 +1099,9 @@
 	    for (; *s; ++s) {
 		if (!ISALNUM(*s)) *s = '_';
 	    }
-	    rb_define_const(rb_cEncoding, name, encoding);
+	    if (hasupper) {
+		rb_define_const(rb_cEncoding, name, encoding);
+	    }
 	}
 	if (haslower) {
 	    for (s = (char *)name; *s; ++s) {
@@ -999,10 +1113,10 @@
 }
 
 static int
-set_encoding_alias(st_data_t name, st_data_t orig, st_data_t arg)
+rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
 {
-    rb_encoding *enc = rb_enc_from_index((int)orig);
-    set_encoding_const((const char *)name, enc);
+    VALUE ary = (VALUE)arg;
+    rb_ary_push(ary, rb_str_new2((char *)name));
     return ST_CONTINUE;
 }
 
@@ -1025,25 +1139,25 @@
 static VALUE
 rb_enc_name_list(VALUE klass)
 {
-    VALUE ary = rb_ary_new2(enc_name_list_size);
-    int i;
-    for (i = 0; i < enc_name_list_size; i++) {
-	rb_ary_push(ary, rb_str_new2(enc_name_list[i]));
-    }
+    VALUE ary = rb_ary_new2(enc_table.names->num_entries);
+    st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
     return ary;
 }
 
 static int
 rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
 {
-    rb_hash_aset((VALUE)arg, rb_str_new2((char *)name), rb_str_new2(rb_enc_name(rb_enc_from_index((int)orig))));
-    return 0;
-}
+    VALUE *p = (VALUE *)arg;
+    VALUE aliases = p[0];
+    VALUE ary = p[1];
+    int idx = (int)orig;
+    VALUE str = rb_ary_entry(ary, idx);
 
-static int
-rb_enc_aliases_str_i(st_data_t name, st_data_t orig, st_data_t arg)
-{
-    rb_hash_aset((VALUE)arg, rb_str_new2((char *)name), rb_str_new2((char *)orig));
+    if (NIL_P(str)) {
+	str = rb_str_new2(rb_enc_name(rb_enc_from_index(idx)));
+	rb_ary_store(ary, idx, str);
+    }
+    rb_hash_aset(aliases, rb_str_new2((char *)name), str);
     return 0;
 }
 
@@ -1062,10 +1176,11 @@
 static VALUE
 rb_enc_aliases(VALUE klass)
 {
-    VALUE aliases = rb_hash_new();
-    if (enc_table.alias) st_foreach(enc_table.alias, rb_enc_aliases_enc_i, (st_data_t)aliases);
-    st_foreach(enc_table.alias_name, rb_enc_aliases_str_i, (st_data_t)aliases);
-    return aliases;
+    VALUE aliases[2];
+    aliases[0] = rb_hash_new();
+    aliases[1] = rb_ary_new();
+    st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
+    return aliases[0];
 }
 
 void
@@ -1073,9 +1188,6 @@
 {
     id_base_encoding = rb_intern("#base_encoding");
 
-    enc_table.replica_name = st_init_strcasetable();
-    enc_table.alias_name = st_init_strcasetable();
-
     rb_cEncoding = rb_define_class("Encoding", rb_cObject);
     rb_undef_alloc_func(rb_cEncoding);
     rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
@@ -1096,12 +1208,6 @@
     rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
 
     enc_init_db();
-
-    /* dummy for unsupported, statefull encoding */
-    rb_define_dummy_encoding("ISO-2022-JP");
-    rb_enc_alias("ISO2022-JP", "ISO-2022-JP");
-    rb_define_dummy_encoding("ISO-2022-JP-2");
-    rb_enc_alias("ISO2022-JP2", "ISO-2022-JP-2");
 }
 
 /* locale insensitive functions */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15085)
+++ ChangeLog	(revision 15086)
@@ -1,3 +1,17 @@
+Thu Jan 17 23:56:20 2008  Nobuyoshi Nakada  <nobu@r...>
+
+	* common.mk (encdb.h): give output file name to make_encdb.rb.
+
+	* encoding.c (enc_table): simplified.
+
+	* encoding.c (enc_register_at): lazy loading.  [ruby-dev:33013]
+
+	* regenc.h (ENC_DUMMY): added.
+
+	* enc/make_encdb.rb: now emits macros only.
+
+	* enc/iso_2022_jp.h: split from encoding.c.
+
 Thu Jan 17 21:48:21 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* re.c (rb_char_to_option_kcode): fixed typo.
Index: regenc.h
===================================================================
--- regenc.h	(revision 15085)
+++ regenc.h	(revision 15086)
@@ -202,5 +202,6 @@
 /* macros for define replica encoding and encoding alias */
 #define ENC_REPLICATE(name, orig)
 #define ENC_ALIAS(name, orig)
+#define ENC_DUMMY(name)
 
 #endif /* REGENC_H */
Index: enc/make_encdb.rb
===================================================================
--- enc/make_encdb.rb	(revision 15085)
+++ enc/make_encdb.rb	(revision 15086)
@@ -15,56 +15,59 @@
   end
 end
 
+count = 0
+lines = []
 encodings = []
-replicas = {}
-aliases = {}
 encdir = ARGV[0]
-Dir.open(encdir) {|d| d.grep(/.+\.c\z/)}.sort.each do |fn|
+outhdr = ARGV[1] || 'encdb.h'
+Dir.open(encdir) {|d| d.grep(/.+\.[ch]\z/)}.sort.each do |fn|
   open(File.join(encdir,fn)) do |f|
     orig = nil
     name = nil
     encs = []
     f.each_line do |line|
-      break if /^OnigEncodingDefine/o =~ line
-    end
-    f.each_line do |line|
-      break if /"(.*?)"/ =~ line
-    end
-    if $1
-      check_duplication(encs, $1, fn, $.)
-      encs << $1.upcase
-      encodings << $1 
-      f.each_line do |line|
-	if /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line
-	  raise ArgumentError,
-	    '%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' %
-	    [fn, $., $2, $1] unless encs.include?($2.upcase)
-	  check_duplication(encs, $1, fn, $.)
-	  encs << $1.upcase
-	  encodings << $1
-	  replicas[$1] = $2
-	elsif /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/o =~ line
-	  raise ArgumentError,
-	    '%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' %
-	    [fn, $., $2, $1] unless encs.include?($2.upcase)
-	  check_duplication(encs, $1, fn, $.)
-	  encodings << $1
-	  aliases[$1] = $2
-	end
+      if (/^OnigEncodingDefine/ =~ line)..(/"(.*?)"/ =~ line)
+        if $1
+          check_duplication(encs, $1, fn, $.)
+          encs << $1.upcase
+          encodings << $1
+          count += 1
+        end
+      else
+        case line
+        when /^\s*rb_enc_register\(\s*"([^"]+)"/
+          count += 1
+          line = nil
+        when /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
+          raise ArgumentError,
+          '%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' %
+            [fn, $., $2, $1] unless encs.include?($2.upcase)
+          count += 1
+        when /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
+          raise ArgumentError,
+          '%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' %
+            [fn, $., $2, $1] unless encs.include?($2.upcase)
+        when /^ENC_DUMMY\(\s*"([^"]+)"/
+          count += 1
+        else
+          next
+        end
+        check_duplication(encs, $1, fn, $.)
+        encs << $1.upcase
+        lines << line.sub(/;.*/m, ";\n") if line
       end
     end
   end
 end
 
-open('encdb.h', 'wb') do |f|
-  f.puts 'static const char *const enc_name_list[] = {'
-  encodings.each {|name| f.puts'    "%s",' % name}
-  f.puts('};', '', 'static void', 'enc_init_db(void)', '{')
-  replicas.each_pair {|name, orig|
-    f.puts '    ENC_REPLICATE("%s", "%s");' % [name, orig]
-  }
-  aliases.each_pair {|name, orig|
-    f.puts '    ENC_ALIAS("%s", "%s");' % [name, orig]
-  }
-  f.puts '}'
+result = encodings.map {|e| %[ENC_DEFINE("#{e}");\n]}.join + lines.join + 
+  "\n#define ENCODING_COUNT #{count}\n"
+mode = IO::RDWR|IO::CREAT
+mode |= IO::BINARY if defined?(IO::BINARY)
+open(outhdr, mode) do |f|
+  unless f.read == result
+    f.rewind
+    f.truncate(0)
+    f.print result
+  end
 end
Index: enc/iso_2022_jp.h
===================================================================
--- enc/iso_2022_jp.h	(revision 0)
+++ enc/iso_2022_jp.h	(revision 15086)
@@ -0,0 +1,6 @@
+#include "regenc.h"
+/* dummy for unsupported, statefull encoding */
+ENC_DUMMY("ISO-2022-JP");
+ENC_ALIAS("ISO2022-JP", "ISO-2022-JP");
+ENC_REPLICATE("ISO-2022-JP-2", "ISO-2022-JP");
+ENC_ALIAS("ISO2022-JP2", "ISO-2022-JP-2");

Property changes on: enc/iso_2022_jp.h
___________________________________________________________________
Name: svn:eol-style
   + LF
Name: svn:keywords
   + Author Id Revision

Index: common.mk
===================================================================
--- common.mk	(revision 15085)
+++ common.mk	(revision 15086)
@@ -704,7 +704,7 @@
 	$(BASERUBY) -n $(srcdir)/tool/node_name.rb $? > $@
 
 encdb.h: $(srcdir)/enc/make_encdb.rb
-	$(BASERUBY) $(srcdir)/enc/make_encdb.rb $(srcdir)/enc
+	$(BASERUBY) $(srcdir)/enc/make_encdb.rb $(srcdir)/enc $@
 
 miniprelude.c: $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb
 	$(BASERUBY) -I$(srcdir) $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb $@

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]