ruby-changes:23393
From: nobu <ko1@a...>
Date: Mon, 23 Apr 2012 22:56:34 +0900 (JST)
Subject: [ruby-changes:23393] nobu:r35444 (trunk): * ext/iconv: deprecated. [Feature #6322]
nobu 2012-04-23 22:56:11 +0900 (Mon, 23 Apr 2012) New Revision: 35444 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=35444 Log: * ext/iconv: deprecated. [Feature #6322] Removed files: trunk/ext/iconv/charset_alias.rb trunk/ext/iconv/depend trunk/ext/iconv/extconf.rb trunk/ext/iconv/iconv.c trunk/ext/iconv/mkwrapper.rb trunk/test/iconv/test_basic.rb trunk/test/iconv/test_option.rb trunk/test/iconv/test_partial.rb trunk/test/iconv/utils.rb Modified files: trunk/ChangeLog trunk/ext/.document trunk/ext/Setup trunk/ext/Setup.atheos trunk/ext/Setup.emx trunk/ext/Setup.nt Index: ChangeLog =================================================================== --- ChangeLog (revision 35443) +++ ChangeLog (revision 35444) @@ -1,3 +1,7 @@ +Mon Apr 23 22:56:08 2012 Nobuyoshi Nakada <nobu@r...> + + * ext/iconv: deprecated. [Feature #6322] + Mon Apr 23 22:07:00 2012 Tanaka Akira <akr@f...> * test/socket/test_unix.rb (bound_unix_socket): make temporary Index: ext/Setup.nt =================================================================== --- ext/Setup.nt (revision 35443) +++ ext/Setup.nt (revision 35444) @@ -14,7 +14,6 @@ etc fcntl #gdbm -#iconv #io/wait nkf #openssl Index: ext/.document =================================================================== --- ext/.document (revision 35443) +++ ext/.document (revision 35444) @@ -27,7 +27,6 @@ fiddle/function.c fiddle/lib gdbm/gdbm.c -iconv/iconv.c io/console/console.c io/nonblock/nonblock.c io/wait/lib Index: ext/Setup =================================================================== --- ext/Setup (revision 35443) +++ ext/Setup (revision 35444) @@ -13,7 +13,6 @@ #etc #fcntl #gdbm -#iconv #io/wait #nkf #openssl Index: ext/Setup.atheos =================================================================== --- ext/Setup.atheos (revision 35443) +++ ext/Setup.atheos (revision 35444) @@ -14,7 +14,6 @@ etc fcntl gdbm -iconv io/wait nkf #openssl Index: ext/Setup.emx =================================================================== --- ext/Setup.emx (revision 35443) +++ ext/Setup.emx (revision 35444) @@ -14,7 +14,6 @@ etc fcntl #gdbm -#iconv #io/wait nkf #openssl Index: ext/iconv/depend =================================================================== --- ext/iconv/depend (revision 35443) +++ ext/iconv/depend (revision 35444) @@ -1,2 +0,0 @@ -iconv.o: iconv.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h \ - $(hdrdir)/st.h $(hdrdir)/intern.h $(hdrdir)/encoding.h Index: ext/iconv/iconv.c =================================================================== --- ext/iconv/iconv.c (revision 35443) +++ ext/iconv/iconv.c (revision 35444) @@ -1,1263 +0,0 @@ -/* -*- mode:c; c-file-style:"ruby" -*- */ -/********************************************************************** - - iconv.c - - - $Author$ - created at: Wed Dec 1 20:28:09 JST 1999 - - All the files in this distribution are covered under the Ruby's - license (see the file COPYING). - - Documentation by Yukihiro Matsumoto and Gavin Sinclair. - -**********************************************************************/ - -#include "ruby/ruby.h" -#include <errno.h> -#include <iconv.h> -#include <assert.h> -#include "ruby/st.h" -#include "ruby/encoding.h" - -/* - * Document-class: Iconv - * - * == Summary - * - * Ruby extension for charset conversion. - * - * == Abstract - * - * Iconv is a wrapper class for the UNIX 95 <tt>iconv()</tt> function family, - * which translates string between various encoding systems. - * - * See Open Group's on-line documents for more details. - * * <tt>iconv.h</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html - * * <tt>iconv_open()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html - * * <tt>iconv()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html - * * <tt>iconv_close()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html - * - * Which coding systems are available is platform-dependent. - * - * == Examples - * - * 1. Simple conversion between two charsets. - * - * converted_text = Iconv.conv('iso-8859-15', 'utf-8', text) - * - * 2. Instantiate a new Iconv and use method Iconv#iconv. - * - * cd = Iconv.new(to, from) - * begin - * input.each { |s| output << cd.iconv(s) } - * output << cd.iconv(nil) # Don't forget this! - * ensure - * cd.close - * end - * - * 3. Invoke Iconv.open with a block. - * - * Iconv.open(to, from) do |cd| - * input.each { |s| output << cd.iconv(s) } - * output << cd.iconv(nil) - * end - * - * 4. Shorthand for (3). - * - * Iconv.iconv(to, from, *input.to_a) - * - * == Attentions - * - * Even if some extentions of implementation dependent are useful, - * DON'T USE those extentions in libraries and scripts to widely distribute. - * If you want to use those feature, use String#encode. - */ - -/* Invalid value for iconv_t is -1 but 0 for VALUE, I hope VALUE is - big enough to keep iconv_t */ -#define VALUE2ICONV(v) ((iconv_t)((VALUE)(v) ^ -1)) -#define ICONV2VALUE(c) ((VALUE)(c) ^ -1) - -struct iconv_env_t -{ - iconv_t cd; - int argc; - VALUE *argv; - VALUE ret; - int toidx; - VALUE (*append)_((VALUE, VALUE)); -}; - -struct rb_iconv_opt_t -{ - VALUE transliterate; - VALUE discard_ilseq; -}; - -static ID id_transliterate, id_discard_ilseq; - -static VALUE rb_eIconvInvalidEncoding; -static VALUE rb_eIconvFailure; -static VALUE rb_eIconvIllegalSeq; -static VALUE rb_eIconvInvalidChar; -static VALUE rb_eIconvOutOfRange; -static VALUE rb_eIconvBrokenLibrary; - -static ID rb_success, rb_failed; -static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg)); -static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg)); -static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed)); -static VALUE iconv_failure_success _((VALUE self)); -static VALUE iconv_failure_failed _((VALUE self)); - -static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx)); -static void iconv_dfree _((void *cd)); -static VALUE iconv_free _((VALUE cd)); -static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)); -static VALUE rb_str_derive _((VALUE str, const char* ptr, long len)); -static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx, - struct iconv_env_t* env)); -static VALUE iconv_s_allocate _((VALUE klass)); -static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self)); -static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self)); -static VALUE iconv_s_convert _((struct iconv_env_t* env)); -static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self)); -static VALUE iconv_init_state _((VALUE cd)); -static VALUE iconv_finish _((VALUE self)); -static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self)); -static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self)); - -static VALUE charset_map; - -/* - * Document-method: charset_map - * call-seq: Iconv.charset_map - * - * Returns the map from canonical name to system dependent name. - */ -static VALUE -charset_map_get(void) -{ - return charset_map; -} - -static VALUE -strip_glibc_option(VALUE *code) -{ - VALUE val = StringValue(*code); - const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val); - const char *slash = memchr(ptr, '/', pend - ptr); - - if (slash && slash < pend - 1 && slash[1] == '/') { - VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash); - val = rb_str_subseq(val, 0, slash - ptr); - *code = val; - return opt; - } - return 0; -} - -static char * -map_charset(VALUE *code) -{ - VALUE val = StringValue(*code); - - if (RHASH_SIZE(charset_map)) { - st_data_t data; - VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0); - StringValuePtr(key); - if (st_lookup(RHASH_TBL(charset_map), key, &data)) { - *code = (VALUE)data; - } - } - return StringValuePtr(*code); -} - -NORETURN(static void rb_iconv_sys_fail_str(VALUE msg)); -static void -rb_iconv_sys_fail_str(VALUE msg) -{ - if (errno == 0) { - rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, msg)); - } - rb_sys_fail_str(msg); -} - -#define rb_sys_fail_str(s) rb_iconv_sys_fail_str(s) - -NORETURN(static void rb_iconv_sys_fail(const char *s)); -static void -rb_iconv_sys_fail(const char *s) -{ - rb_iconv_sys_fail_str(rb_str_new_cstr(s)); -} - -#define rb_sys_fail(s) rb_iconv_sys_fail(s) - -static iconv_t -iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx) -{ - VALUE toopt = strip_glibc_option(&to); - VALUE fromopt = strip_glibc_option(&from); - VALUE toenc = 0, fromenc = 0; - const char* tocode = map_charset(&to); - const char* fromcode = map_charset(&from); - iconv_t cd; - int retry = 0; - - *idx = rb_enc_find_index(tocode); - - if (toopt) { - toenc = rb_str_plus(to, toopt); - tocode = RSTRING_PTR(toenc); - } - if (fromopt) { - fromenc = rb_str_plus(from, fromopt); - fromcode = RSTRING_PTR(fromenc); - } - while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) { - int inval = 0; - switch (errno) { - case EMFILE: - case ENFILE: - case ENOMEM: - if (!retry++) { - rb_gc(); - continue; - } - break; - case EINVAL: - retry = 0; - inval = 1; - if (toenc) { - tocode = RSTRING_PTR(to); - rb_str_resize(toenc, 0); - toenc = 0; - continue; - } - if (fromenc) { - fromcode = RSTRING_PTR(from); - rb_str_resize(fromenc, 0); - fromenc = 0; - continue; - } - break; - } - { - const char *s = inval ? "invalid encoding " : "iconv"; - VALUE msg = rb_sprintf("%s(\"%s\", \"%s\")", - s, RSTRING_PTR(to), RSTRING_PTR(from)); - if (!inval) rb_sys_fail_str(msg); - rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil, - rb_ary_new3(2, to, from), NULL, msg)); - } - } - - if (toopt || fromopt) { - if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) { - fromopt = 0; - } - if (toopt && fromopt) { - rb_warning("encoding option isn't portable: %s, %s", - RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2); - } - else { - rb_warning("encoding option isn't portable: %s", - (toopt ? RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2); - } - } - - if (opt) { -#ifdef ICONV_SET_TRANSLITERATE - if (opt->transliterate != Qundef) { - int flag = RTEST(opt->transliterate); - rb_warning("encoding option isn't portable: transliterate"); - if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag)) - rb_sys_fail("ICONV_SET_TRANSLITERATE"); - } -#endif -#ifdef ICONV_SET_DISCARD_ILSEQ - if (opt->discard_ilseq != Qundef) { - int flag = RTEST(opt->discard_ilseq); - rb_warning("encoding option isn't portable: discard_ilseq"); - if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag)) - rb_sys_fail("ICONV_SET_DISCARD_ILSEQ"); - } -#endif - } - - return cd; -} - -static void -iconv_dfree(void *cd) -{ - iconv_close(VALUE2ICONV(cd)); -} - -#define ICONV_FREE iconv_dfree - -static VALUE -iconv_free(VALUE cd) -{ - if (cd && iconv_close(VALUE2ICONV(cd)) == -1) - rb_sys_fail("iconv_close"); - return Qnil; -} - -static VALUE -check_iconv(VALUE obj) -{ - Check_Type(obj, T_DATA); - if (RDATA(obj)->dfree != ICONV_FREE) { - rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj))); - } - return (VALUE)DATA_PTR(obj); -} - -static VALUE -iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen) -{ -#ifdef ICONV_INPTR_CONST -#define ICONV_INPTR_CAST -#else -#define ICONV_INPTR_CAST (char **) -#endif - size_t ret; - - errno = 0; - ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen); - if (ret == (size_t)-1) { - if (!*inlen) - return Qfalse; - switch (errno) { - case E2BIG: - /* try the left in next loop */ - break; - case EILSEQ: - return rb_eIconvIllegalSeq; - case EINVAL: - return rb_eIconvInvalidChar; - case 0: - return rb_eIconvBrokenLibrary; - default: - rb_sys_fail("iconv"); - } - } - else if (*inlen > 0) { - /* something goes wrong */ - return rb_eIconvIllegalSeq; - } - else if (ret) { - return Qnil; /* conversion */ - } - return Qfalse; -} - -#define FAILED_MAXLEN 16 - -static VALUE -iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed) -{ - rb_call_super(1, &mesg); - rb_ivar_set(error, rb_success, success); - rb_ivar_set(error, rb_failed, failed); - return error; -} - -static VALUE -iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg) -{ - VALUE args[3]; - - if (!NIL_P(mesg)) { - args[0] = mesg; - } - else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) { - args[0] = rb_inspect(failed); - } - else { - args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN)); - rb_str_cat2(args[0], "..."); - } - args[1] = success; - args[2] = failed; - if (env) { - args[1] = env->append(rb_obj_dup(env->ret), success); - if (env->argc > 0) { - *(env->argv) = failed; - args[2] = rb_ary_new4(env->argc, env->argv); - } - } - return rb_class_new_instance(3, args, error); -} - -static VALUE -iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, VALUE mesg) -{ - error = iconv_fail(error, success, failed, env, mesg); - if (!rb_block_given_p()) rb_exc_raise(error); - rb_set_errinfo(error); - return rb_yield(failed); -} - -static VALUE -rb_str_derive(VALUE str, const char* ptr, long len) -{ - VALUE ret; - - if (NIL_P(str)) - return rb_str_new(ptr, len); - if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len) - ret = rb_str_subseq(str, ptr - RSTRING_PTR(str), len); - else - ret = rb_str_new(ptr, len); - OBJ_INFECT(ret, str); - return ret; -} - -static VALUE -iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env) -{ - VALUE ret = Qfalse; - VALUE error = Qfalse; - VALUE rescue; - const char *inptr, *instart; - size_t inlen; - /* I believe ONE CHARACTER never exceed this. */ - char buffer[BUFSIZ]; - char *outptr; - size_t outlen; - - if (cd == (iconv_t)-1) - rb_raise(rb_eArgError, "closed iconv"); - - if (NIL_P(str)) { - /* Reset output pointer or something. */ - inptr = ""; - inlen = 0; - outptr = buffer; - outlen = sizeof(buffer); - error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen); - if (RTEST(error)) { - unsigned int i; - rescue = iconv_fail_retry(error, Qnil, Qnil, env, Qnil); - if (TYPE(rescue) == T_ARRAY) { - str = RARRAY_LEN(rescue) > 0 ? RARRAY_PTR(rescue)[0] : Qnil; - } - if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) { - char c = i; - str = rb_str_new(&c, 1); - } - else if (!NIL_P(str)) { - StringValue(str); - } - } - - inptr = NULL; - length = 0; - } - else { - long slen; - - StringValue(str); - slen = RSTRING_LEN(str); - inptr = RSTRING_PTR(str); - - inptr += start; - if (length < 0 || length > start + slen) - length = slen - start; - } - instart = inptr; - inlen = length; - - do { - VALUE errmsg = Qnil; - const char *tmpstart = inptr; - outptr = buffer; - outlen = sizeof(buffer); - - error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen); - - if ( -#if SIGNEDNESS_OF_SIZE_T < 0 - 0 <= outlen && -#endif - outlen <= sizeof(buffer)) { - outlen = sizeof(buffer) - outlen; - if (NIL_P(error) || /* something converted */ - outlen > (size_t)(inptr - tmpstart) || /* input can't contain output */ - (outlen < (size_t)(inptr - tmpstart) && inlen > 0) || /* something skipped */ - memcmp(buffer, tmpstart, outlen)) /* something differs */ - { - if (NIL_P(str)) { - ret = rb_str_new(buffer, outlen); - if (toidx >= 0) rb_enc_associate_index(ret, toidx); - } - else { - if (ret) { - ret = rb_str_buf_cat(ret, instart, tmpstart - instart); - } - else { - ret = rb_str_new(instart, tmpstart - instart); - if (toidx >= 0) rb_enc_associate_index(ret, toidx); - OBJ_INFECT(ret, str); - } - ret = rb_str_buf_cat(ret, buffer, outlen); - instart = inptr; - } - } - else if (!inlen) { - inptr = tmpstart + outlen; - } - } - else { - /* Some iconv() have a bug, return *outlen out of range */ - errmsg = rb_sprintf("bug?(output length = %ld)", (long)(sizeof(buffer) - outlen)); - error = rb_eIconvOutOfRange; - } - - if (RTEST(error)) { - long len = 0; - - if (!ret) { - ret = rb_str_derive(str, instart, inptr - instart); - if (toidx >= 0) rb_enc_associate_index(ret, toidx); - } - else if (inptr > instart) { - rb_str_cat(ret, instart, inptr - instart); - } - str = rb_str_derive(str, inptr, inlen); - rescue = iconv_fail_retry(error, ret, str, env, errmsg); - if (TYPE(rescue) == T_ARRAY) { - if ((len = RARRAY_LEN(rescue)) > 0) - rb_str_concat(ret, RARRAY_PTR(rescue)[0]); - if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) { - StringValue(str); - inlen = length = RSTRING_LEN(str); - instart = inptr = RSTRING_PTR(str); - continue; - } - } - else if (!NIL_P(rescue)) { - rb_str_concat(ret, rescue); - } - break; - } - } while (inlen > 0); - - if (!ret) { - ret = rb_str_derive(str, instart, inptr - instart); - if (toidx >= 0) rb_enc_associate_index(ret, toidx); - } - else if (inptr > instart) { - rb_str_cat(ret, instart, inptr - instart); - } - return ret; -} - -static VALUE -iconv_s_allocate(VALUE klass) -{ - return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0); -} - -static VALUE -get_iconv_opt_i(VALUE i, VALUE arg) -{ - VALUE name; -#if defined ICONV_SET_TRANSLITERATE || defined ICONV_SET_DISCARD_ILSEQ - VALUE val; - struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg; -#endif - - i = rb_Array(i); - name = rb_ary_entry(i, 0); -#if defined ICONV_SET_TRANSLITERATE || defined ICONV_SET_DISCARD_ILSEQ - val = rb_ary_entry(i, 1); -#endif - do { - if (SYMBOL_P(name)) { - ID id = SYM2ID(name); - if (id == id_transliterate) { -#ifdef ICONV_SET_TRANSLITERATE - opt->transliterate = val; -#else - rb_notimplement(); -#endif - break; - } - if (id == id_discard_ilseq) { -#ifdef ICONV_SET_DISCARD_ILSEQ - opt->discard_ilseq = val; -#else - rb_notimplement(); -#endif - break; - } - } - else { - const char *s = StringValueCStr(name); - if (strcmp(s, "transliterate") == 0) { -#ifdef ICONV_SET_TRANSLITERATE - opt->transliterate = val; -#else - rb_notimplement(); -#endif - break; - } - if (strcmp(s, "discard_ilseq") == 0) { -#ifdef ICONV_SET_DISCARD_ILSEQ - opt->discard_ilseq = val; -#else - rb_notimplement(); -#endif - break; - } - } - name = rb_inspect(name); - rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name)); - } while (0); - return Qnil; -} - -static void -get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options) -{ - opt->transliterate = Qundef; - opt->discard_ilseq = Qundef; - if (!NIL_P(options)) { - rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt); - } -} - -#define iconv_ctl(self, func, val) (\ - iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) ? \ - rb_sys_fail(#func) : (void)0) - -/* - * Document-method: new - * call-seq: Iconv.new(to, from, [options]) - * - * Creates new code converter from a coding-system designated with +from+ - * to another one designated with +to+. - * - * === Parameters - * - * +to+:: encoding name for destination - * +from+:: encoding name for source - * +options+:: options for converter - * - * === Exceptions - * - * TypeError:: if +to+ or +from+ aren't String - * InvalidEncoding:: if designated converter couldn't find out - * SystemCallError:: if <tt>iconv_open(3)</tt> fails - */ -static VALUE -iconv_initialize(int argc, VALUE *argv, VALUE self) -{ - VALUE to, from, options; - struct rb_iconv_opt_t opt; - int idx; - - rb_scan_args(argc, argv, "21", &to, &from, &options); - get_iconv_opt(&opt, options); - iconv_free(check_iconv(self)); - DATA_PTR(self) = NULL; - DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx)); - if (idx >= 0) ENCODING_SET(self, idx); - re (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/