ruby-changes:8363
From: matz <ko1@a...>
Date: Thu, 23 Oct 2008 01:54:10 +0900 (JST)
Subject: [ruby-changes:8363] Ruby:r19892 (trunk): * string.c (rb_str_conv_enc_opts): new function to convert with
matz 2008-10-23 01:53:50 +0900 (Thu, 23 Oct 2008) New Revision: 19892 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19892 Log: * string.c (rb_str_conv_enc_opts): new function to convert with specifying ecflags and ecopts. * ext/zlib/zlib.c (gzfile_newstr): specify ecflags and ecopts for conversion using above function. * ext/zlib/zlib.c (gzfile_newstr): use own rb_econv_t for dummy encoding to handling stateful encoding (e.g. iso-2022-jp). [ruby-dev:36857] * ext/zlib/zlib.c (gzfile_getc): ditto. Modified files: trunk/ChangeLog trunk/ext/zlib/zlib.c trunk/string.c Index: ChangeLog =================================================================== --- ChangeLog (revision 19891) +++ ChangeLog (revision 19892) @@ -1,3 +1,17 @@ +Thu Oct 23 01:26:25 2008 Yukihiro Matsumoto <matz@r...> + + * string.c (rb_str_conv_enc_opts): new function to convert with + specifying ecflags and ecopts. + + * ext/zlib/zlib.c (gzfile_newstr): specify ecflags and ecopts for + conversion using above function. + + * ext/zlib/zlib.c (gzfile_newstr): use own rb_econv_t for dummy + encoding to handling stateful encoding (e.g. iso-2022-jp). + [ruby-dev:36857] + + * ext/zlib/zlib.c (gzfile_getc): ditto. + Thu Oct 23 01:24:49 2008 Nobuyoshi Nakada <nobu@r...> * lib/mkmf.rb (create_tmpsrc): get rid of side effects. Index: string.c =================================================================== --- string.c (revision 19891) +++ string.c (revision 19892) @@ -473,7 +473,7 @@ #define rb_tainted_str_new2 rb_tainted_str_new_cstr VALUE -rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) +rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts) { rb_econv_t *ec; rb_econv_result_t ret; @@ -497,7 +497,7 @@ newstr = rb_str_new(0, len); retry: - ec = rb_econv_open_opts(from->name, to->name, 0, Qnil); + ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts); if (!ec) return str; sp = (unsigned char*)RSTRING_PTR(str); @@ -525,6 +525,12 @@ } VALUE +rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) +{ + return rb_str_conv_enc_opts(str, from, to, 0, Qnil); +} + +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc) { VALUE str; Index: ext/zlib/zlib.c =================================================================== --- ext/zlib/zlib.c (revision 19891) +++ ext/zlib/zlib.c (revision 19892) @@ -194,6 +194,7 @@ void Init_zlib(void); int rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p); +VALUE rb_str_conv_enc_opts(VALUE, rb_encoding*, rb_encoding*, int, VALUE); /*--------- Exceptions --------*/ @@ -540,7 +541,7 @@ return zstream_detach_buffer(z); } - dst = rb_str_substr(z->buf, 0, len); + dst = rb_str_subseq(z->buf, 0, len); RBASIC(dst)->klass = rb_cString; z->buf_filled -= len; memmove(RSTRING_PTR(z->buf), RSTRING_PTR(z->buf) + len, @@ -1670,7 +1671,12 @@ void (*end)(struct gzfile *); rb_encoding *enc; rb_encoding *enc2; + rb_econv_t *ec; + int ecflags; + VALUE ecopts; + char *cbuf; }; +#define GZFILE_CBUF_CAPA 10 #define GZFILE_FLAG_SYNC ZSTREAM_FLAG_UNUSED #define GZFILE_FLAG_HEADER_FINISHED (ZSTREAM_FLAG_UNUSED << 1) @@ -1689,6 +1695,7 @@ rb_gc_mark(gz->orig_name); rb_gc_mark(gz->comment); zstream_mark(&gz->z); + rb_gc_mark(gz->ecopts); } static void @@ -1702,6 +1709,9 @@ } zstream_finalize(z); } + if (gz->cbuf) { + xfree(gz->cbuf); + } xfree(gz); } @@ -1728,6 +1738,10 @@ gz->end = endfunc; gz->enc = rb_default_external_encoding(); gz->enc2 = 0; + gz->ec = NULL; + gz->ecflags = 0; + gz->ecopts = Qnil; + gz->cbuf = 0; return obj; } @@ -1742,6 +1756,11 @@ gz->crc = crc32(0, Z_NULL, 0); gz->lineno = 0; gz->ungetc = 0; + if (gz->ec) { + rb_econv_close(gz->ec); + gz->ec = rb_econv_open_opts(gz->enc2->name, gz->enc->name, + gz->ecflags, gz->ecopts); + } } static void @@ -2076,12 +2095,19 @@ static VALUE gzfile_newstr(struct gzfile *gz, VALUE str) { - OBJ_TAINT(str); /* for safe */ - if (gz->enc && !gz->enc2) { + if (!gz->enc2) { rb_enc_associate(str, gz->enc); + OBJ_TAINT(str); /* for safe */ return str; } - return rb_str_conv_enc(str, gz->enc2, gz->enc); + if (gz->ec && rb_enc_dummy_p(gz->enc2)) { + str = rb_econv_str_convert(gz->ec, str, ECONV_PARTIAL_INPUT); + rb_enc_associate(str, gz->enc); + OBJ_TAINT(str); + return str; + } + return rb_str_conv_enc_opts(str, gz->enc2, gz->enc, + gz->ecflags, gz->ecopts); } static VALUE @@ -2105,7 +2131,7 @@ dst = zstream_shift_buffer(&gz->z, len); gzfile_calc_crc(gz, dst); - return gzfile_newstr(gz, dst); + return dst; } static VALUE @@ -2142,15 +2168,13 @@ dst = zstream_shift_buffer(&gz->z, len); gzfile_calc_crc(gz, dst); - if (NIL_P(outbuf)) { - OBJ_TAINT(dst); /* for safe */ - } - else { + if (!NIL_P(outbuf)) { rb_str_resize(outbuf, RSTRING_LEN(dst)); memcpy(RSTRING_PTR(outbuf), RSTRING_PTR(dst), RSTRING_LEN(dst)); dst = outbuf; } - return gzfile_newstr(gz, dst); + OBJ_TAINT(dst); /* for safe */ + return dst; } static VALUE @@ -2170,13 +2194,14 @@ dst = zstream_detach_buffer(&gz->z); gzfile_calc_crc(gz, dst); - return gzfile_newstr(gz, dst); + OBJ_TAINT(dst); + return dst; } static VALUE gzfile_getc(struct gzfile *gz) { - VALUE buf, dst; + VALUE buf, dst = 0; int len; len = rb_enc_mbmaxlen(gz->enc); @@ -2190,11 +2215,33 @@ return Qnil; } - buf = gz->z.buf; - len = rb_enc_mbclen(RSTRING_PTR(buf), RSTRING_PTR(buf)+len, gz->enc); - dst = zstream_shift_buffer(&gz->z, len); - gzfile_calc_crc(gz, dst); - return gzfile_newstr(gz, dst); + if (gz->ec && rb_enc_dummy_p(gz->enc2)) { + const unsigned char *ss, *sp, *se; + unsigned char *ds, *dp, *de; + rb_econv_result_t res; + + if (!gz->cbuf) { + gz->cbuf = ALLOC_N(char, GZFILE_CBUF_CAPA); + } + ss = sp = (const unsigned char*)RSTRING_PTR(gz->z.buf); + se = sp + gz->z.buf_filled; + ds = dp = (unsigned char *)gz->cbuf; + de = (unsigned char *)ds + GZFILE_CBUF_CAPA; + res = rb_econv_convert(gz->ec, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_AFTER_OUTPUT); + rb_econv_check_error(gz->ec); + dst = zstream_shift_buffer(&gz->z, sp - ss); + gzfile_calc_crc(gz, dst); + dst = rb_str_new(gz->cbuf, dp - ds); + rb_enc_associate(dst, gz->enc); + OBJ_TAINT(dst); + return dst; + } + else { + buf = gz->z.buf; + len = rb_enc_mbclen(RSTRING_PTR(buf), RSTRING_END(buf), gz->enc); + dst = gzfile_read(gz, len); + return gzfile_newstr(gz, dst); + } } static void @@ -2624,6 +2671,20 @@ } +static void +rb_gzfile_ecopts(struct gzfile *gz, VALUE opts) +{ + if (!NIL_P(opts)) { + rb_io_extract_encoding_option(opts, &gz->enc, &gz->enc2); + } + if (gz->enc2) { + gz->ecflags = rb_econv_prepare_opts(opts, &opts); + gz->ec = rb_econv_open_opts(gz->enc2->name, gz->enc->name, + gz->ecflags, opts); + gz->ecopts = opts; + } +} + /* ------------------------------------------------------------------------- */ /* @@ -2704,9 +2765,7 @@ } gz->io = io; ZSTREAM_READY(&gz->z); - if (!NIL_P(opt)) { - rb_io_extract_encoding_option(opt, &gz->enc, &gz->enc2); - } + rb_gzfile_ecopts(gz, opt); return obj; } @@ -2901,9 +2960,7 @@ gz->io = io; ZSTREAM_READY(&gz->z); gzfile_read_header(gz); - if (!NIL_P(opt)) { - rb_io_extract_encoding_option(opt, &gz->enc, &gz->enc2); - } + rb_gzfile_ecopts(gz, opt); return obj; } @@ -3214,7 +3271,7 @@ gzreader_skip_linebreaks(gz); } - return dst; + return gzfile_newstr(gz, dst); } /* -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/