ruby-changes:7128
From: akr <ko1@a...>
Date: Fri, 15 Aug 2008 20:02:25 +0900 (JST)
Subject: [ruby-changes:7128] Ruby:r18646 (trunk): * include/ruby/encoding.h (rb_econv_output): add str_encoding
akr 2008-08-15 20:02:07 +0900 (Fri, 15 Aug 2008)

  New Revision: 18646

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18646

  Log:
    * include/ruby/encoding.h (rb_econv_output): add str_encoding
      argument.

    * transcode.c (get_replacement_character): add repl_enc_ptr argument.
      (rb_econv_output_with_destination_encoding): renamed from
      rb_econv_output and make it static.
      (rb_econv_output): convert str and call
      rb_econv_output_with_destination_encoding.
      (output_replacement_character): follow above interface change.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h (revision 18645)
+++ include/ruby/encoding.h (revision 18646)
@@ -247,10 +247,13 @@
         const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
         unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
         int flags);
+
+/* result: 0:success -1:failure -2:conversion-failure-to-destination-encoding */
 int rb_econv_output(rb_econv_t *ec,
-        const unsigned char *str, size_t len,
+        const unsigned char *str, size_t len, const char *str_encoding,
         unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
         size_t *required_size);
+
 void rb_econv_close(rb_econv_t *ec);
 
 /* flags for rb_econv_open */
Index: ChangeLog
===================================================================
--- ChangeLog (revision 18645)
+++ ChangeLog (revision 18646)
@@ -1,3 +1,15 @@
+Fri Aug 15 19:57:01 2008  Tanaka Akira  <akr@f...>
+
+        * include/ruby/encoding.h (rb_econv_output): add str_encoding
+          argument.
+
+        * transcode.c (get_replacement_character): add repl_enc_ptr argument.
+          (rb_econv_output_with_destination_encoding): renamed from
+          rb_econv_output and make it static.
+          (rb_econv_output): convert str and call
+          rb_econv_output_with_destination_encoding.
+          (output_replacement_character): follow above interface change.
+
 Fri Aug 15 19:33:57 2008  Nobuyoshi Nakada  <nobu@r...>
 
         * string.c (rb_str_drop_bytes): use memmove.
Index: transcode.c
===================================================================
--- transcode.c (revision 18645)
+++ transcode.c (revision 18646)
@@ -266,7 +266,7 @@
 }
 
 static const char*
-get_replacement_character(rb_encoding *enc, int *len_ret)
+get_replacement_character(rb_encoding *enc, int *len_ret, const char **repl_enc_ptr)
 {
     static rb_encoding *utf16be_encoding, *utf16le_encoding;
     static rb_encoding *utf32be_encoding, *utf32le_encoding;
@@ -278,26 +278,32 @@
     }
     if (rb_utf8_encoding() == enc) {
         *len_ret = 3;
+        *repl_enc_ptr = "UTF-8";
         return "\xEF\xBF\xBD";
     }
     else if (utf16be_encoding == enc) {
         *len_ret = 2;
+        *repl_enc_ptr = "UTF-16BE";
         return "\xFF\xFD";
     }
     else if (utf16le_encoding == enc) {
         *len_ret = 2;
+        *repl_enc_ptr = "UTF-16LE";
         return "\xFD\xFF";
     }
     else if (utf32be_encoding == enc) {
         *len_ret = 4;
+        *repl_enc_ptr = "UTF-32BE";
         return "\x00\x00\xFF\xFD";
     }
     else if (utf32le_encoding == enc) {
         *len_ret = 4;
+        *repl_enc_ptr = "UTF-32LE";
         return "\xFD\xFF\x00\x00";
     }
     else {
         *len_ret = 1;
+        *repl_enc_ptr = "US-ASCII";
         return "?";
     }
 }
@@ -962,8 +968,9 @@
     return res;
 }
 
-int
-rb_econv_output(rb_econv_t *ec,
+/* result: 0:success -1:failure */
+static int
+rb_econv_output_with_destination_encoding(rb_econv_t *ec,
     const unsigned char *str, size_t len, /* string in destination encoding */
     unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
     size_t *required_size)
@@ -982,7 +989,7 @@
      * Currently the replacement character for stateful encoding such as
      * ISO-2022-JP is "?" and it has no state changing sequence.
      * So the extra state changing sequence don't occur when
-     * rb_econv_output is used for replacement characters.
+     * rb_econv_output_with_destination_encoding is used for replacement characters.
      *
      * Thease assumption may be removed in future.
      * It needs to scan str to check state changing sequences in it.
@@ -1014,6 +1021,77 @@
     return 0;
 }
 
+/* result: 0:success -1:failure -2:conversion-failure-to-destination-encoding */
+int
+rb_econv_output(rb_econv_t *ec,
+    const unsigned char *str, size_t str_len, const char *str_encoding,
+    unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
+    size_t *required_size)
+{
+    rb_econv_t *from_ascii = NULL;;
+    unsigned char buf[1024], *buf2;
+    size_t dst_len;
+    const unsigned char *src_ptr;
+    unsigned char *dst_ptr;
+    rb_econv_result_t res;
+    int ret;
+
+    if (encoding_equal(str_encoding, ec->last_tc->transcoder->to_encoding)) {
+        return rb_econv_output_with_destination_encoding(ec, str, str_len, destination_buffer_ptr, destination_buffer_end, required_size);
+    }
+
+    if (required_size)
+        *required_size = 0;
+
+    from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0);
+    if (!from_ascii)
+        return -2;
+
+    src_ptr = str;
+    dst_len = 0;
+    do {
+        dst_ptr = buf;
+        res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf+sizeof(buf), 0);
+        if (dst_len + (dst_ptr - buf) < dst_len)
+            goto convfail;
+        dst_len += dst_ptr - buf;
+    } while (res == econv_destination_buffer_full);
+
+    if (res != econv_finished)
+        goto convfail;
+
+    rb_econv_close(from_ascii);
+    from_ascii = NULL;
+
+    if (dst_len <= sizeof(buf)) {
+        return rb_econv_output_with_destination_encoding(ec, buf, dst_len, destination_buffer_ptr, destination_buffer_end, required_size);
+    }
+
+    buf2 = xmalloc(dst_len);
+
+    from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0);
+    if (!from_ascii)
+        goto convfail;
+
+    src_ptr = str;
+    dst_ptr = buf2;
+    res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf2+dst_len, 0);
+    if (res != econv_finished)
+        goto convfail;
+    rb_econv_close(from_ascii);
+    from_ascii = NULL;
+
+    ret = rb_econv_output_with_destination_encoding(ec, buf2, dst_len, destination_buffer_ptr,
+            destination_buffer_end, required_size);
+
+    xfree(buf2);
+    return ret;
+
+convfail:
+    if (from_ascii)
+        rb_econv_close(from_ascii);
+    return -2;
+}
+
 void
 rb_econv_close(rb_econv_t *ec)
 {
@@ -1059,15 +1137,18 @@
     const rb_transcoder *tr;
     rb_encoding *enc;
     const unsigned char *replacement;
+    const char *repl_enc;
     int len;
     size_t required_size;
+    int ret;
 
     tr = tc->transcoder;
     enc = rb_enc_find(tr->to_encoding);
 
-    replacement = (const unsigned char *)get_replacement_character(enc, &len);
+    replacement = (const unsigned char *)get_replacement_character(enc, &len, &repl_enc);
 
-    if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
+    ret = rb_econv_output(ec, replacement, len, repl_enc, out_pos, *out_stop_ptr, &required_size);
+    if (ret == 0)
         return 0;
 
     if (required_size < len)
@@ -1075,7 +1156,8 @@
 
     more_output_buffer(destination, resize_destination, required_size, out_start_ptr, out_pos, out_stop_ptr);
 
-    if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
+    ret = rb_econv_output(ec, replacement, len, repl_enc, out_pos, *out_stop_ptr, &required_size);
+    if (ret == 0)
         return 0;
 
     return -1;

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/