ruby-changes:7728
From: akr <ko1@a...>
Date: Mon, 8 Sep 2008 23:33:42 +0900 (JST)
Subject: [ruby-changes:7728] Ruby:r19249 (trunk): * include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed
akr 2008-09-08 23:33:17 +0900 (Mon, 08 Sep 2008) New Revision: 19249 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19249 Log: * include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed from rb_econv_stateless_encoding to apply stateless ASCII incompatible encodings such as UTF-16BE. * io.c (make_writeconv): use rb_econv_asciicompat_encoding. * transcode_data.h (rb_transcoder_asciicompat_type_t): renamed from rb_transcoder_stateful_type_t. (rb_transcoder): use rb_transcoder_asciicompat_type_t. * transcode.c: follow the type change. (asciicompat_encoding_i): renamed from stateless_encoding_i. (rb_econv_asciicompat_encoding): renamed from rb_econv_stateless_encoding. (econv_s_asciicompat_encoding): method renamed. * tool/transcode-tblgen.rb: follow the type change. * enc/trans/utf_16_32.trans: follow the type change. rb_from_UTF_16BE to UTF-8 is asciicompat_decoder. rb_from_UTF_16LE to UTF-8 is asciicompat_decoder. rb_from_UTF_32BE to UTF-8 is asciicompat_decoder. rb_from_UTF_32LE to UTF-8 is asciicompat_decoder. UTF-8 to rb_to_UTF_16BE is asciicompat_encoder. UTF-8 to rb_to_UTF_16LE is asciicompat_encoder. UTF-8 to rb_to_UTF_32BE is asciicompat_encoder. UTF-8 to rb_to_UTF_32LE is asciicompat_encoder. * enc/trans/newline.trans: follow the type change. universal newline decoder is asciicompat_converter. * enc/trans/escape.trans: follow the type change. * enc/trans/iso2022.trans: ditto. * enc/trans/japanese.trans: ditto. Modified files: trunk/ChangeLog trunk/enc/trans/escape.trans trunk/enc/trans/iso2022.trans trunk/enc/trans/japanese.trans trunk/enc/trans/newline.trans trunk/enc/trans/utf_16_32.trans trunk/include/ruby/encoding.h trunk/io.c trunk/test/ruby/test_econv.rb trunk/tool/transcode-tblgen.rb trunk/transcode.c trunk/transcode_data.h Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 19248) +++ include/ruby/encoding.h (revision 19249) @@ -239,8 +239,9 @@ int rb_econv_putbackable(rb_econv_t *ec); void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); -/* returns corresponding stateless encoding, or NULL if not stateful. */ -const char *rb_econv_stateless_encoding(const char *stateful_enc); +/* returns the corresponding ASCII compatible encoding for encname, + * or NULL if encname is not ASCII incompatible encoding. */ +const char *rb_econv_asciicompat_encoding(const char *encname); VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); Index: ChangeLog =================================================================== --- ChangeLog (revision 19248) +++ ChangeLog (revision 19249) @@ -1,3 +1,42 @@ +Mon Sep 8 23:24:54 2008 Tanaka Akira <akr@f...> + + * include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed + from rb_econv_stateless_encoding to apply stateless ASCII + incompatible encodings such as UTF-16BE. + + * io.c (make_writeconv): use rb_econv_asciicompat_encoding. + + * transcode_data.h (rb_transcoder_asciicompat_type_t): renamed from + rb_transcoder_stateful_type_t. + (rb_transcoder): use rb_transcoder_asciicompat_type_t. + + * transcode.c: follow the type change. + (asciicompat_encoding_i): renamed from stateless_encoding_i. + (rb_econv_asciicompat_encoding): renamed from + rb_econv_stateless_encoding. + (econv_s_asciicompat_encoding): method renamed. + + * tool/transcode-tblgen.rb: follow the type change. + + * enc/trans/utf_16_32.trans: follow the type change. + rb_from_UTF_16BE to UTF-8 is asciicompat_decoder. + rb_from_UTF_16LE to UTF-8 is asciicompat_decoder. + rb_from_UTF_32BE to UTF-8 is asciicompat_decoder. + rb_from_UTF_32LE to UTF-8 is asciicompat_decoder. + UTF-8 to rb_to_UTF_16BE is asciicompat_encoder. + UTF-8 to rb_to_UTF_16LE is asciicompat_encoder. + UTF-8 to rb_to_UTF_32BE is asciicompat_encoder. + UTF-8 to rb_to_UTF_32LE is asciicompat_encoder. + + * enc/trans/newline.trans: follow the type change. universal newline + decoder is asciicompat_converter. + + * enc/trans/escape.trans: follow the type change. + + * enc/trans/iso2022.trans: ditto. + + * enc/trans/japanese.trans: ditto. + Mon Sep 8 23:05:42 2008 Tanaka Akira <akr@f...> * transcode.c (rb_econv_insert_output): "readagain" part should be Index: enc/trans/escape.trans =================================================================== --- enc/trans/escape.trans (revision 19248) +++ enc/trans/escape.trans (revision 19249) @@ -79,7 +79,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 7, /* max_output */ - stateful_encoder, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 1, escape_xml_attr_quote_init, escape_xml_attr_quote_init, NULL, NULL, NULL, fun_so_escape_xml_attr_quote, escape_xml_attr_quote_finish Index: enc/trans/iso2022.trans =================================================================== --- enc/trans/iso2022.trans (revision 19248) +++ enc/trans/iso2022.trans (revision 19249) @@ -114,7 +114,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 3, /* max_output */ - stateful_decoder, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, fun_si_iso2022jp_decoder, NULL, fun_so_iso2022jp_decoder }; @@ -196,7 +196,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 5, /* max_output */ - stateful_encoder, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_iso2022jp_encoder, finish_iso2022jp_encoder, @@ -218,7 +218,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_stateless_iso2022jp_to_eucjp, }; @@ -239,7 +239,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 3, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp_to_stateless_iso2022jp, }; Index: enc/trans/newline.trans =================================================================== --- enc/trans/newline.trans (revision 19248) +++ enc/trans/newline.trans (revision 19249) @@ -92,7 +92,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ - stateful_decoder, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_universal_newline, universal_newline_finish @@ -105,7 +105,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; @@ -117,7 +117,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; Index: enc/trans/utf_16_32.trans =================================================================== --- enc/trans/utf_16_32.trans (revision 19248) +++ enc/trans/utf_16_32.trans (revision 19249) @@ -266,7 +266,7 @@ 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16be }; @@ -278,7 +278,7 @@ 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16be }; @@ -290,7 +290,7 @@ 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16le }; @@ -302,7 +302,7 @@ 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16le }; @@ -314,7 +314,7 @@ 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32be }; @@ -326,7 +326,7 @@ 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32be }; @@ -338,7 +338,7 @@ 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32le }; @@ -350,7 +350,7 @@ 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32le }; Index: enc/trans/japanese.trans =================================================================== --- enc/trans/japanese.trans (revision 19248) +++ enc/trans/japanese.trans (revision 19249) @@ -73,7 +73,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp2sjis }; @@ -85,7 +85,7 @@ 1, /* input_unit_length */ 2, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_sjis2eucjp }; Index: io.c =================================================================== --- io.c (revision 19248) +++ io.c (revision 19249) @@ -713,7 +713,7 @@ } else { enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc; - senc = rb_econv_stateless_encoding(enc->name); + senc = rb_econv_asciicompat_encoding(enc->name); if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) { /* single conversion */ fptr->writeconv_pre_ecflags = ecflags; Index: transcode_data.h =================================================================== --- transcode_data.h (revision 19248) +++ transcode_data.h (revision 19249) @@ -57,11 +57,11 @@ #define THREETRAIL /* legal but undefined if three more trailing UTF-8 */ typedef enum { - stateless_converter, /* stateless -> stateless */ - stateful_decoder, /* stateful -> stateless */ - stateful_encoder /* stateless -> stateful */ - /* stateful -> stateful is intentionally ommitted. */ -} rb_transcoder_stateful_type_t; + asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */ + asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */ + asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */ + /* ASCII-incompatible -> ASCII-incompatible is intentionally ommitted. */ +} rb_transcoder_asciicompat_type_t; typedef struct rb_transcoder rb_transcoder; @@ -78,7 +78,7 @@ int input_unit_length; int max_input; int max_output; - rb_transcoder_stateful_type_t stateful_type; + rb_transcoder_asciicompat_type_t asciicompat_type; size_t state_size; int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */ int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */ Index: tool/transcode-tblgen.rb =================================================================== --- tool/transcode-tblgen.rb (revision 19248) +++ tool/transcode-tblgen.rb (revision 19249) @@ -634,7 +634,7 @@ #{input_unit_length}, /* input_unit_length */ #{max_input}, /* max_input */ #{max_output}, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL, NULL, NULL, NULL Index: test/ruby/test_econv.rb =================================================================== --- test/ruby/test_econv.rb (revision 19248) +++ test/ruby/test_econv.rb (revision 19249) @@ -27,20 +27,24 @@ ec.primitive_errinfo) end - def test_s_stateless_encoding - assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding("ISO-2022-JP")) - assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding(Encoding::ISO_2022_JP)) - assert_nil(Encoding::Converter.stateless_encoding("EUC-JP")) - assert_nil(Encoding::Converter.stateless_encoding("UTF-8")) - assert_nil(Encoding::Converter.stateless_encoding("UTF-16BE")) - assert_nil(Encoding::Converter.stateless_encoding(Encoding::UTF_8)) - assert_nil(Encoding::Converter.stateless_encoding("xml-attr-escaped")) + def test_s_asciicompat_encoding + assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding("ISO-2022-JP")) + assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(Encoding::ISO_2022_JP)) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16BE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16LE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32BE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32LE")) + assert_nil(Encoding::Converter.asciicompat_encoding("EUC-JP")) + assert_nil(Encoding::Converter.asciicompat_encoding("UTF-8")) + assert_nil(Encoding::Converter.asciicompat_encoding(Encoding::UTF_8)) + assert_nil(Encoding::Converter.asciicompat_encoding("xml-attr-escaped")) + assert_nil(Encoding::Converter.asciicompat_encoding("encoding-not-exist")) end - def test_stateless_encoding_iso2022jp - slenc = Encoding::Converter.stateless_encoding("ISO-2022-JP") + def test_asciicompat_encoding_iso2022jp + acenc = Encoding::Converter.asciicompat_encoding("ISO-2022-JP") str = "\e$B~~\(B".force_encoding("iso-2022-jp") - str2 = str.encode(slenc) + str2 = str.encode(acenc) str3 = str.encode("ISO-2022-JP") assert_equal(str, str3) end Index: transcode.c =================================================================== --- transcode.c (revision 19248) +++ transcode.c (revision 19249) @@ -1414,7 +1414,7 @@ tr = tc->transcoder; - if (tr->stateful_type == stateful_encoder) + if (tr->asciicompat_type == asciicompat_encoder) return tr->src_encoding; return tr->dst_encoding; } @@ -1528,7 +1528,7 @@ data_end_p = &ec->in_data_end; buf_end_p = &ec->in_buf_end; } - else if (tc->transcoder->stateful_type == stateful_encoder) { + else if (tc->transcoder->asciicompat_type == asciicompat_encoder) { need += tc->readagain_len; if (need < insert_len) goto fail; @@ -1580,7 +1580,7 @@ memcpy(*data_end_p, insert_str, insert_len); *data_end_p += insert_len; - if (tc && tc->transcoder->stateful_type == stateful_encoder) { + if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) { memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len); *data_end_p += tc->readagain_len; tc->readagain_len = 0; @@ -1633,27 +1633,31 @@ tc->readagain_len -= n; } -struct stateless_encoding_t { - const char *stateless_enc; - const char *stateful_enc; +struct asciicompat_encoding_t { + const char *ascii_compat_name; + const char *ascii_incompat_name; }; static int -stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg) +asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg) { - struct stateless_encoding_t *data = (struct stateless_encoding_t *)arg; + struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg; st_table *table2 = (st_table *)val; st_data_t v; - if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) { + if (st_lookup(table2, (st_data_t)data->ascii_incompat_name, &v)) { transcoder_entry_t *entry = (transcoder_entry_t *)v; const rb_transcoder *tr; - if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) { + if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) return ST_CONTINUE; - } tr = load_transcoder_entry(entry); - if (tr && tr->stateful_type == stateful_encoder) { - data->stateless_enc = tr->src_encoding; + if (tr && tr->asciicompat_type == asciicompat_encoder) { + /* + * Assumption: + * There is only one transcoder for + * converting to ASCII incompatible encoding. + */ + data->ascii_compat_name = tr->src_encoding; return ST_STOP; } } @@ -1661,14 +1665,14 @@ } const char * -rb_econv_stateless_encoding(const char *stateful_enc) +rb_econv_asciicompat_encoding(const char *ascii_incompat_name) { - struct stateless_encoding_t data; - data.stateful_enc = stateful_enc; - data.stateless_enc = NULL; - st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data); - if (data.stateless_enc) - return data.stateless_enc; + struct asciicompat_encoding_t data; + data.ascii_incompat_name = ascii_incompat_name; + data.ascii_compat_name = NULL; + st_foreach(transcoder_table, asciicompat_encoding_i, (st_data_t)&data); + if (data.ascii_compat_name) + return data.ascii_compat_name; return NULL; } @@ -2510,42 +2514,42 @@ /* * call-seq: - * Encoding::Converter.stateless_encoding(string) => encoding or nil - * Encoding::Converter.stateless_encoding(encoding) => encoding or nil + * Encoding::Converter.asciicompat_encoding(string) => encoding or nil + * Encoding::Converter.asciicompat_encoding(encoding) => encoding or nil * - * returns the corresponding stateless encoding. + * returns the corresponding ASCII compatible encoding. * - * It returns nil if the argument is not a stateful encoding. + * It returns nil if the argument is an ASCII compatible encoding. * - * "corresponding stateless encoding" is a stateless encoding which - * represents same characters in the statefull encoding. + * "corresponding ASCII compatible encoding" is a ASCII compatible encoding which + * represents same characters in the given ASCII incompatible encoding. * - * So, no conversion undefined error occur between the stateful encoding and the stateless encoding. + * So, no conversion undefined error occur between the ASCII compatible and incompatible encoding. * - * For ISO-2022-JP, the dedicated stateless encoding, stateless-ISO-2022-JP, is defined. - * * Encoding::Converter.stateless_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP> + * Encoding::Converter.stateless_encoding("UTF-16BE") #=> #<Encoding:UTF-8> + * Encoding::Converter.stateless_encoding("UTF-8") #=> nil * */ static VALUE -econv_s_stateless_encoding(VALUE klass, VALUE arg) +econv_s_asciicompat_encoding(VALUE klass, VALUE arg) { - const char *stateful_name, *stateless_name; - rb_encoding *stateful_enc, *stateless_enc; + const char *arg_name, *result_name; + rb_encoding *arg_enc, *result_enc; - enc_arg(arg, &stateful_name, &stateful_enc); + enc_arg(arg, &arg_name, &arg_enc); - stateless_name = rb_econv_stateless_encoding(stateful_name); + result_name = rb_econv_asciicompat_encoding(arg_name); - if (stateless_name == NULL) + if (result_name == NULL) return Qnil; - stateless_enc = rb_enc_find(stateless_name); + result_enc = rb_enc_find(result_name); - if (!stateless_enc) - stateless_enc = make_dummy_encoding(stateless_name); + if (!result_enc) + result_enc = make_dummy_encoding(result_name); - return rb_enc_from_encoding(stateless_enc); + return rb_enc_from_encoding(result_enc); } /* @@ -3563,7 +3567,7 @@ rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData); rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate); - rb_define_singleton_method(rb_cEncodingConverter, "stateless_encoding", econv_s_stateless_encoding, 1); + rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1); rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1); rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0); rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/