ruby-changes:7688
From: akr <ko1@a...>
Date: Sun, 7 Sep 2008 12:13:50 +0900 (JST)
Subject: [ruby-changes:7688] Ruby:r19209 (trunk): * include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined.
akr 2008-09-07 12:13:29 +0900 (Sun, 07 Sep 2008) New Revision: 19209 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19209 Log: * include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined. (ECONV_STATEFUL_ENCODER_MASK): defined. (ECONV_XML_ATTR_QUOTE_ENCODER): defined. (ECONV_XML_ATTR_ENCODER): removed. * enc/trans/escape.trans (rb_escape_xml_attr_content): defined. (rb_escape_xml_attr_quote): defined. (rb_escape_xml_attr): removed. * io.c (NEED_WRITECONV): writeconv is required if supplemental converter is used. (make_writeconv): apply stateful encoder in writeconv. * transcode.c: follow the constant change. Modified files: trunk/ChangeLog trunk/enc/trans/escape.trans trunk/include/ruby/encoding.h trunk/io.c trunk/test/ruby/test_econv.rb trunk/test/ruby/test_io_m17n.rb trunk/transcode.c Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 19208) +++ include/ruby/encoding.h (revision 19209) @@ -269,8 +269,11 @@ #define ECONV_CRLF_NEWLINE_ENCODER 0x00001000 #define ECONV_CR_NEWLINE_ENCODER 0x00002000 #define ECONV_XML_TEXT_ENCODER 0x00004000 -#define ECONV_XML_ATTR_ENCODER 0x00008000 +#define ECONV_XML_ATTR_CONTENT_ENCODER 0x00008000 +#define ECONV_STATEFUL_ENCODER_MASK 0x00f00000 +#define ECONV_XML_ATTR_QUOTE_ENCODER 0x00100000 + /* end of flags for rb_econv_open */ /* flags for rb_econv_convert */ Index: ChangeLog =================================================================== --- ChangeLog (revision 19208) +++ ChangeLog (revision 19209) @@ -1,3 +1,20 @@ +Sun Sep 7 12:09:29 2008 Tanaka Akira <akr@f...> + + * include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined. + (ECONV_STATEFUL_ENCODER_MASK): defined. + (ECONV_XML_ATTR_QUOTE_ENCODER): defined. + (ECONV_XML_ATTR_ENCODER): removed. + + * enc/trans/escape.trans (rb_escape_xml_attr_content): defined. + (rb_escape_xml_attr_quote): defined. + (rb_escape_xml_attr): removed. 
+ + * io.c (NEED_WRITECONV): writeconv is required if supplemental + converter is used. + (make_writeconv): apply stateful encoder in writeconv. + + * transcode.c: follow the constant change. + Sun Sep 7 07:24:09 2008 Yukihiro Matsumoto <matz@r...> * misc/*.el: merged the following patches from Nathan Weizenbaum Index: enc/trans/escape.trans =================================================================== --- enc/trans/escape.trans (revision 19208) +++ enc/trans/escape.trans (revision 19209) @@ -52,9 +52,18 @@ map_xml_text["3E"] = :func_so transcode_generate_node(ActionMap.parse(map_xml_text), "escape_xml_text") - map_xml_attr = {} - map_xml_attr["{00-FF}"] = :func_so - transcode_generate_node(ActionMap.parse(map_xml_attr), "escape_xml_attr") + map_xml_attr_content = {} + map_xml_attr_content["{00-21,23-25,27-3B,3D,3F-FF}"] = :nomap + map_xml_attr_content["22"] = :func_so + map_xml_attr_content["26"] = :func_so + map_xml_attr_content["3C"] = :func_so + map_xml_attr_content["3E"] = :func_so + transcode_generate_node(ActionMap.parse(map_xml_attr_content), "escape_xml_attr_content") + + map_xml_attr_quote = {} + map_xml_attr_quote["{00-FF}"] = :func_so + transcode_generate_node(ActionMap.parse(map_xml_attr_quote), "escape_xml_attr_quote") + %> <%= transcode_generated_code %> @@ -83,11 +92,23 @@ NULL, NULL, NULL, &fun_so_escape_xml_chref }; +static const rb_transcoder +rb_escape_xml_attr_content = { + "", "xml-attr-content-escaped", escape_xml_attr_content, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 6, /* max_output */ + stateless_converter, /* stateful_type */ + 0, NULL, NULL, + NULL, NULL, NULL, &fun_so_escape_xml_chref +}; + #define END 0 #define NORMAL 1 static int -escape_xml_attr_init(void *statep) +escape_xml_attr_quote_init(void *statep) { unsigned char *sp = statep; *sp = END; @@ -95,7 +116,7 @@ } static int -fun_so_escape_xml_attr(void *statep, const unsigned char *s, size_t l, unsigned char *o) 
+fun_so_escape_xml_attr_quote(void *statep, const unsigned char *s, size_t l, unsigned char *o) { unsigned char *sp = statep; int n = 0; @@ -103,23 +124,12 @@ *sp = NORMAL; o[n++] = '"'; } - switch (s[0]) { - case '&': - case '<': - case '>': - case '"': - n += fun_so_escape_xml_chref(statep, s, l, o+n); - break; - - default: - o[n++] = s[0]; - break; - } + o[n++] = s[0]; return n; } static int -escape_xml_attr_finish(void *statep, unsigned char *o) +escape_xml_attr_quote_finish(void *statep, unsigned char *o) { unsigned char *sp = statep; int n = 0; @@ -135,16 +145,16 @@ } static const rb_transcoder -rb_escape_xml_attr = { - "", "xml-attr-escaped", escape_xml_attr, +rb_escape_xml_attr_quote = { + "", "xml-attr-quoted", escape_xml_attr_quote, TRANSCODE_TABLE_INFO, 1, /* input_unit_length */ 1, /* max_input */ 7, /* max_output */ stateful_encoder, /* stateful_type */ - 1, escape_xml_attr_init, escape_xml_attr_init, - NULL, NULL, NULL, fun_so_escape_xml_attr, - escape_xml_attr_finish + 1, escape_xml_attr_quote_init, escape_xml_attr_quote_init, + NULL, NULL, NULL, fun_so_escape_xml_attr_quote, + escape_xml_attr_quote_finish }; void @@ -152,6 +162,7 @@ { rb_register_transcoder(&rb_escape_amp_as_chref); rb_register_transcoder(&rb_escape_xml_text); - rb_register_transcoder(&rb_escape_xml_attr); + rb_register_transcoder(&rb_escape_xml_attr_content); + rb_register_transcoder(&rb_escape_xml_attr_quote); } Index: io.c =================================================================== --- io.c (revision 19208) +++ io.c (revision 19209) @@ -682,7 +682,7 @@ # define NEED_NEWLINE_ENCODER(fptr) 0 #endif #define NEED_READCONV(fptr) (fptr->encs.enc2 != NULL || NEED_NEWLINE_DECODER(fptr)) -#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr)) +#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr) || (fptr->encs.ecflags & (ECONV_DECODER_MASK|ECONV_ENCODER_MASK|ECONV_STATEFUL_ENCODER_MASK))) static void make_writeconv(rb_io_t 
*fptr) @@ -695,42 +695,50 @@ fptr->writeconv_initialized = 1; - /* ECONV_INVALID_XXX and ECONV_UNDEF_XXX should be set both. - * But ECONV_CRLF_NEWLINE_ENCODER should be set only for the first. */ - fptr->writeconv_pre_ecflags = fptr->encs.ecflags; - fptr->writeconv_pre_ecopts = fptr->encs.ecopts; ecflags = fptr->encs.ecflags; ecopts = fptr->encs.ecopts; +#ifdef TEXTMODE_NEWLINE_ENCODER + if (NEED_NEWLINE_ENCODER(fptr)) + ecflags |= TEXTMODE_NEWLINE_ENCODER; +#endif -#ifdef TEXTMODE_NEWLINE_ENCODER if (!fptr->encs.enc) { - if (NEED_NEWLINE_ENCODER(fptr)) - ecflags |= TEXTMODE_NEWLINE_ENCODER; + /* no encoding conversion */ + fptr->writeconv_pre_ecflags = 0; + fptr->writeconv_pre_ecopts = Qnil; fptr->writeconv = rb_econv_open_opts("", "", ecflags, ecopts); if (!fptr->writeconv) rb_exc_raise(rb_econv_open_exc("", "", ecflags)); fptr->writeconv_stateless = Qnil; - return; } - - if (NEED_NEWLINE_ENCODER(fptr)) - fptr->writeconv_pre_ecflags |= TEXTMODE_NEWLINE_ENCODER; -#endif - ecflags &= ECONV_ERROR_HANDLER_MASK; - - enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc; - senc = rb_econv_stateless_encoding(enc->name); - if (senc) { - denc = enc->name; - fptr->writeconv_stateless = rb_str_new2(senc); - fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts); - if (!fptr->writeconv) - rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags)); - } else { - denc = NULL; - fptr->writeconv_stateless = Qnil; - fptr->writeconv = NULL; + enc = fptr->encs.enc2 ? 
fptr->encs.enc2 : fptr->encs.enc; + senc = rb_econv_stateless_encoding(enc->name); + if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) { + /* single conversion */ + fptr->writeconv_pre_ecflags = ecflags; + fptr->writeconv_pre_ecopts = ecopts; + fptr->writeconv = NULL; + fptr->writeconv_stateless = Qnil; + } + else { + /* double conversion */ + fptr->writeconv_pre_ecflags = ecflags & ~ECONV_STATEFUL_ENCODER_MASK; + fptr->writeconv_pre_ecopts = ecopts; + if (senc) { + denc = enc->name; + fptr->writeconv_stateless = rb_str_new2(senc); + } + else { + senc = denc = ""; + fptr->writeconv_stateless = rb_str_new2(enc->name); + } + ecflags = fptr->encs.ecflags & (ECONV_ERROR_HANDLER_MASK|ECONV_STATEFUL_ENCODER_MASK); + ecopts = fptr->encs.ecopts; + fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts); + if (!fptr->writeconv) + rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags)); + } } } } Index: test/ruby/test_io_m17n.rb =================================================================== --- test/ruby/test_io_m17n.rb (revision 19208) +++ test/ruby/test_io_m17n.rb (revision 19209) @@ -1461,6 +1461,18 @@ def test_w_xml_attr with_tmpdir { + open("raw.txt", "wb", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" } + content = File.read("raw.txt", :mode=>"rb:ascii-8bit") + assert_equal("\"&amp;&lt;&gt;&quot;'\u4E02\u3042\n\"".force_encoding("ascii-8bit"), content) + + open("ascii.txt", "wb:us-ascii", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" } + content = File.read("ascii.txt", :mode=>"rb:ascii-8bit") + assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;&#x3042;\n\"".force_encoding("ascii-8bit"), content) + + open("iso-2022-jp.txt", "wb:iso-2022-jp", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" } + content = File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit") + assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;\e$B$\"\e(B\n\"".force_encoding("ascii-8bit"), content) + open("eucjp.txt", "w:euc-jp:utf-8", xml: :attr) {|f| f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212 } @@ -1480,6 +1492,5 @@ 
assert_equal("\"&#x4E02;\"".force_encoding("ascii-8bit"), content) } end - end Index: test/ruby/test_econv.rb =================================================================== --- test/ruby/test_econv.rb (revision 19208) +++ test/ruby/test_econv.rb (revision 19209) @@ -738,20 +738,37 @@ assert_equal('', ec.finish) end - def test_xml_escape_attr - ec = Encoding::Converter.new("", "xml-attr-escaped") + def test_xml_escape_attr_content + ec = Encoding::Converter.new("", "xml-attr-content-escaped") + assert_equal('', ec.finish) + + ec = Encoding::Converter.new("", "xml-attr-content-escaped") + assert_equal('', ec.convert("")) + assert_equal('', ec.finish) + + ec = Encoding::Converter.new("", "xml-attr-content-escaped") + assert_equal('&quot;', ec.convert('"')) + assert_equal('', ec.finish) + + ec = Encoding::Converter.new("", "xml-attr-content-escaped") + assert_equal('&amp;&lt;&gt;&quot;', ec.convert("&<>\"")) + assert_equal('', ec.finish) + end + + def test_xml_escape_attr_quote + ec = Encoding::Converter.new("", "xml-attr-quoted") assert_equal('""', ec.finish) - ec = Encoding::Converter.new("", "xml-attr-escaped") + ec = Encoding::Converter.new("", "xml-attr-quoted") assert_equal('', ec.convert("")) assert_equal('""', ec.finish) - ec = Encoding::Converter.new("", "xml-attr-escaped") - assert_equal('"&quot;', ec.convert('"')) + ec = Encoding::Converter.new("", "xml-attr-quoted") + assert_equal('""', ec.convert('"')) assert_equal('"', ec.finish) - ec = Encoding::Converter.new("", "xml-attr-escaped") - assert_equal('"&amp;&lt;&gt;&quot;', ec.convert("&<>\"")) + ec = Encoding::Converter.new("", "xml-attr-quoted") + assert_equal('"&<>"', ec.convert("&<>\"")) assert_equal('"', ec.finish) end @@ -760,7 +777,10 @@ assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\"")) assert_equal('', ec.finish) - ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::XML_ATTR_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF) + ec = Encoding::Converter.new("utf-8", "euc-jp", + 
Encoding::Converter::XML_ATTR_CONTENT_ENCODER| + Encoding::Converter::XML_ATTR_QUOTE_ENCODER| + Encoding::Converter::UNDEF_HEX_CHARREF) assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\"")) assert_equal('"', ec.finish) Index: transcode.c =================================================================== --- transcode.c (revision 19208) +++ transcode.c (revision 19209) @@ -896,7 +896,7 @@ return NULL; if ((ecflags & ECONV_XML_TEXT_ENCODER) && - (ecflags & ECONV_XML_ATTR_ENCODER)) + (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER)) return NULL; num_encoders = 0; @@ -909,9 +909,12 @@ if (ecflags & ECONV_XML_TEXT_ENCODER) if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-text-escaped"))) return NULL; - if (ecflags & ECONV_XML_ATTR_ENCODER) - if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-escaped"))) + if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-content-escaped"))) return NULL; + if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-quoted"))) + return NULL; num_decoders = 0; if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER) @@ -1792,7 +1795,8 @@ ECONV_CRLF_NEWLINE_ENCODER| ECONV_CR_NEWLINE_ENCODER| ECONV_XML_TEXT_ENCODER| - ECONV_XML_ATTR_ENCODER)) { + ECONV_XML_ATTR_CONTENT_ENCODER| + ECONV_XML_ATTR_QUOTE_ENCODER)) { const char *pre = ""; if (has_description) rb_str_cat2(mesg, " with "); @@ -1812,10 +1816,14 @@ rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "XML-text"); } - if (ecflags & ECONV_XML_ATTR_ENCODER) { + if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER) { rb_str_cat2(mesg, pre); pre = ","; - rb_str_cat2(mesg, "XML-attr"); + rb_str_cat2(mesg, "XML-attr-content"); } + if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "XML-attr-quote"); + } has_description = 1; } if (!has_description) { @@ -2173,7 +2181,7 @@ ecflags |= 
ECONV_XML_TEXT_ENCODER|ECONV_UNDEF_HEX_CHARREF; } else if (v==sym_attr) { - ecflags |= ECONV_XML_ATTR_ENCODER|ECONV_UNDEF_HEX_CHARREF; + ecflags |= ECONV_XML_ATTR_CONTENT_ENCODER|ECONV_XML_ATTR_QUOTE_ENCODER|ECONV_UNDEF_HEX_CHARREF; } else { rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v))); @@ -2329,7 +2337,8 @@ ECONV_CRLF_NEWLINE_ENCODER| ECONV_CR_NEWLINE_ENCODER| ECONV_XML_TEXT_ENCODER| - ECONV_XML_ATTR_ENCODER)) == 0) { + ECONV_XML_ATTR_CONTENT_ENCODER| + ECONV_XML_ATTR_QUOTE_ENCODER)) == 0) { if (senc && senc == denc) { return -1; } @@ -3573,7 +3582,8 @@ rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER)); rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER)); rb_define_const(rb_cEncodingConverter, "XML_TEXT_ENCODER", INT2FIX(ECONV_XML_TEXT_ENCODER)); - rb_define_const(rb_cEncodingConverter, "XML_ATTR_ENCODER", INT2FIX(ECONV_XML_ATTR_ENCODER)); + rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_ENCODER", INT2FIX(ECONV_XML_ATTR_CONTENT_ENCODER)); + rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_ENCODER", INT2FIX(ECONV_XML_ATTR_QUOTE_ENCODER)); rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0); rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/