[前][次][番号順一覧][スレッド一覧]

ruby-changes:7688

From: akr <ko1@a...>
Date: Sun, 7 Sep 2008 12:13:50 +0900 (JST)
Subject: [ruby-changes:7688] Ruby:r19209 (trunk): * include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined.

akr	2008-09-07 12:13:29 +0900 (Sun, 07 Sep 2008)

  New Revision: 19209

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19209

  Log:
    * include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined.
      (ECONV_STATEFUL_ENCODER_MASK): defined.
      (ECONV_XML_ATTR_QUOTE_ENCODER): defined.
      (ECONV_XML_ATTR_ENCODER): removed.
    
    * enc/trans/escape.trans (rb_escape_xml_attr_content): defined.
      (rb_escape_xml_attr_quote): defined.
      (rb_escape_xml_attr): removed.
    
    * io.c (NEED_WRITECONV): writeconv is required if supplemental
      converter is used.
      (make_writeconv): apply stateful encoder in writeconv.
    
    * transcode.c: follow the constant change.

  Modified files:
    trunk/ChangeLog
    trunk/enc/trans/escape.trans
    trunk/include/ruby/encoding.h
    trunk/io.c
    trunk/test/ruby/test_econv.rb
    trunk/test/ruby/test_io_m17n.rb
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 19208)
+++ include/ruby/encoding.h	(revision 19209)
@@ -269,8 +269,11 @@
 #define ECONV_CRLF_NEWLINE_ENCODER              0x00001000
 #define ECONV_CR_NEWLINE_ENCODER                0x00002000
 #define ECONV_XML_TEXT_ENCODER                  0x00004000
-#define ECONV_XML_ATTR_ENCODER                  0x00008000
+#define ECONV_XML_ATTR_CONTENT_ENCODER          0x00008000
 
+#define ECONV_STATEFUL_ENCODER_MASK             0x00f00000
+#define ECONV_XML_ATTR_QUOTE_ENCODER            0x00100000
+
 /* end of flags for rb_econv_open */
 
 /* flags for rb_econv_convert */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19208)
+++ ChangeLog	(revision 19209)
@@ -1,3 +1,20 @@
+Sun Sep  7 12:09:29 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined.
+	  (ECONV_STATEFUL_ENCODER_MASK): defined.
+	  (ECONV_XML_ATTR_QUOTE_ENCODER): defined.
+	  (ECONV_XML_ATTR_ENCODER): removed.
+
+	* enc/trans/escape.trans (rb_escape_xml_attr_content): defined.
+	  (rb_escape_xml_attr_quote): defined.
+	  (rb_escape_xml_attr): removed.
+
+	* io.c (NEED_WRITECONV): writeconv is required if supplemental
+	  converter is used.
+	  (make_writeconv): apply stateful encoder in writeconv.
+
+	* transcode.c: follow the constant change.
+
 Sun Sep  7 07:24:09 2008  Yukihiro Matsumoto  <matz@r...>
 
 	* misc/*.el:  merged the following patches from Nathan Weizenbaum
Index: enc/trans/escape.trans
===================================================================
--- enc/trans/escape.trans	(revision 19208)
+++ enc/trans/escape.trans	(revision 19209)
@@ -52,9 +52,18 @@
   map_xml_text["3E"] = :func_so
   transcode_generate_node(ActionMap.parse(map_xml_text), "escape_xml_text")
 
-  map_xml_attr = {}
-  map_xml_attr["{00-FF}"] = :func_so
-  transcode_generate_node(ActionMap.parse(map_xml_attr), "escape_xml_attr")
+  map_xml_attr_content = {}
+  map_xml_attr_content["{00-21,23-25,27-3B,3D,3F-FF}"] = :nomap
+  map_xml_attr_content["22"] = :func_so
+  map_xml_attr_content["26"] = :func_so
+  map_xml_attr_content["3C"] = :func_so
+  map_xml_attr_content["3E"] = :func_so
+  transcode_generate_node(ActionMap.parse(map_xml_attr_content), "escape_xml_attr_content")
+
+  map_xml_attr_quote = {}
+  map_xml_attr_quote["{00-FF}"] = :func_so
+  transcode_generate_node(ActionMap.parse(map_xml_attr_quote), "escape_xml_attr_quote")
+
 %>
 
 <%= transcode_generated_code %>
@@ -83,11 +92,23 @@
     NULL, NULL, NULL, &fun_so_escape_xml_chref
 };
 
+static const rb_transcoder
+rb_escape_xml_attr_content = {
+    "", "xml-attr-content-escaped", escape_xml_attr_content,
+    TRANSCODE_TABLE_INFO,
+    1, /* input_unit_length */
+    1, /* max_input */
+    6, /* max_output */
+    stateless_converter, /* stateful_type */
+    0, NULL, NULL,
+    NULL, NULL, NULL, &fun_so_escape_xml_chref
+};
+
 #define END 0
 #define NORMAL  1
 
 static int
-escape_xml_attr_init(void *statep)
+escape_xml_attr_quote_init(void *statep)
 {
     unsigned char *sp = statep;
     *sp = END;
@@ -95,7 +116,7 @@
 }
 
 static int
-fun_so_escape_xml_attr(void *statep, const unsigned char *s, size_t l, unsigned char *o)
+fun_so_escape_xml_attr_quote(void *statep, const unsigned char *s, size_t l, unsigned char *o)
 {
     unsigned char *sp = statep;
     int n = 0;
@@ -103,23 +124,12 @@
         *sp = NORMAL;
         o[n++] = '"';
     }
-    switch (s[0]) {
-      case '&':
-      case '<':
-      case '>':
-      case '"':
-        n += fun_so_escape_xml_chref(statep, s, l, o+n);
-        break;
-
-      default:
-        o[n++] = s[0];
-        break;
-    }
+    o[n++] = s[0];
     return n;
 }
 
 static int
-escape_xml_attr_finish(void *statep, unsigned char *o)
+escape_xml_attr_quote_finish(void *statep, unsigned char *o)
 {
     unsigned char *sp = statep;
     int n = 0;
@@ -135,16 +145,16 @@
 }
 
 static const rb_transcoder
-rb_escape_xml_attr = {
-    "", "xml-attr-escaped", escape_xml_attr,
+rb_escape_xml_attr_quote = {
+    "", "xml-attr-quoted", escape_xml_attr_quote,
     TRANSCODE_TABLE_INFO,
     1, /* input_unit_length */
     1, /* max_input */
     7, /* max_output */
     stateful_encoder, /* stateful_type */
-    1, escape_xml_attr_init, escape_xml_attr_init,
-    NULL, NULL, NULL, fun_so_escape_xml_attr,
-    escape_xml_attr_finish
+    1, escape_xml_attr_quote_init, escape_xml_attr_quote_init,
+    NULL, NULL, NULL, fun_so_escape_xml_attr_quote,
+    escape_xml_attr_quote_finish
 };
 
 void
@@ -152,6 +162,7 @@
 {
     rb_register_transcoder(&rb_escape_amp_as_chref);
     rb_register_transcoder(&rb_escape_xml_text);
-    rb_register_transcoder(&rb_escape_xml_attr);
+    rb_register_transcoder(&rb_escape_xml_attr_content);
+    rb_register_transcoder(&rb_escape_xml_attr_quote);
 }
 
Index: io.c
===================================================================
--- io.c	(revision 19208)
+++ io.c	(revision 19209)
@@ -682,7 +682,7 @@
 # define NEED_NEWLINE_ENCODER(fptr) 0
 #endif
 #define NEED_READCONV(fptr) (fptr->encs.enc2 != NULL || NEED_NEWLINE_DECODER(fptr))
-#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr))
+#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr) || (fptr->encs.ecflags & (ECONV_DECODER_MASK|ECONV_ENCODER_MASK|ECONV_STATEFUL_ENCODER_MASK)))
 
 static void
 make_writeconv(rb_io_t *fptr)
@@ -695,42 +695,50 @@
 
         fptr->writeconv_initialized = 1;
 
-        /* ECONV_INVALID_XXX and ECONV_UNDEF_XXX should be set both.
-         * But ECONV_CRLF_NEWLINE_ENCODER should be set only for the first. */
-        fptr->writeconv_pre_ecflags = fptr->encs.ecflags;
-        fptr->writeconv_pre_ecopts = fptr->encs.ecopts;
         ecflags = fptr->encs.ecflags;
         ecopts = fptr->encs.ecopts;
+#ifdef TEXTMODE_NEWLINE_ENCODER
+        if (NEED_NEWLINE_ENCODER(fptr))
+            ecflags |= TEXTMODE_NEWLINE_ENCODER;
+#endif
 
-#ifdef TEXTMODE_NEWLINE_ENCODER
         if (!fptr->encs.enc) {
-            if (NEED_NEWLINE_ENCODER(fptr))
-                ecflags |= TEXTMODE_NEWLINE_ENCODER;
+            /* no encoding conversion */
+            fptr->writeconv_pre_ecflags = 0;
+            fptr->writeconv_pre_ecopts = Qnil;
             fptr->writeconv = rb_econv_open_opts("", "", ecflags, ecopts);
             if (!fptr->writeconv)
                 rb_exc_raise(rb_econv_open_exc("", "", ecflags));
             fptr->writeconv_stateless = Qnil;
-            return;
         }
-
-        if (NEED_NEWLINE_ENCODER(fptr))
-            fptr->writeconv_pre_ecflags |= TEXTMODE_NEWLINE_ENCODER;
-#endif
-        ecflags &= ECONV_ERROR_HANDLER_MASK;
-
-        enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
-        senc = rb_econv_stateless_encoding(enc->name);
-        if (senc) {
-            denc = enc->name;
-            fptr->writeconv_stateless = rb_str_new2(senc);
-            fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts);
-            if (!fptr->writeconv)
-                rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags));
-        }
         else {
-            denc = NULL;
-            fptr->writeconv_stateless = Qnil;
-            fptr->writeconv = NULL;
+            enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
+            senc = rb_econv_stateless_encoding(enc->name);
+            if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) {
+                /* single conversion */
+                fptr->writeconv_pre_ecflags = ecflags;
+                fptr->writeconv_pre_ecopts = ecopts;
+                fptr->writeconv = NULL;
+                fptr->writeconv_stateless = Qnil;
+            }
+            else {
+                /* double conversion */
+                fptr->writeconv_pre_ecflags = ecflags & ~ECONV_STATEFUL_ENCODER_MASK;
+                fptr->writeconv_pre_ecopts = ecopts;
+                if (senc) {
+                    denc = enc->name;
+                    fptr->writeconv_stateless = rb_str_new2(senc);
+                }
+                else {
+                    senc = denc = "";
+                    fptr->writeconv_stateless = rb_str_new2(enc->name);
+                }
+                ecflags = fptr->encs.ecflags & (ECONV_ERROR_HANDLER_MASK|ECONV_STATEFUL_ENCODER_MASK);
+                ecopts = fptr->encs.ecopts;
+                fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts);
+                if (!fptr->writeconv)
+                    rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags));
+            }
         }
     }
 }
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 19208)
+++ test/ruby/test_io_m17n.rb	(revision 19209)
@@ -1461,6 +1461,18 @@
 
   def test_w_xml_attr
     with_tmpdir {
+      open("raw.txt", "wb", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
+      content = File.read("raw.txt", :mode=>"rb:ascii-8bit")
+      assert_equal("\"&amp;&lt;&gt;&quot;'\u4E02\u3042\n\"".force_encoding("ascii-8bit"), content)
+
+      open("ascii.txt", "wb:us-ascii", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
+      content = File.read("ascii.txt", :mode=>"rb:ascii-8bit")
+      assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;&#x3042;\n\"".force_encoding("ascii-8bit"), content)
+
+      open("iso-2022-jp.txt", "wb:iso-2022-jp", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
+      content = File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit")
+      assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;\e$B$\"\e(B\n\"".force_encoding("ascii-8bit"), content)
+
       open("eucjp.txt", "w:euc-jp:utf-8", xml: :attr) {|f|
         f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212
       }
@@ -1480,6 +1492,5 @@
       assert_equal("\"&#x4E02;\"".force_encoding("ascii-8bit"), content)
     }
   end
-
 end
 
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 19208)
+++ test/ruby/test_econv.rb	(revision 19209)
@@ -738,20 +738,37 @@
     assert_equal('', ec.finish)
   end
 
-  def test_xml_escape_attr
-    ec = Encoding::Converter.new("", "xml-attr-escaped")
+  def test_xml_escape_attr_content
+    ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+    assert_equal('', ec.finish)
+
+    ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+    assert_equal('', ec.convert(""))
+    assert_equal('', ec.finish)
+
+    ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+    assert_equal('&quot;', ec.convert('"'))
+    assert_equal('', ec.finish)
+
+    ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+    assert_equal('&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
+    assert_equal('', ec.finish)
+  end
+
+  def test_xml_escape_attr_quote
+    ec = Encoding::Converter.new("", "xml-attr-quoted")
     assert_equal('""', ec.finish)
 
-    ec = Encoding::Converter.new("", "xml-attr-escaped")
+    ec = Encoding::Converter.new("", "xml-attr-quoted")
     assert_equal('', ec.convert(""))
     assert_equal('""', ec.finish)
 
-    ec = Encoding::Converter.new("", "xml-attr-escaped")
-    assert_equal('"&quot;', ec.convert('"'))
+    ec = Encoding::Converter.new("", "xml-attr-quoted")
+    assert_equal('""', ec.convert('"'))
     assert_equal('"', ec.finish)
 
-    ec = Encoding::Converter.new("", "xml-attr-escaped")
-    assert_equal('"&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
+    ec = Encoding::Converter.new("", "xml-attr-quoted")
+    assert_equal('"&<>"', ec.convert("&<>\""))
     assert_equal('"', ec.finish)
   end
 
@@ -760,7 +777,10 @@
     assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\""))
     assert_equal('', ec.finish)
 
-    ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::XML_ATTR_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF)
+    ec = Encoding::Converter.new("utf-8", "euc-jp",
+                                 Encoding::Converter::XML_ATTR_CONTENT_ENCODER|
+                                 Encoding::Converter::XML_ATTR_QUOTE_ENCODER|
+                                 Encoding::Converter::UNDEF_HEX_CHARREF)
     assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\""))
     assert_equal('"', ec.finish)
 
Index: transcode.c
===================================================================
--- transcode.c	(revision 19208)
+++ transcode.c	(revision 19209)
@@ -896,7 +896,7 @@
         return NULL;
 
     if ((ecflags & ECONV_XML_TEXT_ENCODER) &&
-        (ecflags & ECONV_XML_ATTR_ENCODER))
+        (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER))
         return NULL;
 
     num_encoders = 0;
@@ -909,9 +909,12 @@
     if (ecflags & ECONV_XML_TEXT_ENCODER)
         if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-text-escaped")))
             return NULL;
-    if (ecflags & ECONV_XML_ATTR_ENCODER)
-        if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-escaped")))
+    if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER)
+        if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-content-escaped")))
             return NULL;
+    if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER)
+        if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-quoted")))
+            return NULL;
 
     num_decoders = 0;
     if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER)
@@ -1792,7 +1795,8 @@
                    ECONV_CRLF_NEWLINE_ENCODER|
                    ECONV_CR_NEWLINE_ENCODER|
                    ECONV_XML_TEXT_ENCODER|
-                   ECONV_XML_ATTR_ENCODER)) {
+                   ECONV_XML_ATTR_CONTENT_ENCODER|
+                   ECONV_XML_ATTR_QUOTE_ENCODER)) {
         const char *pre = "";
         if (has_description)
             rb_str_cat2(mesg, " with ");
@@ -1812,10 +1816,14 @@
             rb_str_cat2(mesg, pre); pre = ",";
             rb_str_cat2(mesg, "XML-text");
         }
-        if (ecflags & ECONV_XML_ATTR_ENCODER) {
+        if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER) {
             rb_str_cat2(mesg, pre); pre = ",";
-            rb_str_cat2(mesg, "XML-attr");
+            rb_str_cat2(mesg, "XML-attr-content");
         }
+        if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER) {
+            rb_str_cat2(mesg, pre); pre = ",";
+            rb_str_cat2(mesg, "XML-attr-quote");
+        }
         has_description = 1;
     }
     if (!has_description) {
@@ -2173,7 +2181,7 @@
             ecflags |= ECONV_XML_TEXT_ENCODER|ECONV_UNDEF_HEX_CHARREF;
         }
         else if (v==sym_attr) {
-            ecflags |= ECONV_XML_ATTR_ENCODER|ECONV_UNDEF_HEX_CHARREF;
+            ecflags |= ECONV_XML_ATTR_CONTENT_ENCODER|ECONV_XML_ATTR_QUOTE_ENCODER|ECONV_UNDEF_HEX_CHARREF;
         }
         else {
             rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v)));
@@ -2329,7 +2337,8 @@
                     ECONV_CRLF_NEWLINE_ENCODER|
                     ECONV_CR_NEWLINE_ENCODER|
                     ECONV_XML_TEXT_ENCODER|
-                    ECONV_XML_ATTR_ENCODER)) == 0) {
+                    ECONV_XML_ATTR_CONTENT_ENCODER|
+                    ECONV_XML_ATTR_QUOTE_ENCODER)) == 0) {
         if (senc && senc == denc) {
             return -1;
         }
@@ -3573,7 +3582,8 @@
     rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER));
     rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER));
     rb_define_const(rb_cEncodingConverter, "XML_TEXT_ENCODER", INT2FIX(ECONV_XML_TEXT_ENCODER));
-    rb_define_const(rb_cEncodingConverter, "XML_ATTR_ENCODER", INT2FIX(ECONV_XML_ATTR_ENCODER));
+    rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_ENCODER", INT2FIX(ECONV_XML_ATTR_CONTENT_ENCODER));
+    rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_ENCODER", INT2FIX(ECONV_XML_ATTR_QUOTE_ENCODER));
 
     rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0);
     rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]