ruby-changes:7738
From: nobu <ko1@a...>
Date: Tue, 9 Sep 2008 00:59:49 +0900 (JST)
Subject: [ruby-changes:7738] Ruby:r19259 (mvm): * merged from trunk r19235:19258.
nobu 2008-09-09 00:59:16 +0900 (Tue, 09 Sep 2008) New Revision: 19259 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19259 Log: * merged from trunk r19235:19258. Modified files: branches/mvm/.merged-trunk-revision branches/mvm/ChangeLog branches/mvm/dir.c branches/mvm/enc/trans/escape.trans branches/mvm/enc/trans/iso2022.trans branches/mvm/enc/trans/japanese.trans branches/mvm/enc/trans/newline.trans branches/mvm/enc/trans/utf_16_32.trans branches/mvm/include/ruby/encoding.h branches/mvm/io.c branches/mvm/lib/cgi.rb branches/mvm/process.c branches/mvm/re.c branches/mvm/test/cgi/test_cgi_tag_helper.rb branches/mvm/test/ruby/test_econv.rb branches/mvm/test/ruby/test_io.rb branches/mvm/test/ruby/test_transcode.rb branches/mvm/tool/transcode-tblgen.rb branches/mvm/transcode.c branches/mvm/transcode_data.h branches/mvm/vm.c branches/mvm/win32/Makefile.sub branches/mvm/win32/win32.c Index: mvm/include/ruby/encoding.h =================================================================== --- mvm/include/ruby/encoding.h (revision 19258) +++ mvm/include/ruby/encoding.h (revision 19259) @@ -238,8 +238,9 @@ int rb_econv_putbackable(rb_econv_t *ec); void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); -/* returns corresponding stateless encoding, or NULL if not stateful. */ -const char *rb_econv_stateless_encoding(const char *stateful_enc); +/* returns the corresponding ASCII compatible encoding for encname, + * or NULL if encname is not ASCII incompatible encoding. 
*/ +const char *rb_econv_asciicompat_encoding(const char *encname); VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); Index: mvm/re.c =================================================================== --- mvm/re.c (revision 19258) +++ mvm/re.c (revision 19259) @@ -2243,15 +2243,13 @@ onig_errmsg_buffer err = ""; int i; VALUE result = 0; - int argc = RARRAY_LEN(ary); - VALUE *argv = RARRAY_PTR(ary); - if (argc == 0) { + if (RARRAY_LEN(ary) == 0) { rb_raise(rb_eArgError, "no arguments given"); } - for (i = 0; i < argc; i++) { - VALUE str = argv[i]; + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE str = RARRAY_PTR(ary)[i]; VALUE buf; char *p, *end; rb_encoding *src_enc; Index: mvm/ChangeLog =================================================================== --- mvm/ChangeLog (revision 19258) +++ mvm/ChangeLog (revision 19259) @@ -19,14 +19,107 @@ * st.c (st_init_table_with_size, ADD_DIRECT): fixed typo. +Tue Sep 9 00:20:10 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_asciicompat_encoding): check decoder. + +Tue Sep 9 00:00:47 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_t): last_error.partial_input removed. + +Mon Sep 8 23:24:54 2008 Tanaka Akira <akr@f...> + + * include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed + from rb_econv_stateless_encoding to apply stateless ASCII + incompatible encodings such as UTF-16BE. + + * io.c (make_writeconv): use rb_econv_asciicompat_encoding. + + * transcode_data.h (rb_transcoder_asciicompat_type_t): renamed from + rb_transcoder_stateful_type_t. + (rb_transcoder): use rb_transcoder_asciicompat_type_t. + + * transcode.c: follow the type change. + (asciicompat_encoding_i): renamed from stateless_encoding_i. + (rb_econv_asciicompat_encoding): renamed from + rb_econv_stateless_encoding. + (econv_s_asciicompat_encoding): method renamed. 
+ + * tool/transcode-tblgen.rb: follow the type change. + + * enc/trans/utf_16_32.trans: follow the type change. + rb_from_UTF_16BE to UTF-8 is asciicompat_decoder. + rb_from_UTF_16LE to UTF-8 is asciicompat_decoder. + rb_from_UTF_32BE to UTF-8 is asciicompat_decoder. + rb_from_UTF_32LE to UTF-8 is asciicompat_decoder. + UTF-8 to rb_to_UTF_16BE is asciicompat_encoder. + UTF-8 to rb_to_UTF_16LE is asciicompat_encoder. + UTF-8 to rb_to_UTF_32BE is asciicompat_encoder. + UTF-8 to rb_to_UTF_32LE is asciicompat_encoder. + + * enc/trans/newline.trans: follow the type change. universal newline + decoder is asciicompat_converter. + + * enc/trans/escape.trans: follow the type change. + + * enc/trans/iso2022.trans: ditto. + + * enc/trans/japanese.trans: ditto. + +Mon Sep 8 23:05:42 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_insert_output): "readagain" part should be + after replacement. + +Mon Sep 8 22:30:03 2008 NARUSE, Yui <naruse@r...> + + * dir.c (dir_initialize): rename option name of Dir.open + from :external_encoding to :encoding. + +Mon Sep 8 22:16:20 2008 Takeyuki FUJIOKA <xibbar@r...> + + * lib/cgi.rb : obsolete regex "n" option. [ruby-dev:36130] + Mon Sep 8 20:10:12 2008 Nobuyoshi Nakada <nobu@r...> * bignum.c (power_cache_get_power0): no need to register address. +Mon Sep 8 18:13:20 2008 NAKAMURA Usaku <usa@r...> + + * win32/win32.c (getppid): typo. [ruby-dev:36202] + + * process.c (get_ppid): mention the return value on Windows. + +Mon Sep 8 18:15:59 2008 Tanaka Akira <akr@f...> + + * tool/transcode-tblgen.rb (ArrayCode): less string substitutions. + +Mon Sep 8 18:13:13 2008 Tanaka Akira <akr@f...> + + * vm.c (rb_mRubyVMFrozenCore): registered for GC. + + * re.c (rb_reg_preprocess_dregexp): fix GC problem on MacOS X with + powerpc-apple-darwin8-gcc-4.0.1 (GCC) 4.0.1 (Apple Computer, Inc. + build 5367). + +Mon Sep 8 18:09:07 2008 NAKAMURA Usaku <usa@r...> + + * win32/Makefile.sub (config.h): define SIZE_MAX for VC++6/7. 
+ +Mon Sep 8 17:46:09 2008 NAKAMURA Usaku <usa@r...> + + * win32/win32.c (filetime_to_unixtime): remove unused variable. + [ruby-dev:36191] + Mon Sep 8 17:26:51 2008 Nobuyoshi Nakada <nobu@r...> * st.c (garbage_collect): checks if memory can be reclaimed. +Mon Sep 8 13:47:39 2008 Kazuhiro NISHIYAMA <zn@m...> + + * test/ruby/test_io.rb (TestIO#test_dup): add open in block. + see [ruby-dev:35957]. + Mon Sep 8 07:09:42 2008 Tadayoshi Funaba <tadf@d...> * complex.c: some adjustments. Index: mvm/enc/trans/escape.trans =================================================================== --- mvm/enc/trans/escape.trans (revision 19258) +++ mvm/enc/trans/escape.trans (revision 19259) @@ -79,7 +79,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 7, /* max_output */ - stateful_encoder, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 1, escape_xml_attr_quote_init, escape_xml_attr_quote_init, NULL, NULL, NULL, fun_so_escape_xml_attr_quote, escape_xml_attr_quote_finish Index: mvm/enc/trans/iso2022.trans =================================================================== --- mvm/enc/trans/iso2022.trans (revision 19258) +++ mvm/enc/trans/iso2022.trans (revision 19259) @@ -114,7 +114,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 3, /* max_output */ - stateful_decoder, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, fun_si_iso2022jp_decoder, NULL, fun_so_iso2022jp_decoder }; @@ -196,7 +196,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 5, /* max_output */ - stateful_encoder, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_iso2022jp_encoder, finish_iso2022jp_encoder, @@ -218,7 +218,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 
0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_stateless_iso2022jp_to_eucjp, }; @@ -239,7 +239,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 3, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp_to_stateless_iso2022jp, }; Index: mvm/enc/trans/newline.trans =================================================================== --- mvm/enc/trans/newline.trans (revision 19258) +++ mvm/enc/trans/newline.trans (revision 19259) @@ -92,7 +92,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ - stateful_decoder, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_universal_newline, universal_newline_finish @@ -105,7 +105,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; @@ -117,7 +117,7 @@ 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; Index: mvm/enc/trans/utf_16_32.trans =================================================================== --- mvm/enc/trans/utf_16_32.trans (revision 19258) +++ mvm/enc/trans/utf_16_32.trans (revision 19259) @@ -266,7 +266,7 @@ 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16be }; @@ -278,7 +278,7 @@ 1, /* input_unit_length */ 4, /* max_input 
*/ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16be }; @@ -290,7 +290,7 @@ 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16le }; @@ -302,7 +302,7 @@ 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16le }; @@ -314,7 +314,7 @@ 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32be }; @@ -326,7 +326,7 @@ 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32be }; @@ -338,7 +338,7 @@ 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32le }; @@ -350,7 +350,7 @@ 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32le }; Index: mvm/enc/trans/japanese.trans =================================================================== --- mvm/enc/trans/japanese.trans (revision 19258) +++ 
mvm/enc/trans/japanese.trans (revision 19259) @@ -73,7 +73,7 @@ 1, /* input_unit_length */ 3, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp2sjis }; @@ -85,7 +85,7 @@ 1, /* input_unit_length */ 2, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_sjis2eucjp }; Index: mvm/io.c =================================================================== --- mvm/io.c (revision 19258) +++ mvm/io.c (revision 19259) @@ -715,7 +715,7 @@ } else { enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc; - senc = rb_econv_stateless_encoding(enc->name); + senc = rb_econv_asciicompat_encoding(enc->name); if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) { /* single conversion */ fptr->writeconv_pre_ecflags = ecflags; Index: mvm/lib/cgi.rb =================================================================== --- mvm/lib/cgi.rb (revision 19258) +++ mvm/lib/cgi.rb (revision 19259) @@ -286,7 +286,7 @@ REVISION = '$Id$' #:nodoc: - NEEDS_BINMODE = true if /WIN/ni.match(RUBY_PLATFORM) + NEEDS_BINMODE = true if /WIN/i.match(RUBY_PLATFORM) # Path separators in different environments. PATH_SEPARATOR = {'UNIX'=>'/', 'WINDOWS'=>'\\', 'MACINTOSH'=>':'} @@ -441,7 +441,7 @@ def CGI::escapeElement(string, *elements) elements = elements[0] if elements[0].kind_of?(Array) unless elements.empty? - string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/ni) do + string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do CGI::escapeHTML($&) end else @@ -462,7 +462,7 @@ def CGI::unescapeElement(string, *elements) elements = elements[0] if elements[0].kind_of?(Array) unless elements.empty? 
- string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/ni) do + string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do CGI::unescapeHTML($&) end else @@ -586,7 +586,7 @@ options.delete("nph") if defined?(MOD_RUBY) if options.delete("nph") or - (/IIS\/(\d+)/n.match(env_table['SERVER_SOFTWARE']) and $1.to_i < 5) + (/IIS\/(\d+)/.match(env_table['SERVER_SOFTWARE']) and $1.to_i < 5) buf += (env_table["SERVER_PROTOCOL"] or "HTTP/1.0") + " " + (HTTP_STATUS[options["status"]] or options["status"] or "200 OK") + EOL + @@ -657,19 +657,19 @@ if defined?(MOD_RUBY) table = Apache::request.headers_out - buf.scan(/([^:]+): (.+)#{EOL}/n){ |name, value| + buf.scan(/([^:]+): (.+)#{EOL}/){ |name, value| warn sprintf("name:%s value:%s\n", name, value) if $DEBUG case name when 'Set-Cookie' table.add(name, value) - when /^status$/ni + when /^status$/i Apache::request.status_line = value Apache::request.status = value.to_i - when /^content-type$/ni + when /^content-type$/i Apache::request.content_type = value - when /^content-encoding$/ni + when /^content-encoding$/i Apache::request.content_encoding = value - when /^location$/ni + when /^location$/i if Apache::request.status == 200 Apache::request.status = 302 end @@ -911,7 +911,7 @@ def CGI::parse(query) params = Hash.new([].freeze) - query.split(/[&;]/n).each do |pairs| + query.split(/[&;]/).each do |pairs| key, value = pairs.split('=',2).collect{|v| CGI::unescape(v) } if params.has_key?(key) params[key].push(value) @@ -940,7 +940,7 @@ module QueryExtension %w[ CONTENT_LENGTH SERVER_PORT ].each do |env| - define_method(env.sub(/^HTTP_/n, '').downcase) do + define_method(env.sub(/^HTTP_/, '').downcase) do (val = env_table[env]) && Integer(val) end end @@ -953,7 +953,7 @@ HTTP_ACCEPT HTTP_ACCEPT_CHARSET HTTP_ACCEPT_ENCODING HTTP_ACCEPT_LANGUAGE HTTP_CACHE_CONTROL HTTP_FROM HTTP_HOST HTTP_NEGOTIATE HTTP_PRAGMA HTTP_REFERER HTTP_USER_AGENT ].each do |env| - define_method(env.sub(/^HTTP_/n, '').downcase) do + 
define_method(env.sub(/^HTTP_/, '').downcase) do env_table[env] end end @@ -1004,9 +1004,9 @@ head = nil body = MorphingBody.new - until head and /#{quoted_boundary}(?:#{EOL}|--)/n.match(buf) - if (not head) and /#{EOL}#{EOL}/n.match(buf) - buf = buf.sub(/\A((?:.|\n)*?#{EOL})#{EOL}/n) do + until head and /#{quoted_boundary}(?:#{EOL}|--)/.match(buf) + if (not head) and /#{EOL}#{EOL}/.match(buf) + buf = buf.sub(/\A((?:.|\n)*?#{EOL})#{EOL}/) do head = $1.dup "" end @@ -1030,7 +1030,7 @@ content_length -= c.bytesize end - buf = buf.sub(/\A((?:.|\n)*?)(?:[\r\n]{1,2})?#{quoted_boundary}([\r\n]{1,2}|--)/n) do + buf = buf.sub(/\A((?:.|\n)*?)(?:[\r\n]{1,2})?#{quoted_boundary}([\r\n]{1,2}|--)/) do body.print $1 if "--" == $2 content_length = -1 @@ -1041,15 +1041,15 @@ body.rewind - /Content-Disposition:.* filename=(?:"((?:\\.|[^\"])*)"|([^;\s]*))/ni.match(head) + /Content-Disposition:.* filename=(?:"((?:\\.|[^\"])*)"|([^;\s]*))/i.match(head) filename = ($1 or $2 or "") - if /Mac/ni.match(env_table['HTTP_USER_AGENT']) and - /Mozilla/ni.match(env_table['HTTP_USER_AGENT']) and - (not /MSIE/ni.match(env_table['HTTP_USER_AGENT'])) + if /Mac/i.match(env_table['HTTP_USER_AGENT']) and + /Mozilla/i.match(env_table['HTTP_USER_AGENT']) and + (not /MSIE/i.match(env_table['HTTP_USER_AGENT'])) filename = CGI::unescape(filename) end - /Content-Type: ([^\s]*)/ni.match(head) + /Content-Type: ([^\s]*)/i.match(head) content_type = ($1 or "") (class << body; self; end).class_eval do @@ -1058,7 +1058,7 @@ define_method(:content_type) {content_type.dup.taint} end - /Content-Disposition:.* name="?([^\";\s]*)"?/ni.match(head) + /Content-Disposition:.* name="?([^\";\s]*)"?/i.match(head) name = ($1 || "").dup if params.has_key?(name) @@ -1087,12 +1087,12 @@ %|(offline mode: enter name=value pairs on standard input)\n| ) end - readlines.join(' ').gsub(/\n/n, '') - end.gsub(/\\=/n, '%3D').gsub(/\\&/n, '%26') + readlines.join(' ').gsub(/\n/, '') + end.gsub(/\\=/, '%3D').gsub(/\\&/, '%26') words = 
Shellwords.shellwords(string) - if words.find{|x| /=/n.match(x) } + if words.find{|x| /=/.match(x) } words.join('&') else words.join('+') @@ -1159,7 +1159,7 @@ # Reads query parameters in the @params field, and cookies into @cookies. def initialize_query() if ("POST" == env_table['REQUEST_METHOD']) and - %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n.match(env_table['CONTENT_TYPE']) + %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|.match(env_table['CONTENT_TYPE']) boundary = $1.dup @multipart = true @params = read_multipart(boundary, Integer(env_table['CONTENT_LENGTH'])) @@ -1245,14 +1245,14 @@ # # </HTML> # def CGI::pretty(string, shift = " ") - lines = string.gsub(/(?!\A)<(?:.|\n)*?>/n, "\n\\0").gsub(/<(?:.|\n)*?>(?!\n)/n, "\\0\n") + lines = string.gsub(/(?!\A)<(?:.|\n)*?>/, "\n\\0").gsub(/<(?:.|\n)*?>(?!\n)/, "\\0\n") end_pos = 0 - while end_pos = lines.index(/^<\/(\w+)/n, end_pos) + while end_pos = lines.index(/^<\/(\w+)/, end_pos) element = $1.dup - start_pos = lines.rindex(/^\s*<#{element}/ni, end_pos) - lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/n, "\n" + shift) + "__" + start_pos = lines.rindex(/^\s*<#{element}/i, end_pos) + lines[start_pos ... end_pos] = "__" + lines[start_pos ... 
end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__" end - lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/n, '\1') + lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1') end Index: mvm/dir.c =================================================================== --- mvm/dir.c (revision 19258) +++ mvm/dir.c (revision 19259) @@ -295,7 +295,7 @@ struct dir_data { DIR *dir; VALUE path; - rb_encoding *extenc; + rb_encoding *enc; }; static void @@ -323,7 +323,7 @@ dirp->dir = NULL; dirp->path = Qnil; - dirp->extenc = NULL; + dirp->enc = NULL; return obj; } @@ -338,26 +338,26 @@ dir_initialize(int argc, VALUE *argv, VALUE dir) { struct dir_data *dp; - rb_encoding *extencoding; + rb_encoding *fsenc; VALUE dirname, opt; - static VALUE sym_extenc; + static VALUE sym_enc; - if (!sym_extenc) { - sym_extenc = ID2SYM(rb_intern("external_encoding")); + if (!sym_enc) { + sym_enc = ID2SYM(rb_intern("encoding")); } - extencoding = rb_filesystem_encoding(); + fsenc = rb_filesystem_encoding(); rb_scan_args(argc, argv, "11", &dirname, &opt); if (!NIL_P(opt)) { - VALUE v, extenc=Qnil; + VALUE v, enc=Qnil; opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash"); - v = rb_hash_aref(opt, sym_extenc); - if (!NIL_P(v)) extenc = v; + v = rb_hash_aref(opt, sym_enc); + if (!NIL_P(v)) enc = v; - if (!NIL_P(extenc)) { - extencoding = rb_to_encoding(extenc); + if (!NIL_P(enc)) { + fsenc = rb_to_encoding(enc); } } @@ -367,7 +367,7 @@ if (dp->dir) closedir(dp->dir); dp->dir = NULL; dp->path = Qnil; - dp->extenc = extencoding; + dp->enc = fsenc; dp->dir = opendir(RSTRING_PTR(dirname)); if (dp->dir == NULL) { if (errno == EMFILE || errno == ENFILE) { @@ -499,7 +499,7 @@ errno = 0; dp = readdir(dirp->dir); if (dp) { - return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->extenc); + return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc); } else if (errno == 0) { /* end of stream */ return Qnil; @@ -537,7 +537,7 @@ GetDIR(dir, dirp); rewinddir(dirp->dir); for (dp = readdir(dirp->dir); dp != 
NULL; dp = readdir(dirp->dir)) { - rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->extenc)); + rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc)); if (dirp->dir == NULL) dir_closed(); } return dir; Index: mvm/win32/win32.c =================================================================== --- mvm/win32/win32.c (revision 19258) +++ mvm/win32/win32.c (revision 19259) @@ -3440,7 +3440,6 @@ static time_t filetime_to_unixtime(const FILETIME *ft) { - FILETIME loc; struct timeval tv; if (filetime_to_timeval(ft, &tv) == (time_t)-1) @@ -3966,23 +3965,23 @@ HANDLE hNtDll = GetModuleHandle("ntdll.dll"); if (hNtDll) { pNtQueryInformationProcess = (long (WINAPI *)(HANDLE, int, void *, ULONG, ULONG *))GetProcAddress(hNtDll, "NtQueryInformationProcess"); - if (pNtQueryInformationProcess) { - struct { - long ExitStatus; - void* PebBaseAddress; - ULONG AffinityMask; - ULONG BasePriority; - ULONG UniqueProcessId; - ULONG ParentProcessId; - } pbi; - ULONG len; - long ret = pNtQueryInformationProcess(GetCurrentProcess(), 0, &pbi, sizeof(pbi), &len); - if (!ret) { - ppid = pbi.ParentProcessId; - } - } } } + if (pNtQueryInformationProcess) { + struct { + long ExitStatus; + void* PebBaseAddress; + ULONG AffinityMask; + ULONG BasePriority; + ULONG UniqueProcessId; + ULONG ParentProcessId; + } pbi; + ULONG len; + long ret = pNtQueryInformationProcess(GetCurrentProcess(), 0, &pbi, sizeof(pbi), &len); + if (!ret) { + ppid = pbi.ParentProcessId; + } + } } return ppid; Index: mvm/win32/Makefile.sub =================================================================== --- mvm/win32/Makefile.sub (revision 19258) +++ mvm/win32/Makefile.sub (revision 19259) @@ -355,6 +355,9 @@ #define SIZEOF_SIZE_T 4 #define SIZEOF_PTRDIFF_T 4 !endif +!if $(MSC_VER) < 1400 +#define SIZE_MAX UINT_MAX +!endif #define HAVE_PROTOTYPES 1 #define TOKEN_PASTE(x,y) x##y #define HAVE_STDARG_PROTOTYPES 1 Index: mvm/process.c =================================================================== --- 
mvm/process.c (revision 19258) +++ mvm/process.c (revision 19259) @@ -158,8 +158,8 @@ * call-seq: * Process.ppid => fixnum * - * Returns the process id of the parent of this process. Always - * returns 0 on NT. Not available on all platforms. + * Returns the process id of the parent of this process. Returns + * untrustworthy value on Win32/64. Not available on all platforms. * * puts "I am #{Process.pid}" * Process.fork { puts "Dad is #{Process.ppid}" } Index: mvm/.merged-trunk-revision =================================================================== --- mvm/.merged-trunk-revision (revision 19258) +++ mvm/.merged-trunk-revision (revision 19259) @@ -1 +1 @@ -19235 +19258 Index: mvm/vm.c =================================================================== --- mvm/vm.c (revision 19258) +++ mvm/vm.c (revision 19259) @@ -1808,6 +1808,7 @@ rb_define_method_id(klass, id_core_define_singleton_method, m_core_define_singleton_method, 3); rb_define_method_id(klass, id_core_set_postexe, m_core_set_postexe, 1); rb_obj_freeze(fcore); + rb_global_variable(&rb_mRubyVMFrozenCore); rb_mRubyVMFrozenCore = fcore; /* ::VM::Env */ Index: mvm/transcode_data.h =================================================================== --- mvm/transcode_data.h (revision 19258) +++ mvm/transcode_data.h (revision 19259) @@ -57,11 +57,11 @@ #define THREETRAIL /* legal but undefined if three more trailing UTF-8 */ typedef enum { - stateless_converter, /* stateless -> stateless */ - stateful_decoder, /* stateful -> stateless */ - stateful_encoder /* stateless -> stateful */ - /* stateful -> stateful is intentionally ommitted. */ -} rb_transcoder_stateful_type_t; + asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */ + asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */ + asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */ + /* ASCII-incompatible -> ASCII-incompatible is intentionally ommitted. 
*/ +} rb_transcoder_asciicompat_type_t; typedef struct rb_transcoder rb_transcoder; @@ -78,7 +78,7 @@ int input_unit_length; int max_input; int max_output; - rb_transcoder_stateful_type_t stateful_type; + rb_transcoder_asciicompat_type_t asciicompat_type; size_t state_size; int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */ int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */ Index: mvm/tool/transcode-tblgen.rb =================================================================== --- mvm/tool/transcode-tblgen.rb (revision 19258) +++ mvm/tool/transcode-tblgen.rb (revision 19259) @@ -157,27 +157,28 @@ class ArrayCode def initialize(type, name) - @code = <<"End" -static const #{type} -#{name}[0] = { -}; -End + @type = type + @name = name + @len = 0; + @content = '' end def length - @code[/\[\d+\]/][1...-1].to_i + @len end def insert_at_last(num, str) newnum = self.length + num - @code.sub!(/^(\};\n\z)/) { - str + $1 - } - @code.sub!(/\[\d+\]/) { "[#{newnum}]" } + @content << str + @len += num end def to_s - @code.dup + <<"End" +static const #{@type} +#{@name}[#{@len}] = { +#{@content}}; +End end end @@ -633,7 +634,7 @@ #{input_unit_length}, /* input_unit_length */ #{max_input}, /* max_input */ #{max_output}, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL, NULL, NULL, NULL Index: mvm/test/ruby/test_transcode.rb =================================================================== --- mvm/test/ruby/test_transcode.rb (revision 19258) +++ mvm/test/ruby/test_transcode.rb (revision 19259) @@ -546,6 +546,7 @@ check_both_ways("\u005C", "\x5C", "eucJP-ms") check_both_ways("\u005C", "\x5C", "CP51932") check_both_ways("\u005C", "\x5C", "ISO-2022-JP") + assert_equal("\u005C", "\e(B\x5C\e(B".encode("UTF-8", "ISO-2022-JP")) assert_equal("\u005C", "\e(J\x5C\e(B".encode("UTF-8", "ISO-2022-JP")) 
assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("Shift_JIS") } assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("Windows-31J") } @@ -568,6 +569,7 @@ check_both_ways("\u007E", "\x7E", "eucJP-ms") check_both_ways("\u007E", "\x7E", "CP51932") check_both_ways("\u007E", "\x7E", "ISO-2022-JP") + assert_equal("\u007E", "\e(B\x7E\e(B".encode("UTF-8", "ISO-2022-JP")) assert_equal("\u007E", "\e(J\x7E\e(B".encode("UTF-8", "ISO-2022-JP")) assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("Shift_JIS") } assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("Windows-31J") } Index: mvm/test/ruby/test_io.rb =================================================================== --- mvm/test/ruby/test_io.rb (revision 19258) +++ mvm/test/ruby/test_io.rb (revision 19259) @@ -636,6 +636,10 @@ assert_equal("", f2.read) end + proc do + open(__FILE__) # see Bug #493 [ruby-dev:35957] + end.call + pipe2 do |r, w| assert_raise(Errno::EMFILE, Errno::ENFILE, Errno::ENOMEM) do r2, w2 = r.dup, w.dup Index: mvm/test/ruby/test_econv.rb =================================================================== --- mvm/test/ruby/test_econv.rb (revision 19258) +++ mvm/test/ruby/test_econv.rb (revision 19259) @@ -27,20 +27,24 @@ ec.primitive_errinfo) end - def test_s_stateless_encoding - assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding("ISO-2022-JP")) - assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding(Encoding::ISO_2022_JP)) - assert_nil(Encoding::Converter.stateless_encoding("EUC-JP")) - assert_nil(Encoding::Converter.stateless_encoding("UTF-8")) - assert_nil(Encoding::Converter.stateless_encoding("UTF-16BE")) - assert_nil(Encoding::Converter.stateless_encoding(Encoding::UTF_8)) - assert_nil(Encoding::Converter.stateless_encoding("xml-attr-escaped")) + def test_s_asciicompat_encoding + assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding("ISO-2022-JP")) + 
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(Encoding::ISO_2022_JP)) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16BE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16LE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32BE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32LE")) + assert_nil(Encoding::Converter.asciicompat_encoding("EUC-JP")) + assert_nil(Encoding::Converter.asciicompat_encoding("UTF-8")) + assert_nil(Encoding::Converter.asciicompat_encoding(Encoding::UTF_8)) + assert_nil(Encoding::Converter.asciicompat_encoding("xml-attr-escaped")) + assert_nil(Encoding::Converter.asciicompat_encoding("encoding-not-exist")) end - def test_stateless_encoding_iso2022jp - slenc = Encoding::Converter.stateless_encoding("ISO-2022-JP") + def test_asciicompat_encoding_iso2022jp + acenc = Encoding::Converter.asciicompat_encoding("ISO-2022-JP") str = "\e$B~~\(B".force_encoding("iso-2022-jp") - str2 = str.encode(slenc) + str2 = str.encode(acenc) str3 = str.encode("ISO-2022-JP") assert_equal(str, str3) end @@ -798,4 +802,9 @@ assert_equal("&\u3046\u2661&\"'".force_encoding("utf-8"), "&\u3046\u2661&\"'".encode("utf-8", xml: :text)) end + + def test_iso2022jp_invalid_replace + assert_equal("?x".force_encoding("iso-2022-jp"), + "\222\xA1x".encode("iso-2022-jp", "stateless-iso-2022-jp", :invalid => :replace)) + end end Index: mvm/test/cgi/test_cgi_tag_helper.rb =================================================================== --- mvm/test/cgi/test_cgi_tag_helper.rb (revision 19258) +++ mvm/test/cgi/test_cgi_tag_helper.rb (revision 19259) @@ -3,7 +3,7 @@ require 'stringio' -class CGICoreTest < Test::Unit::TestCase +class CGITagHelperTest < Test::Unit::TestCase def setup Index: mvm/transcode.c =================================================================== --- mvm/transcode.c (revision 19258) +++ 
mvm/transcode.c (revision 19259) @@ -121,7 +121,6 @@ const unsigned char *error_bytes_start; size_t error_bytes_len; size_t readagain_len; - int partial_input; } last_error; /* The following fields are only for Encoding::Converter. @@ -837,7 +836,6 @@ ec->last_error.error_bytes_start = NULL; ec->last_error.error_bytes_len = 0; ec->last_error.readagain_len = 0; - ec->last_error.partial_input = 0; ec->source_encoding = NULL; ec->destination_encoding = NULL; for (i = 0; i < ec->num_trans; i++) { @@ -1274,7 +1272,6 @@ gotresult: ec->last_error.result = res; - ec->last_error.partial_input = flags & ECONV_PARTIAL_INPUT; if (res == econv_invalid_byte_sequence || res == econv_incomplete_input || res == econv_undefined_conversion) { @@ -1414,7 +1411,7 @@ tr = tc->transcoder; - if (tr->stateful_type == stateful_encoder) + if (tr->asciicompat_type == asciicompat_encoder) return tr->src_encoding; return tr->dst_encoding; } @@ -1528,7 +1525,7 @@ data_end_p = &ec->in_data_end; buf_end_p = &ec->in_buf_end; } - else if (tc->transcoder->stateful_type == stateful_encoder) { + else if (tc->transcoder->asciicompat_type == asciicompat_encoder) { need += tc->readagain_len; if (need < insert_len) goto fail; @@ -1578,13 +1575,13 @@ } } - if (tc && tc->transcoder->stateful_type == stateful_encoder) { + memcpy(*data_end_p, insert_str, insert_len); + *data_end_p += insert_len; + if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) { memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len); *data_end_p += tc->readagain_len; tc->readagain_len = 0; } - memcpy(*data_end_p, insert_str, insert_len); - *data_end_p += insert_len; if (insert_str != str && insert_str != insert_buf) xfree((void*)insert_str); @@ -1633,43 +1630,53 @@ tc->readagain_len -= n; } -struct stateless_encoding_t { - const char *stateless_enc; - const char *stateful_enc; +struct asciicompat_encoding_t { + const char *ascii_compat_name; + const char *ascii_incompat_name; }; static int 
-stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg) +asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg) { - struct stateless_encoding_t *data = (struct stateless_encoding_t *)arg; - st_table *table2 = (st_table *)val; - st_data_t v; + struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg; + transcoder_entry_t *entry = (transcoder_entry_t *)val; + const rb_transcoder *tr; - if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) { - transcoder_entry_t *entry = (transcoder_entry_t *)v; - const rb_transcoder *tr; - if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) { - return ST_CONTINUE; - } - tr = load_transcoder_entry(entry); - if (tr && tr->stateful_type == stateful_encoder) { - data->stateless_enc = tr->src_encoding; - return ST_STOP; - } + if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) + return ST_CONTINUE; + tr = load_transcoder_entry(entry); + if (tr && tr->asciicompat_type == asciicompat_decoder) { + data->ascii_compat_name = tr->dst_encoding; + return ST_STOP; } return ST_CONTINUE; } const char * -rb_econv_stateless_encoding(const char *stateful_enc) +rb_econv_asciicompat_encoding(const char *ascii_incompat_name) { - struct stateless_encoding_t data; - data.stateful_enc = stateful_enc; - data.stateless_enc = NULL; - st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data); - if (data.stateless_enc) - return data.stateless_enc; - return NULL; + st_data_t v; + st_table *table2; + struct asciicompat_encoding_t data; + + if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) + return NULL; + table2 = (st_table *)v; + + /* + * Assumption: + * There is at most one transcoder for + * converting from an ASCII incompatible encoding. + * + * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others. 
+ */ + if (table2->num_entries != 1) + return NULL; + + data.ascii_incompat_name = ascii_incompat_name; + data.ascii_compat_name = NULL; + st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data); + return data.ascii_compat_name; } VALUE @@ -2510,42 +2517,42 @@ /* * call-seq: - * Encoding::Converter.stateless_encoding(string) => encoding or nil - * Encoding::Converter.stateless_encoding(encoding) => encoding or nil + * Encoding::Converter.asciicompat_encoding(string) => encoding or nil + * Encoding::Converter.asciicompat_encoding(encoding) => encoding or nil * - * returns the corresponding stateless encoding. + * returns the corresponding ASCII compatible encoding. * - * It returns nil if the argument is not a stateful encoding. + * It returns nil if the argument is an ASCII compatible encoding. * - * "corresponding stateless encoding" is a stateless encoding which - * represents same characters in the statefull encoding. + * "corresponding ASCII compatible encoding" is an ASCII compatible encoding which + * represents the same characters in the given ASCII incompatible encoding. * - * So, no conversion undefined error occur between the stateful encoding and the stateless encoding. + * So, no conversion undefined error occurs between the ASCII compatible and incompatible encodings. * - * For ISO-2022-JP, the dedicated stateless encoding, stateless-ISO-2022-JP, is defined. 
- * * Encoding::Converter.stateless_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP> + * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8> + * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil * */ static VALUE -econv_s_stateless_encoding(VALUE klass, VALUE arg) +econv_s_asciicompat_encoding(VALUE klass, VALUE arg) { - const char *stateful_name, *stateless_name; - rb_encoding *stateful_enc, *stateless_enc; + const char *arg_name, *result_name; + rb_encoding *arg_enc, *result_enc; - enc_arg(arg, &stateful_name, &stateful_enc); + enc_arg(arg, &arg_name, &arg_enc); - stateless_name = rb_econv_stateless_encoding(stateful_name); + result_name = rb_econv_asciicompat_encoding(arg_name); - if (stateless_name == NULL) + if (result_name == NULL) return Qnil; - stateless_enc = rb_enc_find(stateless_name); + result_enc = rb_enc_find(result_name); - if (!stateless_enc) - stateless_enc = make_dummy_encoding(stateless_name); + if (!result_enc) + result_enc = make_dummy_encoding(result_name); - return rb_enc_from_encoding(stateless_enc); + return rb_enc_from_encoding(result_enc); } /* @@ -3570,7 +3577,7 @@ rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData); rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate); - rb_define_singleton_method(rb_cEncodingConverter, "stateless_encoding", econv_s_stateless_encoding, 1); + rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1); rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1); rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0); rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/