ruby-changes:24228
From: naruse <ko1@a...>
Date: Tue, 3 Jul 2012 11:45:19 +0900 (JST)
Subject: [ruby-changes:24228] naruse:r36279 (ruby_1_9_3): merge revision(s) 34372:
naruse 2012-07-03 11:44:36 +0900 (Tue, 03 Jul 2012) New Revision: 36279 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=36279 Log: merge revision(s) 34372: * file.c (rb_enc_path_next, rb_enc_path_skip_prefix) (rb_enc_path_last_separator, rb_enc_path_end) (ruby_enc_find_basename, ruby_enc_find_extname): encoding-aware path handling functions. * file.c (rb_home_dir, file_expand_path, rb_realpath_internal) (rb_file_s_basename, rb_file_dirname, rb_file_s_extname) (rb_file_join): should respect the encodings of arguments than file system encoding. [ruby-dev:45145] [Bug #5919] * dir.c (check_dirname, ruby_glob0): ditto. * ext/pathname/pathname.c (path_sub_ext): ditto. Modified files: branches/ruby_1_9_3/ChangeLog branches/ruby_1_9_3/dir.c branches/ruby_1_9_3/ext/pathname/pathname.c branches/ruby_1_9_3/file.c branches/ruby_1_9_3/include/ruby/encoding.h branches/ruby_1_9_3/include/ruby/intern.h branches/ruby_1_9_3/test/pathname/test_pathname.rb branches/ruby_1_9_3/test/ruby/test_file_exhaustive.rb branches/ruby_1_9_3/version.h Index: ruby_1_9_3/include/ruby/intern.h =================================================================== --- ruby_1_9_3/include/ruby/intern.h (revision 36278) +++ ruby_1_9_3/include/ruby/intern.h (revision 36279) @@ -402,15 +402,9 @@ VALUE rb_find_file_safe(VALUE, int); int rb_find_file_ext(VALUE*, const char* const*); VALUE rb_find_file(VALUE); -char *rb_path_next(const char *); -char *rb_path_skip_prefix(const char *); -char *rb_path_last_separator(const char *); -char *rb_path_end(const char *); VALUE rb_file_directory_p(VALUE,VALUE); VALUE rb_str_encode_ospath(VALUE); int rb_is_absolute_path(const char *); -const char *ruby_find_basename(const char *name, long *baselen, long *alllen); -const char *ruby_find_extname(const char *name, long *len); /* gc.c */ void ruby_set_stack_size(size_t); NORETURN(void rb_memerror(void)); Index: ruby_1_9_3/include/ruby/encoding.h =================================================================== --- ruby_1_9_3/include/ruby/encoding.h (revision 36278) +++ ruby_1_9_3/include/ruby/encoding.h (revision 36279) @@ -211,6 +211,12 @@ void rb_enc_set_default_internal(VALUE encoding); VALUE rb_locale_charmap(VALUE klass); long rb_memsearch(const void*,long,const void*,long,rb_encoding*); +char *rb_enc_path_next(const char *,const char *,rb_encoding*); +char *rb_enc_path_skip_prefix(const char *,const char *,rb_encoding*); +char *rb_enc_path_last_separator(const char *,const char *,rb_encoding*); +char *rb_enc_path_end(const char *,const char *,rb_encoding*); +const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc); +const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc); RUBY_EXTERN VALUE rb_cEncoding; #define ENC_DUMMY_FLAG (1<<24) Index: ruby_1_9_3/ChangeLog =================================================================== --- ruby_1_9_3/ChangeLog (revision 36278) +++ ruby_1_9_3/ChangeLog (revision 36279) @@ -1,3 +1,19 @@ +Tue Jul 3 11:44:23 2012 Nobuyoshi Nakada <nobu@r...> + + * file.c (rb_enc_path_next, rb_enc_path_skip_prefix) + (rb_enc_path_last_separator, rb_enc_path_end) + (ruby_enc_find_basename, ruby_enc_find_extname): encoding-aware + path handling functions. + + * file.c (rb_home_dir, file_expand_path, rb_realpath_internal) + (rb_file_s_basename, rb_file_dirname, rb_file_s_extname) + (rb_file_join): should respect the encodings of arguments than + file system encoding. [ruby-dev:45145] [Bug #5919] + + * dir.c (check_dirname, ruby_glob0): ditto. + + * ext/pathname/pathname.c (path_sub_ext): ditto. + Tue Jul 3 11:43:46 2012 Nobuyoshi Nakada <nobu@r...> * dir.c (dir_chdir, check_dirname): get rid of optimization-out. Index: ruby_1_9_3/dir.c =================================================================== --- ruby_1_9_3/dir.c (revision 36278) +++ ruby_1_9_3/dir.c (revision 36279) @@ -910,11 +910,16 @@ { VALUE d = *dir; char *path, *pend; + long len; + rb_encoding *enc; rb_secure(2); FilePathValue(d); - path = RSTRING_PTR(d); - if (path && *(pend = rb_path_end(rb_path_skip_prefix(path)))) { + enc = rb_enc_get(d); + RSTRING_GETMEM(d, path, len); + pend = path + len; + pend = rb_enc_path_end(rb_enc_path_skip_prefix(path, pend, enc), pend, enc); + if (pend - path < len) { d = rb_str_subseq(d, 0, pend - path); } *dir = rb_str_encode_ospath(d); @@ -1494,7 +1499,7 @@ start = root = path; flags |= FNM_SYSCASE; #if defined DOSISH - root = rb_path_skip_prefix(root); + root = rb_enc_path_skip_prefix(root, root + strlen(root), enc); #endif if (root && *root == '/') root++; Index: ruby_1_9_3/ext/pathname/pathname.c =================================================================== --- ruby_1_9_3/ext/pathname/pathname.c (revision 36278) +++ ruby_1_9_3/ext/pathname/pathname.c (revision 36279) @@ -1,4 +1,5 @@ #include "ruby.h" +#include "ruby/encoding.h" static VALUE rb_cPathname; static ID id_at_path, id_to_path; @@ -184,15 +185,15 @@ StringValue(repl); p = RSTRING_PTR(str); - ext = ruby_find_extname(p, &extlen); + extlen = RSTRING_LEN(str); + ext = ruby_enc_find_extname(p, &extlen, rb_enc_get(str)); if (ext == NULL) { ext = p + RSTRING_LEN(str); } else if (extlen <= 1) { ext += extlen; } - str2 = rb_str_dup(str); - rb_str_resize(str2, ext-p); + str2 = rb_str_subseq(str, 0, ext-p); rb_str_append(str2, repl); OBJ_INFECT(str2, str); return rb_class_new_instance(1, &str2, rb_obj_class(self)); Index: ruby_1_9_3/version.h =================================================================== --- ruby_1_9_3/version.h (revision 36278) +++ ruby_1_9_3/version.h (revision 36279) @@ -1,5 +1,5 @@ #define RUBY_VERSION "1.9.3" -#define RUBY_PATCHLEVEL 244 +#define RUBY_PATCHLEVEL 245 #define RUBY_RELEASE_DATE "2012-07-03" #define RUBY_RELEASE_YEAR 2012 Index: ruby_1_9_3/test/ruby/test_file_exhaustive.rb =================================================================== --- ruby_1_9_3/test/ruby/test_file_exhaustive.rb (revision 36278) +++ ruby_1_9_3/test/ruby/test_file_exhaustive.rb (revision 36279) @@ -3,6 +3,8 @@ require "tmpdir" class TestFileExhaustive < Test::Unit::TestCase + DRIVE = Dir.pwd[%r'\A(?:[a-z]:|//[^/]+/[^/]+)'i] + def assert_incompatible_encoding d = "\u{3042}\u{3044}".encode("utf-16le") assert_raise(Encoding::CompatibilityError) {yield d} @@ -400,13 +402,29 @@ assert_match(/\Ac:\//i, File.expand_path('c:foo', 'd:/bar')) assert_match(%r'\Ac:/bar/foo\z'i, File.expand_path('c:foo', 'c:/bar')) end - if drive = Dir.pwd[%r'\A(?:[a-z]:|//[^/]+/[^/]+)'i] + if DRIVE assert_match(%r"\Az:/foo\z"i, File.expand_path('/foo', "z:/bar")) assert_match(%r"\A//host/share/foo\z"i, File.expand_path('/foo', "//host/share/bar")) - assert_match(%r"\A#{drive}/foo\z"i, File.expand_path('/foo')) + assert_match(%r"\A#{DRIVE}/foo\z"i, File.expand_path('/foo')) else assert_equal("/foo", File.expand_path('/foo')) end + drive = (DRIVE ? 'C:' : '') + if Encoding.find("filesystem") == Encoding::CP1251 + a = "#{drive}/\u3042\u3044\u3046\u3048\u304a".encode("cp932") + else + a = "#{drive}/\u043f\u0440\u0438\u0432\u0435\u0442".encode("cp1251") + end + assert_equal(a, File.expand_path(a)) + a = "#{drive}/\225\\\\" + if File::ALT_SEPARATOR == '\\' + [%W"cp437 #{drive}/\225", %W"cp932 #{drive}/\225\\"] + else + [["cp437", a], ["cp932", a]] + end.each do |cp, expected| + assert_equal(expected.force_encoding(cp), File.expand_path(a.dup.force_encoding(cp)), cp) + end + assert_kind_of(String, File.expand_path("~")) if ENV["HOME"] assert_raise(ArgumentError) { File.expand_path("~foo_bar_baz_unknown_user_wahaha") } assert_raise(ArgumentError) { File.expand_path("~foo_bar_baz_unknown_user_wahaha", "/") } @@ -447,16 +465,31 @@ assert_equal(basename, File.basename(@file + ".", ".*")) assert_equal(basename, File.basename(@file + "::$DATA", ".*")) end + if File::ALT_SEPARATOR == '\\' + a = "foo/\225\\\\" + [%W"cp437 \225", %W"cp932 \225\\"].each do |cp, expected| + assert_equal(expected.force_encoding(cp), File.basename(a.dup.force_encoding(cp)), cp) + end + end assert_incompatible_encoding {|d| File.basename(d)} assert_incompatible_encoding {|d| File.basename(d, ".*")} assert_raise(Encoding::CompatibilityError) {File.basename("foo.ext", ".*".encode("utf-16le"))} + + s = "foo\x93_a".force_encoding("cp932") + assert_equal(s, File.basename(s, "_a")) end def test_dirname assert(@file.start_with?(File.dirname(@file))) assert_equal(".", File.dirname("")) assert_incompatible_encoding {|d| File.dirname(d)} + if File::ALT_SEPARATOR == '\\' + a = "\225\\\\foo" + [%W"cp437 \225", %W"cp932 \225\\"].each do |cp, expected| + assert_equal(expected.force_encoding(cp), File.dirname(a.dup.force_encoding(cp)), cp) + end + end end def test_extname @@ -500,6 +533,13 @@ def o.to_path; "foo"; end assert_equal(s, File.join(o, "bar", "baz")) assert_equal(s, File.join("foo" + File::SEPARATOR, "bar", File::SEPARATOR + "baz")) + if File::ALT_SEPARATOR == '\\' + a = "\225\\" + b = "foo" + [%W"cp437 \225\\foo", %W"cp932 \225\\/foo"].each do |cp, expected| + assert_equal(expected.force_encoding(cp), File.join(a.dup.force_encoding(cp), b.dup.force_encoding(cp)), cp) + end + end end def test_truncate Index: ruby_1_9_3/test/pathname/test_pathname.rb =================================================================== --- ruby_1_9_3/test/pathname/test_pathname.rb (revision 36278) +++ ruby_1_9_3/test/pathname/test_pathname.rb (revision 36279) @@ -1,5 +1,3 @@ -#!/usr/bin/env ruby - require 'test/unit' require 'pathname' @@ -185,10 +183,8 @@ if DOSISH defassert(:del_trailing_separator, "a", "a\\") - require 'Win32API' - if Win32API.new('kernel32', 'GetACP', nil, 'L').call == 932 - defassert(:del_trailing_separator, "\225\\", "\225\\\\") # SJIS - end + defassert(:del_trailing_separator, "\225\\".force_encoding("cp932"), "\225\\\\".force_encoding("cp932")) + defassert(:del_trailing_separator, "\225".force_encoding("cp437"), "\225\\\\".force_encoding("cp437")) end def test_plus Index: ruby_1_9_3/file.c =================================================================== --- ruby_1_9_3/file.c (revision 36278) +++ ruby_1_9_3/file.c (revision 36279) @@ -2425,6 +2425,8 @@ #endif #ifdef HAVE_READLINK +static VALUE rb_readlink(VALUE path); + /* * call-seq: * File.readlink(link_name) -> file_name @@ -2439,6 +2441,12 @@ static VALUE rb_file_s_readlink(VALUE klass, VALUE path) { + return rb_readlink(path); +} + +static VALUE +rb_readlink(VALUE path) +{ char *buf; int size = 100; ssize_t rv; @@ -2604,9 +2612,8 @@ #define istrailinggarbage(x) 0 #endif -#ifndef CharNext /* defined as CharNext[AW] on Windows. */ -# define CharNext(p) ((p) + 1) -#endif +#define Next(p, e, enc) ((p) + rb_enc_mbclen((p), (e), (enc))) +#define Inc(p, e, enc) ((p) = Next((p), (e), (enc))) #if defined(DOSISH_UNC) #define has_unc(buf) (isdirsep((buf)[0]) && isdirsep((buf)[1])) @@ -2668,40 +2675,40 @@ #endif static inline char * -skiproot(const char *path) +skiproot(const char *path, const char *end, rb_encoding *enc) { #ifdef DOSISH_DRIVE_LETTER - if (has_drive_letter(path)) path += 2; + if (path + 2 <= end && has_drive_letter(path)) path += 2; #endif - while (isdirsep(*path)) path++; + while (path < end && isdirsep(*path)) path++; return (char *)path; } -#define nextdirsep rb_path_next +#define nextdirsep rb_enc_path_next char * -rb_path_next(const char *s) +rb_enc_path_next(const char *s, const char *e, rb_encoding *enc) { - while (*s && !isdirsep(*s)) { - s = CharNext(s); + while (s < e && !isdirsep(*s)) { + Inc(s, e, enc); } return (char *)s; } #if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) -#define skipprefix rb_path_skip_prefix +#define skipprefix rb_enc_path_skip_prefix #else -#define skipprefix(path) (path) +#define skipprefix(path, end, enc) (path) #endif char * -rb_path_skip_prefix(const char *path) +rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc) { #if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) #ifdef DOSISH_UNC - if (isdirsep(path[0]) && isdirsep(path[1])) { + if (path + 2 <= end && isdirsep(path[0]) && isdirsep(path[1])) { path += 2; - while (isdirsep(*path)) path++; - if (*(path = nextdirsep(path)) && path[1] && !isdirsep(path[1])) - path = nextdirsep(path + 1); + while (path < end && isdirsep(*path)) path++; + if ((path = rb_enc_path_next(path, end, enc)) < end && path[0] && path[1] && !isdirsep(path[1])) + path = rb_enc_path_next(path + 1, end, enc); return (char *)path; } #endif @@ -2714,78 +2721,78 @@ } static inline char * -skipprefixroot(const char *path) +skipprefixroot(const char *path, const char *end, rb_encoding *enc) { #if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) - char *p = skipprefix(path); + char *p = skipprefix(path, end, enc); while (isdirsep(*p)) p++; return p; #else - return skiproot(path); + return skiproot(path, end, enc); #endif } -#define strrdirsep rb_path_last_separator +#define strrdirsep rb_enc_path_last_separator char * -rb_path_last_separator(const char *path) +rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc) { char *last = NULL; - while (*path) { + while (path < end) { if (isdirsep(*path)) { const char *tmp = path++; - while (isdirsep(*path)) path++; - if (!*path) break; + while (path < end && isdirsep(*path)) path++; + if (path >= end) break; last = (char *)tmp; } else { - path = CharNext(path); + Inc(path, end, enc); } } return last; } static char * -chompdirsep(const char *path) +chompdirsep(const char *path, const char *end, rb_encoding *enc) { - while (*path) { + while (path < end) { if (isdirsep(*path)) { const char *last = path++; - while (isdirsep(*path)) path++; - if (!*path) return (char *)last; + while (path < end && isdirsep(*path)) path++; + if (path >= end) return (char *)last; } else { - path = CharNext(path); + Inc(path, end, enc); } } return (char *)path; } char * -rb_path_end(const char *path) +rb_enc_path_end(const char *path, const char *end, rb_encoding *enc) { - if (isdirsep(*path)) path++; - return chompdirsep(path); + if (path < end && isdirsep(*path)) path++; + return chompdirsep(path, end, enc); } #if USE_NTFS static char * -ntfs_tail(const char *path) +ntfs_tail(const char *path, const char *end, rb_encoding *enc) { - while (*path == '.') path++; - while (*path && *path != ':') { + while (path < end && *path == '.') path++; + while (path < end && *path != ':') { if (istrailinggarbage(*path)) { const char *last = path++; - while (istrailinggarbage(*path)) path++; - if (!*path || *path == ':') return (char *)last; + while (path < end && istrailinggarbage(*path)) path++; + if (path >= end || *path == ':') return (char *)last; } else if (isdirsep(*path)) { const char *last = path++; - while (isdirsep(*path)) path++; - if (!*path) return (char *)last; + while (path < end && isdirsep(*path)) path++; + if (path >= end) return (char *)last; if (*path == ':') path++; } else { - path = CharNext(path); + Inc(path, end, enc); } } return (char *)path; @@ -2814,9 +2821,10 @@ const char *dir; char *buf; #if defined DOSISH || defined __CYGWIN__ - char *p; + char *p, *bend; #endif long dirlen; + rb_encoding *enc; if (!user || !*user) { if (!(dir = getenv("HOME"))) { @@ -2835,32 +2843,61 @@ } dirlen = strlen(pwPtr->pw_dir); rb_str_resize(result, dirlen); - strcpy(buf = RSTRING_PTR(result), pwPtr->pw_dir); + memcpy(buf = RSTRING_PTR(result), pwPtr->pw_dir, dirlen + 1); endpwent(); #else return Qnil; #endif } + enc = rb_filesystem_encoding(); + rb_enc_associate(result, enc); #if defined DOSISH || defined __CYGWIN__ - for (p = buf; *p; p = CharNext(p)) { + for (bend = (p = buf) + dirlen; p < bend; Inc(p, bend, enc)) { if (*p == '\\') { *p = '/'; } } #endif - rb_enc_associate_index(result, rb_filesystem_encindex()); return result; } +static char * +append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encoding *fsenc) +{ + char *buf, *cwdp = dir; + VALUE dirname = Qnil; + size_t dirlen = strlen(dir), buflen = rb_str_capacity(result); + + if (*enc != fsenc) { + rb_encoding *direnc = rb_enc_check(fname, dirname = rb_enc_str_new(dir, dirlen, fsenc)); + if (direnc != fsenc) { + dirname = rb_str_conv_enc(dirname, fsenc, direnc); + RSTRING_GETMEM(dirname, cwdp, dirlen); + } + *enc = direnc; + rb_enc_associate(result, direnc); + } + do {buflen *= 2;} while (dirlen > buflen); + rb_str_resize(result, buflen); + buf = RSTRING_PTR(result); + memcpy(buf, cwdp, dirlen); + xfree(dir); + if (!NIL_P(dirname)) rb_str_resize(dirname, 0); + return buf + dirlen; +} + static VALUE file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) { - const char *s, *b; + const char *s, *b, *fend; char *buf, *p, *pend, *root; - size_t buflen, dirlen, bdiff; + size_t buflen, bdiff; int tainted; + rb_encoding *enc, *fsenc = rb_filesystem_encoding(); s = StringValuePtr(fname); + fend = s + RSTRING_LEN(fname); + enc = rb_enc_get(fname); BUFINIT(); tainted = OBJ_TAINTED(fname); @@ -2874,7 +2911,7 @@ if (*++s) ++s; } else { - s = nextdirsep(b = s); + s = nextdirsep(b = s, fend, enc); userlen = s - b; BUFCHECK(bdiff + userlen >= buflen); memcpy(p, b, userlen); @@ -2920,18 +2957,16 @@ } } if (!same) { - char *dir = getcwdofdrv(*s); - + char *e = append_fspath(result, fname, getcwdofdrv(*s), &enc, fsenc); tainted = 1; - dirlen = strlen(dir); - BUFCHECK(dirlen > buflen); - strcpy(buf, dir); - xfree(dir); - rb_enc_associate_index(result, rb_filesystem_encindex()); + BUFINIT(); + p = e; } - else - rb_enc_associate(result, rb_enc_check(result, fname)); - p = chompdirsep(skiproot(buf)); + else { + rb_enc_associate(result, enc = rb_enc_check(result, fname)); + p = pend; + } + p = chompdirsep(skiproot(buf, p, enc), p, enc); s += 2; } } @@ -2939,28 +2974,25 @@ else if (!rb_is_absolute_path(s)) { if (!NIL_P(dname)) { file_expand_path(dname, Qnil, abs_mode, result); + rb_enc_associate(result, rb_enc_check(result, fname)); BUFINIT(); - rb_enc_associate(result, rb_enc_check(result, fname)); + p = pend; } else { - char *dir = my_getcwd(); - + char *e = append_fspath(result, fname, my_getcwd(), &enc, fsenc); tainted = 1; - dirlen = strlen(dir); - BUFCHECK(dirlen > buflen); - strcpy(buf, dir); - xfree(dir); - rb_enc_associate_index(result, rb_filesystem_encindex()); + BUFINIT(); + p = e; } #if defined DOSISH || defined __CYGWIN__ if (isdirsep(*s)) { /* specified full path, but not drive letter nor UNC */ /* we need to get the drive letter or UNC share name */ - p = skipprefix(buf); + p = skipprefix(buf, p, enc); } else #endif - p = chompdirsep(skiproot(buf)); + p = chompdirsep(skiproot(buf, p, enc), p, enc); } else { size_t len; @@ -2984,7 +3016,7 @@ rb_str_set_len(result, p-buf+1); BUFCHECK(bdiff + 1 >= buflen); p[1] = 0; - root = skipprefix(buf); + root = skipprefix(buf, p+1, enc); b = s; while (*s) { @@ -3000,7 +3032,7 @@ /* We must go back to the parent */ char *n; *p = '\0'; - if (!(n = strrdirsep(root))) { + if (!(n = strrdirsep(root, p, enc))) { *p = '/'; } else { @@ -3030,7 +3062,7 @@ --s; case ' ': { const char *e = s; - while (istrailinggarbage(*s)) s++; + while (s < fend && istrailinggarbage(*s)) s++; if (!*s) { s = e; goto endpath; @@ -3055,7 +3087,7 @@ b = ++s; break; default: - s = CharNext(s); + Inc(s, fend, enc); break; } } @@ -3080,14 +3112,18 @@ BUFCHECK(bdiff + (s-b) >= buflen); memcpy(++p, b, s-b); p += s-b; + rb_str_set_len(result, p-buf); } - if (p == skiproot(buf) - 1) p++; + if (p == skiproot(buf, p + !!*p, enc) - 1) p++; #if USE_NTFS *p = '\0'; - if ((s = strrdirsep(b = buf)) != 0 && !strpbrk(s, "*?")) { + if ((s = strrdirsep(b (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/