ruby-changes:30419
From: naruse <ko1@a...>
Date: Sun, 11 Aug 2013 05:44:19 +0900 (JST)
Subject: [ruby-changes:30419] naruse:r42498 (trunk): * file.c (rb_str_normalize_ospath):
naruse 2013-08-11 05:44:10 +0900 (Sun, 11 Aug 2013) New Revision: 42498 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=42498 Log: * file.c (rb_str_normalize_ospath): HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through U+2FAFF are not decomposed (this avoids problems with round trip conversions from old Mac text encodings). http://developer.apple.com/library/mac/qa/qa1173/_index.html Therefore fix r42457 to exclude the range. Added files: trunk/ext/-test-/string/normalize.c trunk/test/-ext-/string/test_normalize.rb Modified files: trunk/ChangeLog trunk/dir.c trunk/ext/-test-/string/depend trunk/ext/-test-/string/extconf.rb trunk/file.c trunk/internal.h Index: ChangeLog =================================================================== --- ChangeLog (revision 42497) +++ ChangeLog (revision 42498) @@ -1,3 +1,13 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Sun Aug 11 04:48:14 2013 NARUSE, Yui <naruse@r...> + + * file.c (rb_str_normalize_ospath): + HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which + U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through + U+2FAFF are not decomposed (this avoids problems with round trip + conversions from old Mac text encodings). + http://developer.apple.com/library/mac/qa/qa1173/_index.html + Therefore fix r42457 to exclude the range. + Sun Aug 11 03:26:07 2013 Tanaka Akira <akr@f...> * bignum.c (bitsize): Fix a conditional expression. Index: dir.c =================================================================== --- dir.c (revision 42497) +++ dir.c (revision 42498) @@ -84,8 +84,6 @@ char *strchr(char*,char); https://github.com/ruby/ruby/blob/trunk/dir.c#L84 #include <sys/param.h> #include <sys/mount.h> -VALUE rb_str_normalize_ospath(const char *ptr, long len); - static inline int is_hfs(DIR *dirp) { @@ -1420,7 +1418,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L1418 name = dp->d_name; namlen = NAMLEN(dp); # if HAVE_HFS - if (hfs_p && has_nonascii(name, namlen)) { + if (0&&hfs_p && has_nonascii(name, namlen)) { if (!NIL_P(utf8str = rb_str_normalize_ospath(name, namlen))) { RSTRING_GETMEM(utf8str, name, namlen); } Index: ext/-test-/string/depend =================================================================== --- ext/-test-/string/depend (revision 42497) +++ ext/-test-/string/depend (revision 42498) @@ -2,3 +2,4 @@ $(OBJS): $(HDRS) $(ruby_headers) \ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/depend#L2 $(hdrdir)/ruby/encoding.h \ $(hdrdir)/ruby/oniguruma.h qsort.o: $(hdrdir)/ruby/util.h +normalize.o: $(top_srcdir)/internal.h Index: ext/-test-/string/normalize.c =================================================================== --- ext/-test-/string/normalize.c (revision 0) +++ ext/-test-/string/normalize.c (revision 42498) @@ -0,0 +1,18 @@ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/normalize.c#L1 +#include "ruby.h" +#include "internal.h" + +#ifdef __APPLE__ +static VALUE +normalize_ospath(VALUE str) +{ + return rb_str_normalize_ospath(RSTRING_PTR(str), RSTRING_LEN(str)); +} +#else +#define normalize_ospath rb_f_notimplement +#endif + +void +Init_normalize(VALUE klass) +{ + rb_define_method(klass, "normalize_ospath", normalize_ospath, 0); +} Index: ext/-test-/string/extconf.rb =================================================================== --- ext/-test-/string/extconf.rb (revision 42497) +++ ext/-test-/string/extconf.rb (revision 42498) @@ -1,3 +1,4 @@ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/extconf.rb#L1 +$INCFLAGS << " -I$(topdir) -I$(top_srcdir)" $srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")] inits = $srcs.map {|s| File.basename(s, ".*")} inits.delete("init") Index: internal.h =================================================================== --- internal.h (revision 42497) +++ internal.h (revision 42498) @@ -513,6 +513,11 @@ VALUE rb_big_mul_karatsuba(VALUE x, VALU https://github.com/ruby/ruby/blob/trunk/internal.h#L513 VALUE rb_big_mul_toom3(VALUE x, VALUE y); VALUE rb_big_sq_fast(VALUE x); +/* file.c */ +#ifdef __APPLE__ +VALUE rb_str_normalize_ospath(const char *ptr, long len); +#endif + /* io.c */ void rb_maygvl_fd_fix_cloexec(int fd); Index: test/-ext-/string/test_normalize.rb =================================================================== --- test/-ext-/string/test_normalize.rb (revision 0) +++ test/-ext-/string/test_normalize.rb (revision 42498) @@ -0,0 +1,105 @@ https://github.com/ruby/ruby/blob/trunk/test/-ext-/string/test_normalize.rb#L1 +require 'test/unit' +require "-test-/string/string" +require "tempfile" + +class Test_StringNormalize < Test::Unit::TestCase +=begin + def test_normalize_all + exclude = [ + #0x340, 0x341, 0x343, 0x344 + ] + (0x0080..0xFFFD).each do |n| + next if 0xD800 <= n && n <= 0xDFFF + next if exclude.include? n + code = n.to_s(16) + Tempfile.create("#{code}-#{n.chr(Encoding::UTF_8)}-") do |tempfile| + ary = Dir.glob(File.expand_path("../#{code}-*", tempfile.path)) + assert_equal 1, ary.size + result = ary[0] + rn = result[/\/\h+-(.+?)-/, 1] + #assert_equal tempfile.path, result, "#{rn.dump} is not U+#{n.to_s(16)}" + r2 = Bug::String.new(result ).normalize_ospath + rn2 = r2[/\/\h+-(.+?)-/, 1] + if tempfile.path == result + if tempfile.path == r2 + else + puts "U+#{n.to_s(16)} shouldn't be r2#{rn2.dump}" + end + else + if tempfile.path == r2 + # puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump}" + elsif result == r2 + puts "U+#{n.to_s(16)} shouldn't be #{rn.dump}" + else + puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump} r2#{rn2.dump}" + end + end + end + end + end +=end + + def test_normalize + %[ + \u304C \u304B\u3099 + \u3077 \u3075\u309A + \u308F\u3099 \u308F\u3099 + \u30F4 \u30A6\u3099 + \u30DD \u30DB\u309A + \u30AB\u303A \u30AB\u303A + \u00C1 A\u0301 + B\u030A B\u030A + \u0386 \u0391\u0301 + \u03D3 \u03D2\u0301 + \u0401 \u0415\u0308 + \u2260 =\u0338 + ].scan(/(\S+)\s+(\S+)/) do |expected, src| + result = Bug::String.new(src).normalize_ospath + assert_equal expected, result, + "#{expected.dump} is expected but #{src.dump}" + end + rescue NotImplementedError + end + + def test_not_normalize_kc + %[ + \u2460 + \u2162 + \u3349 + \u33A1 + \u337B + \u2116 + \u33CD + \u2121 + \u32A4 + \u3231 + ].split.each do |src| + result = Bug::String.new(src).normalize_ospath + assert_equal src, result, + "#{src.dump} is expected not to be normalized, but #{result.dump}" + end + rescue NotImplementedError + end + + def test_dont_normalize_hfsplus + %[ + \u2190\u0338 + \u219A + \u212B + \uF90A + \uF9F4 + \uF961 \uF9DB + \uF96F \uF3AA + \uF915 \uF95C \uF9BF + \uFA0C + \uFA10 + \uFA19 + \uFA26 + ].split.each do |src| + result = Bug::String.new(src).normalize_ospath + assert_equal src, result, + "#{src.dump} is expected not to be normalized, but #{result.dump}" + end + rescue NotImplementedError + end +end Index: file.c =================================================================== --- file.c (revision 42497) +++ file.c (revision 42498) @@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path) https://github.com/ruby/ruby/blob/trunk/file.c#L245 #ifdef __APPLE__ VALUE -rb_str_normalize_ospath(const char *ptr, long len) +rb_str_normalize_ospath0(const char *ptr, long len) { VALUE str; CFIndex buflen = 0; @@ -267,6 +267,47 @@ rb_str_normalize_ospath(const char *ptr, https://github.com/ruby/ruby/blob/trunk/file.c#L267 CFRelease(s); return str; } + +VALUE +rb_str_normalize_ospath(const char *ptr, long len) +{ + const char *p = ptr; + const char *e = ptr + len; + const char *p1 = p; + VALUE str = rb_str_buf_new(len); + rb_encoding *enc = rb_utf8_encoding(); + rb_enc_associate(str, enc); + + while (p < e) { + int l; + int r = rb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(r)) { + /* invalid byte shall not happen but */ + rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1)); + rb_str_cat2(str, "\xEF\xBF\xBD"); + p += 1; + } + l = MBCLEN_CHARFOUND_LEN(r); + int c = rb_enc_mbc_to_codepoint(p, e, enc); + if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) || + (0x2F800 <= c && c <= 0x2FAFF)) { + if (p - p1 > 0) { + rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1)); + } + rb_str_cat(str, p, l); + p += l; + p1 = p; + } + else { + p += l; + } + } + if (p - p1 > 0) { + rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1)); + } + + return str; +} #endif static long -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/