[前][次][番号順一覧][スレッド一覧]

ruby-changes:30419

From: naruse <ko1@a...>
Date: Sun, 11 Aug 2013 05:44:19 +0900 (JST)
Subject: [ruby-changes:30419] naruse:r42498 (trunk): * file.c (rb_str_normalize_ospath):

naruse	2013-08-11 05:44:10 +0900 (Sun, 11 Aug 2013)

  New Revision: 42498

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=42498

  Log:
    * file.c (rb_str_normalize_ospath):
      HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
      U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
      U+2FAFF are not decomposed (this avoids problems with round trip
      conversions from old Mac text encodings).
      http://developer.apple.com/library/mac/qa/qa1173/_index.html
      Therefore fix r42457 to exclude the range.

  Added files:
    trunk/ext/-test-/string/normalize.c
    trunk/test/-ext-/string/test_normalize.rb
  Modified files:
    trunk/ChangeLog
    trunk/dir.c
    trunk/ext/-test-/string/depend
    trunk/ext/-test-/string/extconf.rb
    trunk/file.c
    trunk/internal.h
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 42497)
+++ ChangeLog	(revision 42498)
@@ -1,3 +1,13 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun Aug 11 04:48:14 2013  NARUSE, Yui  <naruse@r...>
+
+	* file.c (rb_str_normalize_ospath):
+	  HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
+	  U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
+	  U+2FAFF are not decomposed (this avoids problems with round trip
+	  conversions from old Mac text encodings).
+	  http://developer.apple.com/library/mac/qa/qa1173/_index.html
+	  Therefore fix r42457 to exclude the range.
+
 Sun Aug 11 03:26:07 2013  Tanaka Akira  <akr@f...>
 
 	* bignum.c (bitsize): Fix a conditional expression.
Index: dir.c
===================================================================
--- dir.c	(revision 42497)
+++ dir.c	(revision 42498)
@@ -84,8 +84,6 @@ char *strchr(char*,char); https://github.com/ruby/ruby/blob/trunk/dir.c#L84
 #include <sys/param.h>
 #include <sys/mount.h>
 
-VALUE rb_str_normalize_ospath(const char *ptr, long len);
-
 static inline int
 is_hfs(DIR *dirp)
 {
@@ -1420,7 +1418,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L1418
 	    name = dp->d_name;
 	    namlen = NAMLEN(dp);
 # if HAVE_HFS
-	    if (hfs_p && has_nonascii(name, namlen)) {
+	    if (0&&hfs_p && has_nonascii(name, namlen)) {
 		if (!NIL_P(utf8str = rb_str_normalize_ospath(name, namlen))) {
 		    RSTRING_GETMEM(utf8str, name, namlen);
 		}
Index: ext/-test-/string/depend
===================================================================
--- ext/-test-/string/depend	(revision 42497)
+++ ext/-test-/string/depend	(revision 42498)
@@ -2,3 +2,4 @@ $(OBJS): $(HDRS) $(ruby_headers) \ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/depend#L2
   $(hdrdir)/ruby/encoding.h \
   $(hdrdir)/ruby/oniguruma.h
 qsort.o: $(hdrdir)/ruby/util.h
+normalize.o: $(top_srcdir)/internal.h
Index: ext/-test-/string/normalize.c
===================================================================
--- ext/-test-/string/normalize.c	(revision 0)
+++ ext/-test-/string/normalize.c	(revision 42498)
@@ -0,0 +1,18 @@ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/normalize.c#L1
+#include "ruby.h"
+#include "internal.h"
+
+#ifndef __APPLE__
+#define normalize_ospath rb_f_notimplement
+#else /* __APPLE__: forward the receiver's bytes to rb_str_normalize_ospath */
+static VALUE
+normalize_ospath(VALUE self)
+{
+    return rb_str_normalize_ospath(RSTRING_PTR(self), RSTRING_LEN(self));
+}
+#endif /* !__APPLE__ */
+
+void
+Init_normalize(VALUE klass) /* klass: the class that receives the test method */
+{
+    rb_define_method(klass, "normalize_ospath", normalize_ospath, 0); /* no arguments */
+}
Index: ext/-test-/string/extconf.rb
===================================================================
--- ext/-test-/string/extconf.rb	(revision 42497)
+++ ext/-test-/string/extconf.rb	(revision 42498)
@@ -1,3 +1,4 @@ https://github.com/ruby/ruby/blob/trunk/ext/-test-/string/extconf.rb#L1
+$INCFLAGS << " -I$(topdir) -I$(top_srcdir)"
 $srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")]
 inits = $srcs.map {|s| File.basename(s, ".*")}
 inits.delete("init")
Index: internal.h
===================================================================
--- internal.h	(revision 42497)
+++ internal.h	(revision 42498)
@@ -513,6 +513,11 @@ VALUE rb_big_mul_karatsuba(VALUE x, VALU https://github.com/ruby/ruby/blob/trunk/internal.h#L513
 VALUE rb_big_mul_toom3(VALUE x, VALUE y);
 VALUE rb_big_sq_fast(VALUE x);
 
+/* file.c */
+#ifdef __APPLE__
+VALUE rb_str_normalize_ospath(const char *ptr, long len);
+#endif
+
 /* io.c */
 void rb_maygvl_fd_fix_cloexec(int fd);
 
Index: test/-ext-/string/test_normalize.rb
===================================================================
--- test/-ext-/string/test_normalize.rb	(revision 0)
+++ test/-ext-/string/test_normalize.rb	(revision 42498)
@@ -0,0 +1,105 @@ https://github.com/ruby/ruby/blob/trunk/test/-ext-/string/test_normalize.rb#L1
+require 'test/unit'
+require "-test-/string/string"
+require "tempfile"
+
+class Test_StringNormalize < Test::Unit::TestCase # exercises Bug::String#normalize_ospath
+=begin
+  def test_normalize_all
+    exclude = [
+      #0x340, 0x341, 0x343, 0x344
+    ]
+    (0x0080..0xFFFD).each do |n|
+      next if 0xD800 <= n && n <= 0xDFFF
+      next if exclude.include? n
+      code = n.to_s(16)
+      Tempfile.create("#{code}-#{n.chr(Encoding::UTF_8)}-") do |tempfile|
+        ary = Dir.glob(File.expand_path("../#{code}-*", tempfile.path))
+        assert_equal 1, ary.size
+        result = ary[0]
+        rn = result[/\/\h+-(.+?)-/, 1]
+        #assert_equal tempfile.path, result, "#{rn.dump} is not U+#{n.to_s(16)}"
+        r2 = Bug::String.new(result ).normalize_ospath
+        rn2 = r2[/\/\h+-(.+?)-/, 1]
+        if tempfile.path == result
+          if tempfile.path == r2
+          else
+            puts "U+#{n.to_s(16)} shouldn't be r2#{rn2.dump}"
+          end
+        else
+          if tempfile.path == r2
+            # puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump}"
+          elsif result == r2
+            puts "U+#{n.to_s(16)} shouldn't be #{rn.dump}"
+          else
+            puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump} r2#{rn2.dump}"
+          end
+        end
+      end
+    end
+  end
+=end
+
+  def test_normalize # each row below is "expected input"; normalizing input must give expected
+    %[
+      \u304C \u304B\u3099
+      \u3077 \u3075\u309A
+      \u308F\u3099 \u308F\u3099
+      \u30F4 \u30A6\u3099
+      \u30DD \u30DB\u309A
+      \u30AB\u303A \u30AB\u303A
+      \u00C1 A\u0301
+      B\u030A B\u030A
+      \u0386 \u0391\u0301
+      \u03D3 \u03D2\u0301
+      \u0401 \u0415\u0308
+      \u2260 =\u0338
+    ].scan(/(\S+)\s+(\S+)/) do |expected, src| # column 1: expected result, column 2: input
+      result = Bug::String.new(src).normalize_ospath
+      assert_equal expected, result,
+        "#{expected.dump} is expected but #{src.dump}"
+    end
+  rescue NotImplementedError # normalize_ospath is rb_f_notimplement unless built for __APPLE__
+  end
+
+  def test_not_normalize_kc # every token below must come back unchanged
+    %[
+      \u2460
+      \u2162
+      \u3349
+      \u33A1
+      \u337B
+      \u2116
+      \u33CD
+      \u2121
+      \u32A4
+      \u3231
+    ].split.each do |src| # each whitespace-separated token is one input
+      result = Bug::String.new(src).normalize_ospath
+      assert_equal src, result,
+        "#{src.dump} is expected not to be normalized, but #{result.dump}"
+    end
+  rescue NotImplementedError # normalize_ospath is rb_f_notimplement unless built for __APPLE__
+  end
+
+  def test_dont_normalize_hfsplus # every token below must come back unchanged
+    %[
+      \u2190\u0338
+      \u219A
+      \u212B
+      \uF90A
+      \uF9F4
+      \uF961 \uF9DB
+      \uF96F \uF3AA
+      \uF915 \uF95C \uF9BF
+      \uFA0C
+      \uFA10
+      \uFA19
+      \uFA26
+    ].split.each do |src| # each whitespace-separated token is one input
+      result = Bug::String.new(src).normalize_ospath
+      assert_equal src, result,
+        "#{src.dump} is expected not to be normalized, but #{result.dump}"
+    end
+  rescue NotImplementedError # normalize_ospath is rb_f_notimplement unless built for __APPLE__
+  end
+end
Index: file.c
===================================================================
--- file.c	(revision 42497)
+++ file.c	(revision 42498)
@@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path) https://github.com/ruby/ruby/blob/trunk/file.c#L245
 
 #ifdef __APPLE__
 VALUE
-rb_str_normalize_ospath(const char *ptr, long len)
+rb_str_normalize_ospath0(const char *ptr, long len)
 {
     VALUE str;
     CFIndex buflen = 0;
@@ -267,6 +267,47 @@ rb_str_normalize_ospath(const char *ptr, https://github.com/ruby/ruby/blob/trunk/file.c#L267
     CFRelease(s);
     return str;
 }
+
+/* Normalize the UTF-8 path ptr[0,len) with rb_str_normalize_ospath0, copying
+ * U+2000-2FFF, U+F900-FAFF and U+2F800-2FAFF verbatim (HFS+ leaves them
+ * alone); each invalid byte becomes U+FFFD.  Returns a new UTF-8 string. */
+VALUE
+rb_str_normalize_ospath(const char *ptr, long len)
+{
+    const char *p = ptr;
+    const char *e = ptr + len;
+    const char *p1 = p; /* start of the run not yet normalized */
+    VALUE str = rb_str_buf_new(len);
+    rb_encoding *enc = rb_utf8_encoding();
+    rb_enc_associate(str, enc);
+
+    while (p < e) {
+	int l, c;
+	int r = rb_enc_precise_mbclen(p, e, enc);
+	if (!MBCLEN_CHARFOUND_P(r)) {
+	    /* invalid byte shall not happen but */
+	    rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+	    rb_str_cat2(str, "\xEF\xBF\xBD");
+	    p1 = ++p; /* restart the run after the bad byte: no double append */
+	    continue; /* r carries no character length to decode with */
+	}
+	l = MBCLEN_CHARFOUND_LEN(r);
+	c = rb_enc_mbc_to_codepoint(p, e, enc);
+	if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) ||
+		(0x2F800 <= c && c <= 0x2FAFF)) {
+	    if (p - p1 > 0) {
+		rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+	    }
+	    rb_str_cat(str, p, l); /* excluded range: copy the bytes as-is */
+	    p1 = p + l;
+	}
+	p += l;
+    }
+    if (p - p1 > 0) {
+	rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+    }
+    return str;
+}
 #endif
 
 static long

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]