[前][次][番号順一覧][スレッド一覧]

ruby-changes:59747

From: Nobuyoshi <ko1@a...>
Date: Sun, 19 Jan 2020 15:54:19 +0900 (JST)
Subject: [ruby-changes:59747] 2f1081a451 (master): Sort globbed results by default [Feature #8709]

https://git.ruby-lang.org/ruby.git/commit/?id=2f1081a451

From 2f1081a451f21ca017cc9fdc585883e5c6ebf618 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Sat, 18 Jan 2020 00:21:11 +0900
Subject: Sort globbed results by default [Feature #8709]

Sort the results which matched a single wildcard or character set in
binary ascending order, unless `sort: false` is given.  The order
of an Array of pattern strings and braces is not affected.

diff --git a/NEWS.md b/NEWS.md
index 1c0feb6..a53f4f2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -14,6 +14,13 @@ sufficient information, see the ChangeLog file or Redmine https://github.com/ruby/ruby/blob/trunk/NEWS.md#L14
 
 ## Core classes updates (outstanding ones only)
 
+* Dir
+
+    * Modified method
+
+        * Dir.glob and Dir.[] now sort the results by default, and
+          accept `sort:` keyword option.  [[Feature #8709]]
+
 * Hash
 
     * Modified method
@@ -46,6 +53,7 @@ sufficient information, see the ChangeLog file or Redmine https://github.com/ruby/ruby/blob/trunk/NEWS.md#L53
 ## Miscellaneous changes
 
 
+[Feature #8709]:  https://bugs.ruby-lang.org/issues/8709
 [Feature #8948]:  https://bugs.ruby-lang.org/issues/8948
 [Feature #16274]: https://bugs.ruby-lang.org/issues/16274
 [Feature #16377]: https://bugs.ruby-lang.org/issues/16377
diff --git a/dir.c b/dir.c
index 6734559..d54ba64 100644
--- a/dir.c
+++ b/dir.c
@@ -219,6 +219,7 @@ typedef enum { https://github.com/ruby/ruby/blob/trunk/dir.c#L219
 #else
 #define FNM_SHORTNAME	0
 #endif
+#define FNM_GLOB_NOSORT 0x40
 
 #define FNM_NOMATCH	1
 #define FNM_ERROR	2
@@ -1350,21 +1351,34 @@ sys_enc_warning_in(const char *func, const char *mesg, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/dir.c#L1351
 #define sys_warning(val, enc) \
     ((flags & GLOB_VERBOSE) ? sys_enc_warning_in(RUBY_FUNCTION_NAME_STRING, (val), (enc)) :(void)0)
 
-static inline void *
-glob_alloc_n(size_t x, size_t y)
+static inline size_t
+glob_alloc_size(size_t x, size_t y)
 {
     size_t z;
     if (rb_mul_size_overflow(x, y, SSIZE_MAX, &z)) {
         rb_memerror();          /* or...? */
     }
     else {
-        return malloc(z);
+        return z;
     }
 }
 
+static inline void *
+glob_alloc_n(size_t x, size_t y)
+{
+    return malloc(glob_alloc_size(x, y));
+}
+
+static inline void *
+glob_realloc_n(void *p, size_t x, size_t y)
+{
+    return realloc(p, glob_alloc_size(x, y));
+}
+
 #define GLOB_ALLOC(type) ((type *)malloc(sizeof(type)))
 #define GLOB_ALLOC_N(type, n) ((type *)glob_alloc_n(sizeof(type), n))
 #define GLOB_REALLOC(ptr, size) realloc((ptr), (size))
+#define GLOB_REALLOC_N(ptr, n) glob_realloc_n(ptr, sizeof(*(ptr)), n)
 #define GLOB_FREE(ptr) free(ptr)
 #define GLOB_JUMP_TAG(status) (((status) == -1) ? rb_memerror() : rb_jump_tag(status))
 
@@ -2016,8 +2030,17 @@ rb_glob_error(const char *path, VALUE a, const void *enc, int error) https://github.com/ruby/ruby/blob/trunk/dir.c#L2030
     return status;
 }
 
+typedef struct rb_dirent {
+    long d_namlen;
+    const char *d_name;
+#ifdef _WIN32
+    const char *d_altname;
+#endif
+    uint8_t d_type;
+} rb_dirent_t;
+
 static inline int
-dirent_match(const char *pat, rb_encoding *enc, const char *name, const struct dirent *dp, int flags)
+dirent_match(const char *pat, rb_encoding *enc, const char *name, const rb_dirent_t *dp, int flags)
 {
     if (fnmatch(pat, enc, name, flags) == 0) return 1;
 #ifdef _WIN32
@@ -2042,7 +2065,7 @@ struct push_glob_args { https://github.com/ruby/ruby/blob/trunk/dir.c#L2065
 
 struct dirent_brace_args {
     const char *name;
-    const struct dirent *dp;
+    const rb_dirent_t *dp;
     int flags;
 };
 
@@ -2105,6 +2128,154 @@ static int push_caller(const char *path, VALUE val, void *enc); https://github.com/ruby/ruby/blob/trunk/dir.c#L2128
 static int ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
 			     rb_encoding *enc, VALUE var);
 
+static const size_t rb_dirent_name_offset =
+    offsetof(rb_dirent_t, d_type) + sizeof(uint8_t);
+
+static rb_dirent_t *
+dirent_copy(const struct dirent *dp, rb_dirent_t *rdp)
+{
+    if (!dp) return NULL;
+    size_t namlen = NAMLEN(dp);
+    const size_t altlen =
+#ifdef _WIN32
+        dp->d_altlen ? dp->d_altlen + 1 :
+#endif
+        0;
+    rb_dirent_t *newrdp = rdp;
+    if (!rdp && !(newrdp = malloc(rb_dirent_name_offset + namlen + 1 + altlen)))
+        return NULL;
+    newrdp->d_namlen = namlen;
+    if (!rdp) {
+        char *name = (char *)newrdp + rb_dirent_name_offset;
+        memcpy(name, dp->d_name, namlen);
+        name[namlen] = '\0';
+#ifdef _WIN32
+        newrdp->d_altname = NULL;
+        if (altlen) {
+            char *const altname = name + namlen + 1;
+            memcpy(altname, dp->d_altname, altlen - 1);
+            altname[altlen - 1] = '\0';
+            newrdp->d_altname = altname;
+        }
+#endif
+        newrdp->d_name = name;
+    }
+    else {
+        newrdp->d_name = dp->d_name;
+#ifdef _WIN32
+        newrdp->d_altname = dp->d_altname;
+#endif
+    }
+#ifdef DT_UNKNOWN
+    newrdp->d_type = dp->d_type;
+#else
+    newrdp->d_type = 0;
+#endif
+    return newrdp;
+}
+
+typedef union {
+    struct {
+        DIR *dirp;
+        rb_dirent_t ent;
+    } nosort;
+    struct {
+        size_t count, idx;
+        rb_dirent_t **entries;
+    } sort;
+} ruby_glob_entries_t;
+
+static int
+glob_sort_cmp(const void *a, const void *b, void *e)
+{
+    const rb_dirent_t *ent1 = *(void **)a;
+    const rb_dirent_t *ent2 = *(void **)b;
+    return strcmp(ent1->d_name, ent2->d_name);
+}
+
+static void
+glob_dir_finish(ruby_glob_entries_t *ent, int flags)
+{
+    if (flags & FNM_GLOB_NOSORT) {
+        closedir(ent->nosort.dirp);
+        ent->nosort.dirp = NULL;
+    }
+    else if (ent->sort.entries) {
+        for (size_t i = 0, count = ent->sort.count; i < count;) {
+            GLOB_FREE(ent->sort.entries[i++]);
+        }
+        GLOB_FREE(ent->sort.entries);
+        ent->sort.entries = NULL;
+        ent->sort.count = ent->sort.idx = 0;
+    }
+}
+
+static ruby_glob_entries_t *
+glob_opendir(ruby_glob_entries_t *ent, DIR *dirp, int flags, rb_encoding *enc)
+{
+    MEMZERO(ent, ruby_glob_entries_t, 1);
+    if (flags & FNM_GLOB_NOSORT) {
+        ent->nosort.dirp = dirp;
+    }
+    else {
+        void *newp;
+        struct dirent *dp;
+        size_t count = 0, capacity = 0;
+        ent->sort.count = 0;
+        ent->sort.idx = 0;
+        ent->sort.entries = 0;
+#ifdef _WIN32
+        if ((capacity = dirp->nfiles) > 0) {
+            if (!(newp = GLOB_ALLOC_N(rb_dirent_t, capacity))) {
+                closedir(dirp);
+                return NULL;
+            }
+            ent->sort.entries = newp;
+        }
+#endif
+	while ((dp = READDIR(dirp, enc)) != NULL) {
+            rb_dirent_t *rdp = dirent_copy(dp, NULL);
+            if (!rdp) {
+              nomem:
+                glob_dir_finish(ent, 0);
+                closedir(dirp);
+                return NULL;
+            }
+            if (count >= capacity) {
+                capacity += 256;
+                if (!(newp = GLOB_REALLOC_N(ent->sort.entries, capacity)))
+                    goto nomem;
+                ent->sort.entries = newp;
+            }
+            ent->sort.entries[count++] = rdp;
+            ent->sort.count = count;
+        }
+        closedir(dirp);
+        if (count < capacity) {
+            if (!(newp = GLOB_REALLOC_N(ent->sort.entries, count)))
+                goto nomem;
+            ent->sort.entries = newp;
+        }
+        ruby_qsort(ent->sort.entries, ent->sort.count, sizeof(ent->sort.entries[0]),
+                   glob_sort_cmp, NULL);
+    }
+    return ent;
+}
+
+static rb_dirent_t *
+glob_getent(ruby_glob_entries_t *ent, int flags, rb_encoding *enc)
+{
+    if (flags & FNM_GLOB_NOSORT) {
+        return dirent_copy(READDIR(ent->nosort.dirp, enc), &ent->nosort.ent);
+    }
+    else if (ent->sort.idx < ent->sort.count) {
+        return ent->sort.entries[ent->sort.idx++];
+    }
+    else {
+        return NULL;
+    }
+}
+
 static int
 glob_helper(
     int fd,
@@ -2217,7 +2388,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2388
     if (pathtype == path_noent) return 0;
 
     if (magical || recursive) {
-	struct dirent *dp;
+	rb_dirent_t *dp;
 	DIR *dirp;
 # if USE_NAME_ON_FS == USE_NAME_ON_FS_BY_FNMATCH
 	char *plainname = 0;
@@ -2256,7 +2427,18 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2427
 	if (is_case_sensitive(dirp, path) == 0)
 	    flags |= FNM_CASEFOLD;
 # endif
-	while ((dp = READDIR(dirp, enc)) != NULL) {
+        ruby_glob_entries_t globent;
+        if (!glob_opendir(&globent, dirp, flags, enc)) {
+            status = 0;
+            if (funcs->error) {
+                status = (*funcs->error)(path, arg, enc, ENOMEM);
+            }
+            else {
+                sys_warning(path, enc);
+            }
+            return status;
+        }
+	while ((dp = glob_getent(&globent, flags, enc)) != NULL) {
 	    char *buf;
 	    rb_pathtype_t new_pathtype = path_unknown;
 	    const char *name;
@@ -2265,7 +2447,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2447
 	    IF_NORMALIZE_UTF8PATH(VALUE utf8str = Qnil);
 
 	    name = dp->d_name;
-	    namlen = NAMLEN(dp);
+	    namlen = dp->d_namlen;
 	    if (recursive && name[0] == '.') {
 		++dotfile;
 		if (namlen == 1) {
@@ -2360,7 +2542,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2542
 	    if (status) break;
 	}
 
-	closedir(dirp);
+        glob_dir_finish(&globent, flags);
     }
     else if (plain) {
 	struct glob_pattern **copy_beg, **copy_end, **cur2;
@@ -2753,15 +2935,16 @@ dir_globs(long argc, const VALUE *argv, VALUE base, int flags) https://github.com/ruby/ruby/blob/trunk/dir.c#L2935
 }
 
 static void
-dir_glob_options(VALUE opt, VALUE *base, int *flags)
+dir_glob_options(VALUE opt, VALUE *base, int *sort, int *flags)
 {
-    static ID kw[2];
-    VALUE args[2];
+    static ID kw[3];
+    VALUE args[3 (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]