ruby-changes:59747
From: Nobuyoshi <ko1@a...>
Date: Sun, 19 Jan 2020 15:54:19 +0900 (JST)
Subject: [ruby-changes:59747] 2f1081a451 (master): Sort globbed results by default [Feature #8709]
https://git.ruby-lang.org/ruby.git/commit/?id=2f1081a451 From 2f1081a451f21ca017cc9fdc585883e5c6ebf618 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada <nobu@r...> Date: Sat, 18 Jan 2020 00:21:11 +0900 Subject: Sort globbed results by default [Feature #8709] Sort the results which matched single wildcard or character set in binary ascending order, unless `sort: false` is given. The order of an Array of pattern strings and braces are not affected. diff --git a/NEWS.md b/NEWS.md index 1c0feb6..a53f4f2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,13 @@ sufficient information, see the ChangeLog file or Redmine https://github.com/ruby/ruby/blob/trunk/NEWS.md#L14 ## Core classes updates (outstanding ones only) +* Dir + + * Modified method + + * Dir.glob and Dir.[] now sort the results by default, and + accept `sort:` keyword option. [[Feature #8709]] + * Hash * Modified method @@ -46,6 +53,7 @@ sufficient information, see the ChangeLog file or Redmine https://github.com/ruby/ruby/blob/trunk/NEWS.md#L53 ## Miscellaneous changes +[Feature #8709]: https://bugs.ruby-lang.org/issues/8709 [Feature #8948]: https://bugs.ruby-lang.org/issues/8948 [Feature #16274]: https://bugs.ruby-lang.org/issues/16274 [Feature #16377]: https://bugs.ruby-lang.org/issues/16377 diff --git a/dir.c b/dir.c index 6734559..d54ba64 100644 --- a/dir.c +++ b/dir.c @@ -219,6 +219,7 @@ typedef enum { https://github.com/ruby/ruby/blob/trunk/dir.c#L219 #else #define FNM_SHORTNAME 0 #endif +#define FNM_GLOB_NOSORT 0x40 #define FNM_NOMATCH 1 #define FNM_ERROR 2 @@ -1350,21 +1351,34 @@ sys_enc_warning_in(const char *func, const char *mesg, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/dir.c#L1351 #define sys_warning(val, enc) \ ((flags & GLOB_VERBOSE) ? sys_enc_warning_in(RUBY_FUNCTION_NAME_STRING, (val), (enc)) :(void)0) -static inline void * -glob_alloc_n(size_t x, size_t y) +static inline size_t +glob_alloc_size(size_t x, size_t y) { size_t z; if (rb_mul_size_overflow(x, y, SSIZE_MAX, &z)) { rb_memerror(); /* or...? */ } else { - return malloc(z); + return z; } } +static inline void * +glob_alloc_n(size_t x, size_t y) +{ + return malloc(glob_alloc_size(x, y)); +} + +static inline void * +glob_realloc_n(void *p, size_t x, size_t y) +{ + return realloc(p, glob_alloc_size(x, y)); +} + #define GLOB_ALLOC(type) ((type *)malloc(sizeof(type))) #define GLOB_ALLOC_N(type, n) ((type *)glob_alloc_n(sizeof(type), n)) #define GLOB_REALLOC(ptr, size) realloc((ptr), (size)) +#define GLOB_REALLOC_N(ptr, n) glob_realloc_n(ptr, sizeof(*(ptr)), n) #define GLOB_FREE(ptr) free(ptr) #define GLOB_JUMP_TAG(status) (((status) == -1) ? rb_memerror() : rb_jump_tag(status)) @@ -2016,8 +2030,17 @@ rb_glob_error(const char *path, VALUE a, const void *enc, int error) https://github.com/ruby/ruby/blob/trunk/dir.c#L2030 return status; } +typedef struct rb_dirent { + long d_namlen; + const char *d_name; +#ifdef _WIN32 + const char *d_altname; +#endif + uint8_t d_type; +} rb_dirent_t; + static inline int -dirent_match(const char *pat, rb_encoding *enc, const char *name, const struct dirent *dp, int flags) +dirent_match(const char *pat, rb_encoding *enc, const char *name, const rb_dirent_t *dp, int flags) { if (fnmatch(pat, enc, name, flags) == 0) return 1; #ifdef _WIN32 @@ -2042,7 +2065,7 @@ struct push_glob_args { https://github.com/ruby/ruby/blob/trunk/dir.c#L2065 struct dirent_brace_args { const char *name; - const struct dirent *dp; + const rb_dirent_t *dp; int flags; }; @@ -2105,6 +2128,154 @@ static int push_caller(const char *path, VALUE val, void *enc); https://github.com/ruby/ruby/blob/trunk/dir.c#L2128 static int ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, rb_encoding *enc, VALUE var); +static const size_t rb_dirent_name_offset = + offsetof(rb_dirent_t, d_type) + sizeof(uint8_t); + +static rb_dirent_t * +dirent_copy(const struct dirent *dp, rb_dirent_t *rdp) +{ + if (!dp) return NULL; + size_t namlen = NAMLEN(dp); + const size_t altlen = +#ifdef _WIN32 + dp->d_altlen ? dp->d_altlen + 1 : +#endif + 0; + rb_dirent_t *newrdp = rdp; + if (!rdp && !(newrdp = malloc(rb_dirent_name_offset + namlen + 1 + altlen))) + return NULL; + newrdp->d_namlen = namlen; + if (!rdp) { + char *name = (char *)newrdp + rb_dirent_name_offset; + memcpy(name, dp->d_name, namlen); + name[namlen] = '\0'; +#ifdef _WIN32 + newrdp->d_altname = NULL; + if (altlen) { + char *const altname = name + namlen + 1; + memcpy(altname, dp->d_altname, altlen - 1); + altname[altlen - 1] = '\0'; + newrdp->d_altname = altname; + } +#endif + newrdp->d_name = name; + } + else { + newrdp->d_name = dp->d_name; +#ifdef _WIN32 + newrdp->d_altname = dp->d_altname; +#endif + } +#ifdef DT_UNKNOWN + newrdp->d_type = dp->d_type; +#else + newrdp->d_type = 0; +#endif + return newrdp; +} + +typedef union { + struct { + DIR *dirp; + rb_dirent_t ent; + } nosort; + struct { + size_t count, idx; + rb_dirent_t **entries; + } sort; +} ruby_glob_entries_t; + +static int +glob_sort_cmp(const void *a, const void *b, void *e) +{ + const rb_dirent_t *ent1 = *(void **)a; + const rb_dirent_t *ent2 = *(void **)b; + return strcmp(ent1->d_name, ent2->d_name); +} + +static void +glob_dir_finish(ruby_glob_entries_t *ent, int flags) +{ + if (flags & FNM_GLOB_NOSORT) { + closedir(ent->nosort.dirp); + ent->nosort.dirp = NULL; + } + else if (ent->sort.entries) { + for (size_t i = 0, count = ent->sort.count; i < count;) { + GLOB_FREE(ent->sort.entries[i++]); + } + GLOB_FREE(ent->sort.entries); + ent->sort.entries = NULL; + ent->sort.count = ent->sort.idx = 0; + } +} + +static ruby_glob_entries_t * +glob_opendir(ruby_glob_entries_t *ent, DIR *dirp, int flags, rb_encoding *enc) +{ + MEMZERO(ent, ruby_glob_entries_t, 1); + if (flags & FNM_GLOB_NOSORT) { + ent->nosort.dirp = dirp; + } + else { + void *newp; + struct dirent *dp; + size_t count = 0, capacity = 0; + ent->sort.count = 0; + ent->sort.idx = 0; + ent->sort.entries = 0; +#ifdef _WIN32 + if ((capacity = dirp->nfiles) > 0) { + if (!(newp = GLOB_ALLOC_N(rb_dirent_t, capacity))) { + closedir(dirp); + return NULL; + } + ent->sort.entries = newp; + } +#endif + while ((dp = READDIR(dirp, enc)) != NULL) { + rb_dirent_t *rdp = dirent_copy(dp, NULL); + if (!rdp) { + nomem: + glob_dir_finish(ent, 0); + closedir(dirp); + return NULL; + } + if (count >= capacity) { + capacity += 256; + if (!(newp = GLOB_REALLOC_N(ent->sort.entries, capacity))) + goto nomem; + ent->sort.entries = newp; + } + ent->sort.entries[count++] = rdp; + ent->sort.count = count; + } + closedir(dirp); + if (count < capacity) { + if (!(newp = GLOB_REALLOC_N(ent->sort.entries, count))) + goto nomem; + ent->sort.entries = newp; + } + ruby_qsort(ent->sort.entries, ent->sort.count, sizeof(ent->sort.entries[0]), + glob_sort_cmp, NULL); + } + return ent; +} + +static rb_dirent_t * +glob_getent(ruby_glob_entries_t *ent, int flags, rb_encoding *enc) +{ + if (flags & FNM_GLOB_NOSORT) { + return dirent_copy(READDIR(ent->nosort.dirp, enc), &ent->nosort.ent); + } + else if (ent->sort.idx < ent->sort.count) { + return ent->sort.entries[ent->sort.idx++]; + } + else { + return NULL; + } +} + static int glob_helper( int fd, @@ -2217,7 +2388,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2388 if (pathtype == path_noent) return 0; if (magical || recursive) { - struct dirent *dp; + rb_dirent_t *dp; DIR *dirp; # if USE_NAME_ON_FS == USE_NAME_ON_FS_BY_FNMATCH char *plainname = 0; @@ -2256,7 +2427,18 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2427 if (is_case_sensitive(dirp, path) == 0) flags |= FNM_CASEFOLD; # endif - while ((dp = READDIR(dirp, enc)) != NULL) { + ruby_glob_entries_t globent; + if (!glob_opendir(&globent, dirp, flags, enc)) { + status = 0; + if (funcs->error) { + status = (*funcs->error)(path, arg, enc, ENOMEM); + } + else { + sys_warning(path, enc); + } + return status; + } + while ((dp = glob_getent(&globent, flags, enc)) != NULL) { char *buf; rb_pathtype_t new_pathtype = path_unknown; const char *name; @@ -2265,7 +2447,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2447 IF_NORMALIZE_UTF8PATH(VALUE utf8str = Qnil); name = dp->d_name; - namlen = NAMLEN(dp); + namlen = dp->d_namlen; if (recursive && name[0] == '.') { ++dotfile; if (namlen == 1) { @@ -2360,7 +2542,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2542 if (status) break; } - closedir(dirp); + glob_dir_finish(&globent, flags); } else if (plain) { struct glob_pattern **copy_beg, **copy_end, **cur2; @@ -2753,15 +2935,16 @@ dir_globs(long argc, const VALUE *argv, VALUE base, int flags) https://github.com/ruby/ruby/blob/trunk/dir.c#L2935 } static void -dir_glob_options(VALUE opt, VALUE *base, int *flags) +dir_glob_options(VALUE opt, VALUE *base, int *sort, int *flags) { - static ID kw[2]; - VALUE args[2]; + static ID kw[3]; + VALUE args[3 (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/