ruby-changes:52598
From: shirosaki <ko1@a...>
Date: Sat, 22 Sep 2018 10:11:46 +0900 (JST)
Subject: [ruby-changes:52598] shirosaki:r64810 (trunk): dir.c: performance fix with braces
shirosaki 2018-09-22 10:11:40 +0900 (Sat, 22 Sep 2018) New Revision: 64810 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=64810 Log: dir.c: performance fix with braces Braces were expended before ruby_glob0(). This caused to call replace_real_basename() for same plain patterns repeatedly. Move blace expansion into glob_helper() in ruby_glob0() to reduce replace_real_basename() call. This fix changes the order of glob results. [Feature #13167] [Fix GH-1864] Modified files: trunk/dir.c Index: dir.c =================================================================== --- dir.c (revision 64809) +++ dir.c (revision 64810) @@ -1534,7 +1534,7 @@ do_opendir(const int basefd, size_t base https://github.com/ruby/ruby/blob/trunk/dir.c#L1534 } /* Globing pattern */ -enum glob_pattern_type { PLAIN, ALPHA, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR }; +enum glob_pattern_type { PLAIN, ALPHA, BRACE, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR }; /* Return nonzero if S has any special globbing chars in it. */ static enum glob_pattern_type @@ -1542,15 +1542,20 @@ has_magic(const char *p, const char *pen https://github.com/ruby/ruby/blob/trunk/dir.c#L1542 { const int escape = !(flags & FNM_NOESCAPE); int hasalpha = 0; + int hasmagical = 0; register char c; while (p < pend && (c = *p++) != 0) { switch (c) { + case '{': + return BRACE; + case '*': case '?': case '[': - return MAGICAL; + hasmagical = 1; + break; case '\\': if (escape && p++ >= pend) @@ -1575,7 +1580,7 @@ has_magic(const char *p, const char *pen https://github.com/ruby/ruby/blob/trunk/dir.c#L1580 p = Next(p-1, pend, enc); } - return hasalpha ? ALPHA : PLAIN; + return hasmagical ? MAGICAL : hasalpha ? ALPHA : PLAIN; } /* Find separator in globbing pattern. */ @@ -1596,6 +1601,13 @@ find_dirsep(const char *p, const char *p https://github.com/ruby/ruby/blob/trunk/dir.c#L1601 open = 0; continue; + case '{': + open = 1; + continue; + case '}': + open = 0; + continue; + case '/': if (!open) return (char *)p-1; @@ -1671,6 +1683,21 @@ glob_make_pattern(const char *p, const c https://github.com/ruby/ruby/blob/trunk/dir.c#L1683 const enum glob_pattern_type non_magic = (USE_NAME_ON_FS || FNM_SYSCASE) ? PLAIN : ALPHA; char *buf; + if (magic == BRACE) { + /* brace pattern is parsed after expansion */ + buf = GLOB_ALLOC_N(char, e-p+1); + if (!buf) { + GLOB_FREE(tmp); + goto error; + } + memcpy(buf, p, e-p); + buf[e-p] = '\0'; + tmp->type = BRACE; + tmp->str = buf; + *tail = tmp; + tmp->next = 0; + return list; + } if (!(FNM_SYSCASE || magic > non_magic) && !recursive && *m) { const char *m2; while (has_magic(m+1, m2 = find_dirsep(m+1, e, flags, enc), flags, enc) <= non_magic && @@ -2001,6 +2028,58 @@ dirent_match(const char *pat, rb_encodin https://github.com/ruby/ruby/blob/trunk/dir.c#L2028 return 0; } +struct push_glob_args { + int fd; + const char *path; + size_t baselen; + size_t namelen; + int dirsep; /* '/' should be placed before appending child entry's name to 'path'. */ + rb_pathtype_t pathtype; /* type of 'path' */ + int flags; + const ruby_glob_funcs_t *funcs; + VALUE arg; +}; + +struct dirent_brace_args { + const char *name; + const struct dirent *dp; + int flags; +}; + +static int +dirent_match_brace(const char *pattern, VALUE val, void *enc) +{ + struct dirent_brace_args *arg = (struct dirent_brace_args *)val; + + return dirent_match(pattern, enc, arg->name, arg->dp, arg->flags); +} + +/* join paths from pattern list of glob_make_pattern() */ +static const char* +join_path_from_pattern(struct glob_pattern **beg) +{ + struct glob_pattern *p; + const char *path = ""; + + for (p = *beg; p; p = p->next) { + const char *str; + switch (p->type) { + case RECURSIVE: + str = "**"; + break; + default: + str = p->str; + } + path = join_path(path, strlen(path), (p != *beg), str, strlen(str)); + } + return path; +} + +static int push_caller(const char *path, VALUE val, void *enc); + +static int ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, + rb_encoding *enc, VALUE var); + static int glob_helper( int fd, @@ -2019,7 +2098,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2098 struct stat st; int status = 0; struct glob_pattern **cur, **new_beg, **new_end; - int plain = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0; + int plain = 0, brace = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0; int escape = !(flags & FNM_NOESCAPE); size_t pathlen = baselen + namelen; @@ -2040,6 +2119,9 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2119 magical = 1; #endif break; + case BRACE: + brace = 1; + break; case MAGICAL: magical = 2; break; @@ -2054,6 +2136,20 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2136 } } + if (brace) { + struct push_glob_args args; + const char* brace_path = join_path_from_pattern(beg); + args.fd = fd; + args.path = path; + args.baselen = baselen; + args.namelen = namelen; + args.dirsep = dirsep; + args.flags = flags; + args.funcs = funcs; + args.arg = arg; + return ruby_brace_expand(brace_path, flags, push_caller, (VALUE)&args, enc, Qfalse); + } + if (*path) { if (match_all && pathtype == path_unknown) { if (do_lstat(fd, baselen, path, &st, flags, enc) == 0) { @@ -2191,6 +2287,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2287 for (cur = beg; cur < end; ++cur) { struct glob_pattern *p = *cur; + struct dirent_brace_args args; if (p->type == RECURSIVE) { if (new_pathtype == path_directory || /* not symlink but real directory */ new_pathtype == path_exist) { @@ -2200,6 +2297,14 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2297 p = p->next; /* 0 times recursion */ } switch (p->type) { + case BRACE: + args.name = name; + args.dp = dp; + args.flags = flags; + if (ruby_brace_expand(p->str, flags, dirent_match_brace, + (VALUE)&args, enc, Qfalse) > 0) + *new_end++ = p->next; + break; case ALPHA: # if USE_NAME_ON_FS == USE_NAME_ON_FS_BY_FNMATCH if (plainname) { @@ -2298,6 +2403,24 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2403 } static int +push_caller(const char *path, VALUE val, void *enc) +{ + struct push_glob_args *arg = (struct push_glob_args *)val; + struct glob_pattern *list; + int status; + + list = glob_make_pattern(path, path + strlen(path), arg->flags, enc); + if (!list) { + return -1; + } + status = glob_helper(arg->fd, arg->path, arg->baselen, arg->namelen, arg->dirsep, + arg->pathtype, &list, &list + 1, arg->flags, arg->funcs, + arg->arg, enc); + glob_free_pattern(list); + return status; +} + +static int ruby_glob0(const char *path, int fd, const char *base, int flags, const ruby_glob_funcs_t *funcs, VALUE arg, rb_encoding *enc) @@ -2489,25 +2612,11 @@ ruby_brace_glob(const char *str, int fla https://github.com/ruby/ruby/blob/trunk/dir.c#L2612 return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding()); } -struct push_glob_args { - struct glob_args glob; - int flags; - int fd; -}; - -static int -push_caller(const char *path, VALUE val, void *enc) -{ - struct push_glob_args *arg = (struct push_glob_args *)val; - - return ruby_glob0(path, arg->fd, arg->glob.base, arg->flags, &rb_glob_funcs, - (VALUE)&arg->glob, enc); -} - static int push_glob(VALUE ary, VALUE str, VALUE base, int flags) { - struct push_glob_args args; + struct glob_args args; + int fd; rb_encoding *enc = rb_enc_get(str); #if defined _WIN32 || defined __APPLE__ @@ -2518,30 +2627,29 @@ push_glob(VALUE ary, VALUE str, VALUE ba https://github.com/ruby/ruby/blob/trunk/dir.c#L2627 if (rb_enc_to_index(enc) == ENCINDEX_US_ASCII) enc = rb_ascii8bit_encoding(); flags |= GLOB_VERBOSE; - args.glob.func = push_pattern; - args.glob.value = ary; - args.glob.enc = enc; - args.glob.base = 0; - args.flags = flags; - args.fd = AT_FDCWD; + args.func = push_pattern; + args.value = ary; + args.enc = enc; + args.base = 0; + fd = AT_FDCWD; if (!NIL_P(base)) { if (!RB_TYPE_P(base, T_STRING) || !rb_enc_check(str, base)) { struct dir_data *dirp = DATA_PTR(base); if (!dirp->dir) dir_closed(); #ifdef HAVE_DIRFD - if ((args.fd = dirfd(dirp->dir)) == -1) + if ((fd = dirfd(dirp->dir)) == -1) rb_sys_fail_path(dir_inspect(base)); #endif base = dirp->path; } - args.glob.base = RSTRING_PTR(base); + args.base = RSTRING_PTR(base); } #if defined _WIN32 || defined __APPLE__ enc = rb_utf8_encoding(); #endif - return ruby_brace_expand(RSTRING_PTR(str), flags, - push_caller, (VALUE)&args, enc, str); + return ruby_glob0(RSTRING_PTR(str), fd, args.base, flags, &rb_glob_funcs, + (VALUE)&args, enc); } static VALUE -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/