[前][次][番号順一覧][スレッド一覧]

ruby-changes:52598

From: shirosaki <ko1@a...>
Date: Sat, 22 Sep 2018 10:11:46 +0900 (JST)
Subject: [ruby-changes:52598] shirosaki:r64810 (trunk): dir.c: performance fix with braces

shirosaki	2018-09-22 10:11:40 +0900 (Sat, 22 Sep 2018)

  New Revision: 64810

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=64810

  Log:
    dir.c: performance fix with braces
    
    Braces were expanded before ruby_glob0(). This caused
    replace_real_basename() to be called repeatedly for the same plain
    patterns. Move brace expansion into glob_helper() in ruby_glob0() to
    reduce replace_real_basename() calls.
    This fix changes the order of glob results.
    [Feature #13167] [Fix GH-1864]

  Modified files:
    trunk/dir.c
Index: dir.c
===================================================================
--- dir.c	(revision 64809)
+++ dir.c	(revision 64810)
@@ -1534,7 +1534,7 @@ do_opendir(const int basefd, size_t base https://github.com/ruby/ruby/blob/trunk/dir.c#L1534
 }
 
 /* Globing pattern */
-enum glob_pattern_type { PLAIN, ALPHA, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR };
+enum glob_pattern_type { PLAIN, ALPHA, BRACE, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR };
 
 /* Return nonzero if S has any special globbing chars in it.  */
 static enum glob_pattern_type
@@ -1542,15 +1542,20 @@ has_magic(const char *p, const char *pen https://github.com/ruby/ruby/blob/trunk/dir.c#L1542
 {
     const int escape = !(flags & FNM_NOESCAPE);
     int hasalpha = 0;
+    int hasmagical = 0;
 
     register char c;
 
     while (p < pend && (c = *p++) != 0) {
 	switch (c) {
+	  case '{':
+	    return BRACE;
+
 	  case '*':
 	  case '?':
 	  case '[':
-	    return MAGICAL;
+	    hasmagical = 1;
+	    break;
 
 	  case '\\':
 	    if (escape && p++ >= pend)
@@ -1575,7 +1580,7 @@ has_magic(const char *p, const char *pen https://github.com/ruby/ruby/blob/trunk/dir.c#L1580
 	p = Next(p-1, pend, enc);
     }
 
-    return hasalpha ? ALPHA : PLAIN;
+    return hasmagical ? MAGICAL : hasalpha ? ALPHA : PLAIN;
 }
 
 /* Find separator in globbing pattern. */
@@ -1596,6 +1601,13 @@ find_dirsep(const char *p, const char *p https://github.com/ruby/ruby/blob/trunk/dir.c#L1601
 	    open = 0;
 	    continue;
 
+	  case '{':
+	    open = 1;
+	    continue;
+	  case '}':
+	    open = 0;
+	    continue;
+
 	  case '/':
 	    if (!open)
 		return (char *)p-1;
@@ -1671,6 +1683,21 @@ glob_make_pattern(const char *p, const c https://github.com/ruby/ruby/blob/trunk/dir.c#L1683
 	    const enum glob_pattern_type non_magic = (USE_NAME_ON_FS || FNM_SYSCASE) ? PLAIN : ALPHA;
 	    char *buf;
 
+	    if (magic == BRACE) {
+		/* brace pattern is parsed after expansion */
+		buf = GLOB_ALLOC_N(char, e-p+1);
+		if (!buf) {
+		    GLOB_FREE(tmp);
+		    goto error;
+		}
+		memcpy(buf, p, e-p);
+		buf[e-p] = '\0';
+		tmp->type = BRACE;
+		tmp->str = buf;
+		*tail = tmp;
+		tmp->next = 0;
+		return list;
+	    }
 	    if (!(FNM_SYSCASE || magic > non_magic) && !recursive && *m) {
 		const char *m2;
 		while (has_magic(m+1, m2 = find_dirsep(m+1, e, flags, enc), flags, enc) <= non_magic &&
@@ -2001,6 +2028,58 @@ dirent_match(const char *pat, rb_encodin https://github.com/ruby/ruby/blob/trunk/dir.c#L2028
     return 0;
 }
 
+struct push_glob_args {
+    int fd;
+    const char *path;
+    size_t baselen;
+    size_t namelen;
+    int dirsep; /* '/' should be placed before appending child entry's name to 'path'. */
+    rb_pathtype_t pathtype; /* type of 'path' */
+    int flags;
+    const ruby_glob_funcs_t *funcs;
+    VALUE arg;
+};
+
+struct dirent_brace_args {
+    const char *name;
+    const struct dirent *dp;
+    int flags;
+};
+
+static int
+dirent_match_brace(const char *pattern, VALUE val, void *enc)
+{
+    struct dirent_brace_args *arg = (struct dirent_brace_args *)val;
+
+    return dirent_match(pattern, enc, arg->name, arg->dp, arg->flags);
+}
+
+/* join paths from pattern list of glob_make_pattern() */
+static const char*
+join_path_from_pattern(struct glob_pattern **beg)
+{
+    struct glob_pattern *p;
+    const char *path = "";
+
+    for (p = *beg; p; p = p->next) {
+	const char *str;
+	switch (p->type) {
+	  case RECURSIVE:
+	    str = "**";
+	    break;
+	  default:
+	    str = p->str;
+	}
+	path = join_path(path, strlen(path), (p != *beg), str, strlen(str));
+    }
+    return path;
+}
+
+static int push_caller(const char *path, VALUE val, void *enc);
+
+static int ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
+			     rb_encoding *enc, VALUE var);
+
 static int
 glob_helper(
     int fd,
@@ -2019,7 +2098,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2098
     struct stat st;
     int status = 0;
     struct glob_pattern **cur, **new_beg, **new_end;
-    int plain = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0;
+    int plain = 0, brace = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0;
     int escape = !(flags & FNM_NOESCAPE);
     size_t pathlen = baselen + namelen;
 
@@ -2040,6 +2119,9 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2119
 	    magical = 1;
 #endif
 	    break;
+	  case BRACE:
+	    brace = 1;
+	    break;
 	  case MAGICAL:
 	    magical = 2;
 	    break;
@@ -2054,6 +2136,20 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2136
 	}
     }
 
+    if (brace) {
+	struct push_glob_args args;
+	const char* brace_path = join_path_from_pattern(beg);
+	args.fd = fd;
+	args.path = path;
+	args.baselen = baselen;
+	args.namelen = namelen;
+	args.dirsep = dirsep;
+	args.flags = flags;
+	args.funcs = funcs;
+	args.arg = arg;
+	return ruby_brace_expand(brace_path, flags, push_caller, (VALUE)&args, enc, Qfalse);
+    }
+
     if (*path) {
 	if (match_all && pathtype == path_unknown) {
 	    if (do_lstat(fd, baselen, path, &st, flags, enc) == 0) {
@@ -2191,6 +2287,7 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2287
 
 	    for (cur = beg; cur < end; ++cur) {
 		struct glob_pattern *p = *cur;
+		struct dirent_brace_args args;
 		if (p->type == RECURSIVE) {
 		    if (new_pathtype == path_directory || /* not symlink but real directory */
 			new_pathtype == path_exist) {
@@ -2200,6 +2297,14 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2297
 		    p = p->next; /* 0 times recursion */
 		}
 		switch (p->type) {
+		  case BRACE:
+		    args.name = name;
+		    args.dp = dp;
+		    args.flags = flags;
+		    if (ruby_brace_expand(p->str, flags, dirent_match_brace,
+					  (VALUE)&args, enc, Qfalse) > 0)
+			*new_end++ = p->next;
+		    break;
 		  case ALPHA:
 # if USE_NAME_ON_FS == USE_NAME_ON_FS_BY_FNMATCH
 		    if (plainname) {
@@ -2298,6 +2403,24 @@ glob_helper( https://github.com/ruby/ruby/blob/trunk/dir.c#L2403
 }
 
 static int
+push_caller(const char *path, VALUE val, void *enc)
+{
+    struct push_glob_args *arg = (struct push_glob_args *)val;
+    struct glob_pattern *list;
+    int status;
+
+    list = glob_make_pattern(path, path + strlen(path), arg->flags, enc);
+    if (!list) {
+	return -1;
+    }
+    status = glob_helper(arg->fd, arg->path, arg->baselen, arg->namelen, arg->dirsep,
+			 arg->pathtype, &list, &list + 1, arg->flags, arg->funcs,
+			 arg->arg, enc);
+    glob_free_pattern(list);
+    return status;
+}
+
+static int
 ruby_glob0(const char *path, int fd, const char *base, int flags,
 	   const ruby_glob_funcs_t *funcs, VALUE arg,
 	   rb_encoding *enc)
@@ -2489,25 +2612,11 @@ ruby_brace_glob(const char *str, int fla https://github.com/ruby/ruby/blob/trunk/dir.c#L2612
     return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding());
 }
 
-struct push_glob_args {
-    struct glob_args glob;
-    int flags;
-    int fd;
-};
-
-static int
-push_caller(const char *path, VALUE val, void *enc)
-{
-    struct push_glob_args *arg = (struct push_glob_args *)val;
-
-    return ruby_glob0(path, arg->fd, arg->glob.base, arg->flags, &rb_glob_funcs,
-		      (VALUE)&arg->glob, enc);
-}
-
 static int
 push_glob(VALUE ary, VALUE str, VALUE base, int flags)
 {
-    struct push_glob_args args;
+    struct glob_args args;
+    int fd;
     rb_encoding *enc = rb_enc_get(str);
 
 #if defined _WIN32 || defined __APPLE__
@@ -2518,30 +2627,29 @@ push_glob(VALUE ary, VALUE str, VALUE ba https://github.com/ruby/ruby/blob/trunk/dir.c#L2627
     if (rb_enc_to_index(enc) == ENCINDEX_US_ASCII)
 	enc = rb_ascii8bit_encoding();
     flags |= GLOB_VERBOSE;
-    args.glob.func = push_pattern;
-    args.glob.value = ary;
-    args.glob.enc = enc;
-    args.glob.base = 0;
-    args.flags = flags;
-    args.fd = AT_FDCWD;
+    args.func = push_pattern;
+    args.value = ary;
+    args.enc = enc;
+    args.base = 0;
+    fd = AT_FDCWD;
     if (!NIL_P(base)) {
 	if (!RB_TYPE_P(base, T_STRING) || !rb_enc_check(str, base)) {
 	    struct dir_data *dirp = DATA_PTR(base);
 	    if (!dirp->dir) dir_closed();
 #ifdef HAVE_DIRFD
-	    if ((args.fd = dirfd(dirp->dir)) == -1)
+	    if ((fd = dirfd(dirp->dir)) == -1)
 		rb_sys_fail_path(dir_inspect(base));
 #endif
 	    base = dirp->path;
 	}
-	args.glob.base = RSTRING_PTR(base);
+	args.base = RSTRING_PTR(base);
     }
 #if defined _WIN32 || defined __APPLE__
     enc = rb_utf8_encoding();
 #endif
 
-    return ruby_brace_expand(RSTRING_PTR(str), flags,
-			     push_caller, (VALUE)&args, enc, str);
+    return ruby_glob0(RSTRING_PTR(str), fd, args.base, flags, &rb_glob_funcs,
+		      (VALUE)&args, enc);
 }
 
 static VALUE

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]