bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: fts vs. simulated-inode file systems: FUSE-based, FAT, smbfs, cifs,


From: Jim Meyering
Subject: Re: fts vs. simulated-inode file systems: FUSE-based, FAT, smbfs, cifs, ...
Date: Thu, 12 Oct 2006 12:32:43 +0200

I wrote:
> This started with the bug report:
>
>   http://savannah.gnu.org/bugs/?17877
>   Invalid "No such file or directory" error on filesystem without stable 
> inode numbers
...
> Anyhow, here's the diff:
> [tested via valgrind on coreutils' tests of du, chmod, chown, chgrp,
> and via "make check" for find]
>
> FWIW, I plan to separate this into at least two separate patches
> before checking it in.

Separating the patch into parts wasn't really an option after all.
I've checked this in:

2006-10-12  Jim Meyering  <address@hidden>

        Big performance improvement for fts-based tools that use FTS_NOSTAT.
        Avoid spurious inode-mismatch problems on non-POSIX file systems.
        Details: http://article.gmane.org/gmane.comp.lib.gnulib.bugs/7416
        * lib/fts_.h (FTS_DEFER_STAT): Define new flag.
        (FTS_OPTIONMASK): Extend the mask to reflect this addition.
        * lib/fts.c (DT_IS_KNOWN, DT_MUST_BE): Define.
        (FTS_NO_STAT_REQUIRED, FTS_STAT_REQUIRED): Define.
        (fts_set_stat_required): New function.
        (fts_open): Defer the calls to fts_stat, if possible or requested.
        Move the code that maps a command-line fts_info value FTS_DOT to FTS_D
        into fts_stat itself.
        (fts_read): Perform any required (deferred) fts_stat call.
        (fts_build): Likewise, for the directory we're about to open and read.
        In the readdir loop, carefully decide whether each entry will require
        an eventual call to fts_stat, using dirent.d_type info if available.
        (fts_stat): Move the test for whether to honor FTS_COMFOLLOW on
        a command line argument into this function.  Update all callers.
        Map a return value of FTS_DOT to FTS_D for a command line argument.
        * modules/fts (Depends-on): Add d-type.  Alphabetize.
        Thanks to Miklos Szeredi for his tenacity and for the initial
        bug report about "find" failing on a FUSE-based file system.

Index: lib/fts_.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/fts_.h,v
retrieving revision 1.6
diff -u -r1.6 fts_.h
--- lib/fts_.h  17 Aug 2006 20:34:21 -0000      1.6
+++ lib/fts_.h  12 Oct 2006 10:22:23 -0000
@@ -123,7 +123,19 @@
      through the file descriptor member, fts_cwd_fd.  */
 # define FTS_CWDFD             0x0200

-# define FTS_OPTIONMASK        0x03ff          /* valid user option mask */
+  /* Historically, for each directory that fts initially encounters, it would
+     open it, read all entries, and stat each entry, storing the results, and
+     then it would process the first entry.  But that behavior is bad for
+     locality of reference, and also causes trouble with inode-simulating
+     file systems like FAT, CIFS, FUSE-based ones, etc., when entries from
+     their name/inode cache are flushed too early.
+     Use this flag to make fts_open and fts_read defer the stat/lstat/fststat
+     of each entry until it actually processed.  However, note that if you use
+     this option and also specify a comparison function, that function may not
+     examine any data via fts_statp.  */
+# define FTS_DEFER_STAT                0x0400
+
+# define FTS_OPTIONMASK        0x07ff          /* valid user option mask */

 # define FTS_NAMEONLY  0x1000          /* (private) child names only */
 # define FTS_STOP      0x2000          /* (private) unrecoverable error */
Index: lib/fts.c
===================================================================
RCS file: /sources/gnulib/gnulib/lib/fts.c,v
retrieving revision 1.19
diff -u -r1.19 fts.c
--- lib/fts.c   5 Oct 2006 21:38:10 -0000       1.19
+++ lib/fts.c   12 Oct 2006 10:22:23 -0000
@@ -81,6 +81,22 @@
 # define _D_EXACT_NAMLEN(dirent) strlen ((dirent)->d_name)
 #endif

+#if HAVE_STRUCT_DIRENT_D_TYPE
+/* True if the type of the directory entry D is known.  */
+# define DT_IS_KNOWN(d) ((d)->d_type != DT_UNKNOWN)
+/* True if the type of the directory entry D must be T.  */
+# define DT_MUST_BE(d, t) ((d)->d_type == (t))
+#else
+# define DT_IS_KNOWN(d) false
+# define DT_MUST_BE(d, t) false
+#endif
+
+enum
+{
+  FTS_NO_STAT_REQUIRED = 1,
+  FTS_STAT_REQUIRED = 2
+};
+
 #ifdef _LIBC
 # undef close
 # define close __close
@@ -195,6 +211,19 @@
     }                                                          \
   while (false)

+/* Overload the fts_statp->st_size member (otherwise unused, when
+   fts_info is FTS_NSOK) to indicate whether fts_read should stat
+   this entry or not.  */
+static void
+fts_set_stat_required (FTSENT *p, bool required)
+{
+  if (p->fts_info != FTS_NSOK)
+    abort ();
+  p->fts_statp->st_size = (required
+                          ? FTS_STAT_REQUIRED
+                          : FTS_NO_STAT_REQUIRED);
+}
+
 /* file-descriptor-relative opendir.  */
 /* FIXME: if others need this function, move it into lib/openat.c */
 static inline DIR *
@@ -258,6 +287,7 @@
        FTSENT *parent = NULL;
        FTSENT *tmp = NULL;     /* pacify gcc */
        size_t len;
+       bool defer_stat;

        /* Options check. */
        if (options & ~FTS_OPTIONMASK) {
@@ -340,6 +370,19 @@
                parent->fts_level = FTS_ROOTPARENTLEVEL;
          }

+       /* The classic fts implementation would call fts_stat with
+          a new entry for each iteration of the loop below.
+          If the comparison function is not specified or if the
+          FTS_DEFER_STAT option is in effect, don't stat any entry
+          in this loop.  This is an attempt to minimize the interval
+          between the initial stat/lstat/fstatat and the point at which
+          a directory argument is first opened.  This matters for any
+          directory command line argument that resides on a file system
+          without genuine i-nodes.  If you specify FTS_DEFER_STAT along
+          with a comparison function, that function must not access any
+          data via the fts_statp pointer.  */
+       defer_stat = (compar == NULL || ISSET(FTS_DEFER_STAT));
+
        /* Allocate/initialize root(s). */
        for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) {
                /* Don't allow zero-length file names. */
@@ -353,11 +396,12 @@
                p->fts_level = FTS_ROOTLEVEL;
                p->fts_parent = parent;
                p->fts_accpath = p->fts_name;
-               p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW) != 0);
-
-               /* Command-line "." and ".." are real directories. */
-               if (p->fts_info == FTS_DOT)
-                       p->fts_info = FTS_D;
+               if (defer_stat) {
+                       p->fts_info = FTS_NSOK;
+                       fts_set_stat_required(p, true);
+               } else {
+                       p->fts_info = fts_stat(sp, p, false);
+               }

                /*
                 * If comparison routine supplied, traverse in sorted
@@ -645,6 +689,19 @@
                *t++ = '/';
                memmove(t, p->fts_name, p->fts_namelen + 1);
 check_for_dir:
+               if (p->fts_info == FTS_NSOK)
+                 {
+                   switch (p->fts_statp->st_size)
+                     {
+                     case FTS_STAT_REQUIRED:
+                       p->fts_info = fts_stat(sp, p, false);
+                       break;
+                     case FTS_NO_STAT_REQUIRED:
+                       break;
+                     default:
+                       abort ();
+                     }
+                 }
                sp->fts_cur = p;
                if (p->fts_info == FTS_D)
                  {
@@ -672,6 +729,9 @@
                return (sp->fts_cur = NULL);
        }

+       if (p->fts_info == FTS_NSOK)
+         abort ();
+
        /* NUL terminate the file name.  */
        sp->fts_path[p->fts_pathlen] = '\0';

@@ -862,6 +922,11 @@
                }
                return (NULL);
        }
+       /* Rather than calling fts_stat for each and every entry encountered
+         in the readdir loop (below), stat each directory only right after
+         opening it.  */
+       if (cur->fts_info == FTS_NSOK)
+        cur->fts_info = fts_stat(sp, cur, false);

        /*
         * Nlinks is the number of possible entries of type directory in the
@@ -1004,12 +1069,38 @@
                        memmove(cp, p->fts_name, p->fts_namelen + 1);
                } else
                        p->fts_accpath = p->fts_name;
-               /* Stat it. */
-               p->fts_info = fts_stat(sp, p, false);
+
+               bool is_dir;
+               if (sp->fts_compar == NULL || ISSET(FTS_DEFER_STAT)) {
+                       /* Record what fts_read will have to do with this
+                          entry. In many cases, it will simply fts_stat it,
+                          but we can take advantage of any d_type information
+                          to optimize away the unnecessary stat calls.  I.e.,
+                          if FTS_NOSTAT is in effect and we're not following
+                          symlinks (FTS_PHYSICAL) and d_type indicates this
+                          is *not* a directory, then we won't have to stat it
+                          at all.  If it *is* a directory, then (currently)
+                          we stat it regardless, in order to get device and
+                          inode numbers.  Some day we might optimize that
+                          away, too, for directories where d_ino is known to
+                          be valid.  */
+                       bool skip_stat = (ISSET(FTS_PHYSICAL)
+                                         && ISSET(FTS_NOSTAT)
+                                         && DT_IS_KNOWN(dp)
+                                         && ! DT_MUST_BE(dp, DT_DIR));
+                       p->fts_info = FTS_NSOK;
+                       fts_set_stat_required(p, !skip_stat);
+                       is_dir = (ISSET(FTS_PHYSICAL) && ISSET(FTS_NOSTAT)
+                                 && DT_MUST_BE(dp, DT_DIR));
+               } else {
+                       p->fts_info = fts_stat(sp, p, false);
+                       is_dir = (p->fts_info == FTS_D
+                                 || p->fts_info == FTS_DC
+                                 || p->fts_info == FTS_DOT);
+               }

                /* Decrement link count if applicable. */
-               if (nlinks > 0 && (p->fts_info == FTS_D ||
-                   p->fts_info == FTS_DC || p->fts_info == FTS_DOT))
+               if (nlinks > 0 && is_dir)
                        nlinks -= nostat;

                /* We walk in directory order so "ls -f" doesn't get upset. */
@@ -1143,6 +1234,9 @@
        struct stat *sbp = p->fts_statp;
        int saved_errno;

+       if (p->fts_level == FTS_ROOTLEVEL && ISSET(FTS_COMFOLLOW))
+               follow = true;
+
 #if defined FTS_WHITEOUT && 0
        /* check for whiteout */
        if (p->fts_flags & FTS_ISW) {
@@ -1176,8 +1270,10 @@
        }

        if (S_ISDIR(sbp->st_mode)) {
-               if (ISDOT(p->fts_name))
-                       return (FTS_DOT);
+               if (ISDOT(p->fts_name)) {
+                       /* Command-line "." and ".." are real directories. */
+                       return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : 
FTS_DOT);
+               }

 #if _LGPL_PACKAGE
                {
Index: modules/fts
===================================================================
RCS file: /sources/gnulib/gnulib/modules/fts,v
retrieving revision 1.7
diff -u -r1.7 fts
--- modules/fts 28 Aug 2006 22:59:17 -0000      1.7
+++ modules/fts 12 Oct 2006 10:22:23 -0000
@@ -9,13 +9,14 @@

 Depends-on:
 cycle-check
+d-type
 dirfd
 fcntl
+fcntl-safer
 hash
 lstat
 openat
 stdbool
-fcntl-safer
 unistd-safer

 configure.ac:




reply via email to

[Prev in Thread] Current Thread [Next in Thread]