bug-guix
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#24937: "deleting unused links" GC phase is too slow


From: Ludovic Courtès
Subject: bug#24937: "deleting unused links" GC phase is too slow
Date: Sun, 11 Dec 2016 14:46:13 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.1 (gnu/linux)

Hello!

Here’s a proposed patch that follows your suggestion, Mark, but places
an upper bound on the number of directory entries loaded in memory.

On my laptop, which has ~500k entries in /gnu/store/.links, the result
is something like this (notice the inode numbers in ‘lstat’ calls):

--8<---------------cut here---------------start------------->8---
13738 write(4, "gmlo\0\0\0\0\31\0\0\0\0\0\0\0deleting unused "..., 48) = 48
13738 open("/gnu/store/.links", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 8
13738 fstat(8, {st_dev=makedev(8, 2), st_ino=4014083, st_mode=S_IFDIR|0755, 
st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=119224, 
st_size=60977152, st_atime=2016/12/11-12:01:59, st_mtime=2016/12/11-09:39:45, 
st_ctime=2016/12/11-09:39:45}) = 0
13738 getdents(8, {{d_ino=4014083, d_off=4294967296, d_reclen=24, d_name="."}
[...]
13738 
lstat("/gnu/store/.links/1f2f3g8waxwymp9sl2slcfyara164i8w1y2sz3h9js2fcviv2rnc", 
{st_dev=makedev(8, 2), st_ino=47, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=88, st_size=41482, 
st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, 
st_ctime=2015/11/25-11:29:20}) = 0
13738 
lstat("/gnu/store/.links/1p25kpyfw354in4kykmgh5sy9h925hnil1jdzgxhz7n6abbws8px", 
{st_dev=makedev(8, 2), st_ino=65, st_mode=S_IFREG|0444, st_nlink=9, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=8, st_size=2313, 
st_atime=2016/12/08-21:02:26, st_mtime=1970/01/01-01:00:01, 
st_ctime=2016/12/11-01:44:49}) = 0
13738 
lstat("/gnu/store/.links/163187g637br9ys5pmshb01wjav53bs1g1a83m7c2alpdyx3yqz2", 
{st_dev=makedev(8, 2), st_ino=68, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=32, st_size=13561, 
st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, 
st_ctime=2015/11/25-11:29:20}) = 0
[...]
13738 getdents(8, {{d_ino=4257114, d_off=1734093409898198492,
[...]
13738 
lstat("/gnu/store/.links/1m6g06i01ybbkhjjbirjnj7fckw1b772cwygkvbd6v6zgkln7f7m", 
{st_dev=makedev(8, 2), st_ino=19, st_mode=S_IFREG|0444, st_nlink=4, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=8, st_size=2553, 
st_atime=2016/12/08-21:02:54, st_mtime=1970/01/01-01:00:01, 
st_ctime=2016/12/07-00:05:19}) = 0
13738 
lstat("/gnu/store/.links/1ml8n3q55ikn8h60sn67jq1y7z7mvdp5kwr33pqrid2r6kk1d4kb", 
{st_dev=makedev(8, 2), st_ino=30, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=8, st_size=2090, 
st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, 
st_ctime=2015/11/25-11:29:21}) = 0
13738 
lstat("/gnu/store/.links/1c8cvwlqyyqby3k13cwm40g26pwca5iiz5dcj43xrgn9y91lfvc2", 
{st_dev=makedev(8, 2), st_ino=33, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=16, st_size=7958, 
st_atime=2015/11/04-18:55:32, st_mtime=1970/01/01-01:00:01, 
st_ctime=2016/01/05-11:35:49}) = 0
[...]
13738 getdents(8, {{d_ino=328672,
[...]
13738 
lstat("/gnu/store/.links/1l55l59dxb74himmkfzx5v63cv7287i6rjhdns1fdlwajqd73lnz", 
{st_dev=makedev(8, 2), st_ino=21, st_mode=S_IFREG|0444, st_nlink=31, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=8, st_size=615, 
st_atime=2016/12/08-21:02:30, st_mtime=1970/01/01-01:00:01, 
st_ctime=2016/12/11-01:44:47}) = 0
13738 
lstat("/gnu/store/.links/1c7mm5amw743mb45f1zg4d4r3g549ch35wks9izkcgkx0jirpxsg", 
{st_dev=makedev(8, 2), st_ino=48, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=360, st_size=176750, 
st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, 
st_ctime=2015/11/25-11:29:20}) = 0
13738 
lstat("/gnu/store/.links/0z5s7b0yk8mfn1np6gk3cdbmpnjgxg1g0l8vfq1aa01zwp06d3f0", 
{st_dev=makedev(8, 2), st_ino=61, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, 
st_gid=0, st_blksize=4096, st_blocks=8, st_size=1440, 
st_atime=2016/11/03-20:37:50, st_mtime=1970/01/01-01:00:01, 
st_ctime=2016/11/07-00:01:57}) = 0
--8<---------------cut here---------------end--------------->8---

I can’t tell exactly how this affects performance because my machine has
an SSD where this operation takes ~3 seconds on a cold cache.  I suspect
it has performance comparable to that of doing all the ‘readdir’ at once
followed by all the ‘lstat’.

Mark, how does that sound?

I’d like to commit it soon if there are no objections.

Thanks,
Ludo’.

diff --git a/nix/libstore/gc.cc b/nix/libstore/gc.cc
index 72eff52..db58603 100644
--- a/nix/libstore/gc.cc
+++ b/nix/libstore/gc.cc
@@ -545,6 +545,9 @@ void LocalStore::tryToDelete(GCState & state, const Path & 
path)
 }
 
 
+/* Like 'dirent', but with just what we need.  */
+typedef std::pair<Path, ino_t> MiniDirEntry;
+
 /* Unlink all files in /nix/store/.links that have a link count of 1,
    which indicates that there are no other links and so they can be
    safely deleted.  FIXME: race condition with optimisePath(): we
@@ -555,32 +558,57 @@ void LocalStore::removeUnusedLinks(const GCState & state)
     AutoCloseDir dir = opendir(linksDir.c_str());
     if (!dir) throw SysError(format("opening directory `%1%'") % linksDir);
 
+    /* Maximum number of entries stored in memory; each 'MiniDirEntry' takes
+       ~80 bytes.  */
+    const size_t maxEntries = 100000;
+
     long long actualSize = 0, unsharedSize = 0;
 
-    struct dirent * dirent;
-    while (errno = 0, dirent = readdir(dir)) {
-        checkInterrupt();
-        string name = dirent->d_name;
-        if (name == "." || name == "..") continue;
-        Path path = linksDir + "/" + name;
-
-        struct stat st;
-        if (lstat(path.c_str(), &st) == -1)
-            throw SysError(format("statting `%1%'") % path);
-
-        if (st.st_nlink != 1) {
-            unsigned long long size = st.st_blocks * 512ULL;
-            actualSize += size;
-            unsharedSize += (st.st_nlink - 1) * size;
-            continue;
-        }
-
-        printMsg(lvlTalkative, format("deleting unused link `%1%'") % path);
-
-        if (unlink(path.c_str()) == -1)
-            throw SysError(format("deleting `%1%'") % path);
-
-        state.results.bytesFreed += st.st_blocks * 512;
+    bool endOfDir = false;
+
+    while (!endOfDir) {
+       /* Read as many entries as possible at once, up to 'maxEntries'.  */
+       std::list<MiniDirEntry> entries;
+       struct dirent * dirent;
+       while (errno = 0,
+              (entries.size() < maxEntries) && (dirent = readdir(dir))) {
+           checkInterrupt();
+           string name = dirent->d_name;
+           if (name == "." || name == "..") continue;
+           entries.emplace_back(MiniDirEntry(name, dirent->d_ino));
+       }
+
+       endOfDir = (dirent == NULL);
+
+       /* Sort entries by inode number to minimize disk seeks induced by the
+          following 'lstat' calls.  */
+       entries.sort([] (const MiniDirEntry& e1, const MiniDirEntry& e2) {
+               return e1.second < e2.second;
+           });
+
+       for (auto && item: entries) {
+           checkInterrupt();
+
+           Path path = linksDir + "/" + item.first;
+
+           struct stat st;
+           if (lstat(path.c_str(), &st) == -1)
+               throw SysError(format("statting `%1%'") % path);
+
+           if (st.st_nlink != 1) {
+               unsigned long long size = st.st_blocks * 512ULL;
+               actualSize += size;
+               unsharedSize += (st.st_nlink - 1) * size;
+               continue;
+           }
+
x+          printMsg(lvlTalkative, format("deleting unused link `%1%'") % path);
+
+           if (unlink(path.c_str()) == -1)
+               throw SysError(format("deleting `%1%'") % path);
+
+           state.results.bytesFreed += st.st_blocks * 512;
+       }
     }
 
     struct stat st;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]