qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] raw-posix: Linearize direct I/O on Linux NFS


From: Stefan Hajnoczi
Subject: [Qemu-devel] [PATCH] raw-posix: Linearize direct I/O on Linux NFS
Date: Fri, 15 Apr 2011 14:40:55 +0100

The Linux NFS client issues separate NFS requests for vectored direct
I/O writes.  For example, a pwritev() with 8 elements results in 8 write
requests to the server.  This is very inefficient, and a kernel-side fix
is neither trivial nor likely to be available soon.

This patch detects files on NFS that are opened with O_DIRECT and, for
requests with more than one iovec element, sets the QEMU_AIO_MISALIGNED
flag to force them to bounce through a linear buffer.

Khoa Huynh <address@hidden> reports the following ffsb benchmark
results over 1 Gbit Ethernet:

Test (threads=8)               unpatched patched
                                  (MB/s)  (MB/s)
Large File Creates (bs=256 KB)      20.5   112.0
Sequential Reads (bs=256 KB)        58.7   112.0
Large File Creates (bs=8 KB)         5.2     5.8
Sequential Reads (bs=8 KB)          46.7    80.9
Random Reads (bs=8 KB)               8.7    23.4
Random Writes (bs=8 KB)             39.6    44.0
Mail Server (bs=8 KB)               10.2    23.6

Test (threads=1)               unpatched patched
                                  (MB/s)  (MB/s)
Large File Creates (bs=256 KB)      14.5    49.8
Sequential Reads (bs=256 KB)        87.9    83.9
Large File Creates (bs=8 KB)         4.8     4.8
Sequential Reads (bs=8 KB)          23.2    23.1
Random Reads (bs=8 KB)               4.8     4.7
Random Writes (bs=8 KB)              9.4    12.8
Mail Server (bs=8 KB)                5.4     7.3

Signed-off-by: Stefan Hajnoczi <address@hidden>
---
 block/raw-posix.c |   55 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6b72470..40b7c61 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -49,8 +49,10 @@
 #ifdef __linux__
 #include <sys/ioctl.h>
 #include <sys/param.h>
+#include <sys/vfs.h>
 #include <linux/cdrom.h>
 #include <linux/fd.h>
+#include <linux/magic.h>
 #endif
 #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
 #include <signal.h>
@@ -124,6 +126,7 @@ typedef struct BDRVRawState {
 #endif
     uint8_t *aligned_buf;
     unsigned aligned_buf_size;
+    bool force_linearize;
 #ifdef CONFIG_XFS
     bool is_xfs : 1;
 #endif
@@ -136,6 +139,32 @@ static int64_t raw_getlength(BlockDriverState *bs);
 static int cdrom_reopen(BlockDriverState *bs);
 #endif
 
+#if defined(__linux__)
+static bool is_vectored_io_slow(int fd, int open_flags)
+{
+    struct statfs stfs;
+    int ret;
+
+    do {
+        ret = fstatfs(fd, &stfs);
+    } while (ret != 0 && errno == EINTR);
+
+    /*
+     * Linux NFS client splits vectored direct I/O requests into separate NFS
+     * requests so it is faster to submit a single buffer instead.
+     */
+    if (!ret && stfs.f_type == NFS_SUPER_MAGIC && (open_flags & O_DIRECT)) {
+        return true;
+    }
+    return false;
+}
+#else /* !defined(__linux__) */
+static bool is_vectored_io_slow(int fd, int open_flags)
+{
+    return false;
+}
+#endif
+
 static int raw_open_common(BlockDriverState *bs, const char *filename,
                            int bdrv_flags, int open_flags)
 {
@@ -167,6 +196,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
     }
     s->fd = fd;
     s->aligned_buf = NULL;
+    s->force_linearize = is_vectored_io_slow(fd, s->open_flags);
 
     if ((bdrv_flags & BDRV_O_NOCACHE)) {
         /*
@@ -536,20 +566,27 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
         return NULL;
 
     /*
+     * Check if buffers need to be copied into a single linear buffer.
+     */
+    if (s->force_linearize && qiov->niov > 1) {
+        type |= QEMU_AIO_MISALIGNED;
+    }
+
+    /*
      * If O_DIRECT is used the buffer needs to be aligned on a sector
-     * boundary.  Check if this is the case or telll the low-level
+     * boundary.  Check if this is the case or tell the low-level
      * driver that it needs to copy the buffer.
      */
-    if (s->aligned_buf) {
-        if (!qiov_is_aligned(bs, qiov)) {
-            type |= QEMU_AIO_MISALIGNED;
+    if (s->aligned_buf && !qiov_is_aligned(bs, qiov)) {
+        type |= QEMU_AIO_MISALIGNED;
+    }
+
 #ifdef CONFIG_LINUX_AIO
-        } else if (s->use_aio) {
-            return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
-                               nb_sectors, cb, opaque, type);
-#endif
-        }
+    if (s->use_aio && (type & QEMU_AIO_MISALIGNED) == 0) {
+        return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
+                           nb_sectors, cb, opaque, type);
     }
+#endif
 
     return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
                        cb, opaque, type);
-- 
1.7.4.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]