qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 2/3] Move aio implementation out of raw block driver


From: Ryan Harper
Subject: [Qemu-devel] [PATCH 2/3] Move aio implementation out of raw block driver
Date: Mon, 22 Sep 2008 18:17:34 -0500

This patch adds a Linux AIO implementation for the raw block driver.  If a raw
block device is opened with cache=off (O_DIRECT), we can use Linux AIO to
submit I/O to the block device.  Linux AIO allows multiple outstanding
requests to be in flight against the device, potentially providing higher
I/O throughput.  This implementation uses eventfd for completion
notification.

Block devices with caching enabled will continue to use POSIX AIO, since Linux
AIO falls back to synchronous I/O when used without O_DIRECT [1].


Signed-off-by: Ryan Harper <address@hidden>

1. http://lse.sourceforge.net/io/aio.html

diff --git a/Makefile b/Makefile
index 18477ba..92ca5d9 100644
--- a/Makefile
+++ b/Makefile
@@ -60,7 +60,7 @@ BLOCK_OBJS += block-raw-posix.o
 endif
 
 ifdef CONFIG_AIO
-BLOCK_OBJS += compatfd.o aio-posix.o
+BLOCK_OBJS += compatfd.o aio-posix.o aio-linux.o
 endif
 
 ######################################################################
diff --git a/Makefile.target b/Makefile.target
index 4c6b3d5..599fa8a 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -482,7 +482,7 @@ OBJS+=block-raw-posix.o
 endif
 
 ifdef CONFIG_AIO
-OBJS+=compatfd.o aio-posix.o
+OBJS+=compatfd.o aio-posix.o aio-linux.o
 endif
 
 LIBS+=-lz
diff --git a/aio-linux.c b/aio-linux.c
new file mode 100644
index 0000000..0043fd1
--- /dev/null
+++ b/aio-linux.c
@@ -0,0 +1,225 @@
+/*
+ * QEMU Linux AIO implementation for Block Raw devices
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <address@hidden>
+ * Ryan Harper <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+/***********************************************************/
+/* Unix AIO using LINUX AIO */
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "block.h"
+#include "block-aio.h"
+#include "compatfd.h"
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <linux/aio_abi.h>
+
+#define MAX_LINUX_AIO_EVENTS 256
+
+/*
+ * Invoke the raw SYS_eventfd system call with initval as its only
+ * argument and hand back whatever syscall() returned, narrowed to int.
+ */
+int eventfd(unsigned int initval)
+{
+    long rc = syscall(SYS_eventfd, initval);
+
+    return (int)rc;
+}
+
+/*
+ * Invoke the raw SYS_io_setup system call.  nr_reqs and ctx_id are passed
+ * through untouched; the syscall() result is returned narrowed to int.
+ */
+int io_setup(unsigned nr_reqs, aio_context_t *ctx_id)
+{
+    long rc = syscall(SYS_io_setup, nr_reqs, ctx_id);
+
+    return (int)rc;
+}
+
+/*
+ * Invoke the raw SYS_io_destroy system call on ctx_id and return the
+ * syscall() result narrowed to int.
+ */
+int io_destroy(aio_context_t ctx_id)
+{
+    long rc = syscall(SYS_io_destroy, ctx_id);
+
+    return (int)rc;
+}
+
+/*
+ * Invoke the raw SYS_io_getevents system call.  All five arguments are
+ * forwarded in order; the syscall() result is returned narrowed to int.
+ */
+int io_getevents(aio_context_t ctx_id, long min_nr, long nr,
+                 struct io_event *events, struct timespec *timeout)
+{
+    long rc;
+
+    rc = syscall(SYS_io_getevents, ctx_id, min_nr, nr, events, timeout);
+    return (int)rc;
+}
+
+/*
+ * Invoke the raw SYS_io_submit system call with the nr-entry iocb pointer
+ * array and return the syscall() result narrowed to int.
+ */
+int io_submit(aio_context_t ctx_id, long nr, struct iocb **iocb)
+{
+    long rc = syscall(SYS_io_submit, ctx_id, nr, iocb);
+
+    return (int)rc;
+}
+
+/*
+ * Invoke the raw SYS_io_cancel system call for iocb, letting the kernel
+ * fill *result, and return the syscall() result narrowed to int.
+ */
+int io_cancel(aio_context_t ctx_id, struct iocb *iocb, struct io_event *result)
+{
+    long rc = syscall(SYS_io_cancel, ctx_id, iocb, result);
+
+    return (int)rc;
+}
+
+/* The Linux AIO backend uses the generic AIOState layout under its own name. */
+typedef AIOState LinuxAioState;
+
+/* File-scope state shared by every function of this backend. */
+static LinuxAioState *linux_aio_state; /* non-NULL once la_init() has run   */
+static aio_context_t aio_ctxt_id;      /* context handed to the io_* calls  */
+static int aio_efd;                    /* eventfd used as completion signal */
+static int outstanding_requests;       /* submitted and not yet retired     */
+
+/*
+ * Queue one asynchronous read or write on fd through the Linux AIO context.
+ *
+ * bs, cb, opaque: handed to qemu_aio_get() to obtain the request block.
+ * fd:             file descriptor the transfer is issued against.
+ * sector_num:     first sector; converted to a byte offset (* 512).
+ * buf:            data buffer for the transfer.
+ * nb_sectors:     number of 512-byte sectors to transfer.
+ * write:          non-zero selects IOCB_CMD_PWRITE, zero IOCB_CMD_PREAD.
+ *
+ * Returns the request on success, or NULL when no request block could be
+ * obtained or io_submit() did not accept the request.
+ */
+static RawAIOCB *la_submit(BlockDriverState *bs, int fd,
+                                   int64_t sector_num, uint8_t *buf,
+                                   int nb_sectors, int write,
+                                   BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawAIOCB *acb;
+    struct iocb *iocbs[1];
+    int err;
+
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb) {
+        fprintf(stderr, "%s: qemu_aio_get returned NULL!?!\n", __FUNCTION__);
+        return NULL;
+    }
+
+    /* Start from an all-zero iocb so that fields not set below (priority,
+     * reserved words) never carry values left over from an earlier use of
+     * this request block. */
+    memset(&acb->linux_aiocb, 0, sizeof(acb->linux_aiocb));
+
+    acb->linux_aiocb.aio_lio_opcode = write ? IOCB_CMD_PWRITE : IOCB_CMD_PREAD;
+    acb->linux_aiocb.aio_data = (unsigned long)acb;
+    acb->linux_aiocb.aio_fildes = fd;
+    /* Ask for completions to be signalled on the backend's eventfd. */
+    acb->linux_aiocb.aio_flags = IOCB_FLAG_RESFD;
+    acb->linux_aiocb.aio_resfd = aio_efd;
+    acb->linux_aiocb.aio_buf = (unsigned long)buf;
+    /* Widen before multiplying so a large sector count cannot overflow int. */
+    acb->linux_aiocb.aio_nbytes = (int64_t)nb_sectors * 512;
+    acb->linux_aiocb.aio_offset = sector_num * 512;
+
+    iocbs[0] = &acb->linux_aiocb;
+
+    do {
+        err = io_submit(aio_ctxt_id, 1, iocbs);
+    } while (err == -1 && errno == EINTR);
+
+    if (err != 1) {
+        /* The request was never linked into the state list, so releasing
+         * it here leaves no stale pointer behind. */
+        qemu_aio_release(acb);
+        return NULL;
+    }
+
+    /* Track the request only once it has been accepted.
+     * NOTE(review): nothing in this file removes entries from this list
+     * when a request completes or is cancelled. */
+    acb->next = linux_aio_state->first_aio;
+    linux_aio_state->first_aio = acb;
+    outstanding_requests++;
+
+    return acb;
+}
+
+/*
+ * Flush hook (registered with the fd handler and exported in the driver
+ * table): reports the current value of outstanding_requests.
+ */
+static int la_flush(void)
+{
+    const int pending = outstanding_requests;
+
+    return pending;
+}
+
+/*
+ * Try to cancel an in-flight request.
+ *
+ * The request is released and outstanding_requests decremented only when
+ * io_cancel() reports success.  When the cancellation is refused (for
+ * example because the request has already finished), the request is left
+ * alone so that la_completion() retires it exactly once.  Releasing it
+ * here as well would let the completion handler run on a released request
+ * and decrement outstanding_requests a second time.
+ */
+static void la_cancel(BlockDriverAIOCB *baiocb)
+{
+    RawAIOCB *acb = (void *)baiocb;
+    struct io_event result;
+    int err;
+
+    do {
+        err = io_cancel(aio_ctxt_id, &acb->linux_aiocb, &result);
+    } while (err == -1 && errno == EINTR);
+
+    if (err != 0) {
+        /* NOTE(review): the completion callback will still be invoked for
+         * this request; a caller that needs it fully quiesced has to wait
+         * for that completion. */
+        return;
+    }
+
+    outstanding_requests--;
+
+    qemu_aio_release(acb);
+}
+
+/*
+ * eventfd read handler: reap finished requests and run their callbacks.
+ *
+ * opaque is unused.  The eventfd counter is read first; EAGAIN means
+ * nothing is pending and the handler returns.  Any other eventfd read
+ * failure, or an io_getevents() failure, ends the process with exit(1).
+ *
+ * For every reaped event the request's callback is invoked with
+ *   0     when the full aio_nbytes were transferred,
+ *   res   (negative) when the event carries an error,
+ *   -EIO  when fewer bytes than requested were transferred,
+ * after which the request is released and outstanding_requests decremented.
+ */
+static void la_completion(void *opaque)
+{
+    struct io_event events[MAX_LINUX_AIO_EVENTS];
+    struct timespec ts = {0, 0};
+    uint64_t count;
+    int i, ret;
+
+    BLPRINTF("%s ->\n", __FUNCTION__);
+    do {
+        ret = read(aio_efd, &count, sizeof(count));
+        if (ret == -1 && errno == EAGAIN) {
+            BLPRINTF("linux: got EAGAIN\n");
+            return;
+        }
+    } while (ret == -1 && errno == EINTR);
+
+    if (ret != (int)sizeof(count)) {
+        BLPRINTF("bad read from eventfd (ret=%d errno=%d)\n", ret, errno);
+        exit(1);
+    }
+
+    BLPRINTF("%s: after fd read\n", __FUNCTION__);
+    BLPRINTF("%s: calling io_getevents, min=%lu events\n", __FUNCTION__,
+             (unsigned long)count);
+
+    /* Reap in batches.  min_nr must never exceed the size of events[],
+     * otherwise io_getevents() is asked for more than the buffer holds. */
+    while (count > 0) {
+        long min_nr = count < ARRAY_SIZE(events) ? (long)count
+                                                 : (long)ARRAY_SIZE(events);
+
+        do {
+            ret = io_getevents(aio_ctxt_id, min_nr, ARRAY_SIZE(events),
+                               events, &ts);
+        } while (ret == -1 && errno == EINTR);
+
+        if (ret < 0) {
+            BLPRINTF("io_getevents failed: %d %m\n", ret);
+            exit(1);
+        }
+
+        /* Zero timeout: nothing more is ready right now. */
+        if (ret == 0)
+            break;
+
+        for (i = 0; i < ret; i++) {
+            RawAIOCB *acb = (RawAIOCB *)(unsigned long)events[i].data;
+            int64_t res = events[i].res;
+            int status;
+
+            if (res < 0)
+                status = (int)res;
+            else if ((uint64_t)res == acb->linux_aiocb.aio_nbytes)
+                status = 0;
+            else
+                status = -EIO; /* short transfer must not look like success */
+
+            acb->common.cb(acb->common.opaque, status);
+            qemu_aio_release(acb);
+
+            outstanding_requests--;
+        }
+
+        /* One call may return more events than the counter announced. */
+        count = (uint64_t)ret >= count ? 0 : count - (uint64_t)ret;
+    }
+    BLPRINTF("%s <-\n", __FUNCTION__);
+}
+
+/*
+ * One-time set-up of the Linux AIO backend.
+ *
+ * Creates the eventfd, switches it to non-blocking mode, creates the AIO
+ * context, allocates the backend state and registers la_completion() as
+ * the eventfd read handler.  Calling it again after a successful set-up
+ * is a no-op.
+ *
+ * Returns 0 on success or a negative errno value on failure.  On failure
+ * nothing is left allocated or open and the file-scope state is untouched.
+ */
+static int la_init(void)
+{
+    LinuxAioState *s;
+    aio_context_t ctx = 0;
+    int efd, fl, err;
+
+    if (linux_aio_state)
+        return 0;
+
+    efd = eventfd(0);
+    if (efd < 0)
+        return -errno;
+
+    /* switch to non-blocking eventfd mode, keeping the flags already set */
+    fl = fcntl(efd, F_GETFL);
+    if (fl == -1 || fcntl(efd, F_SETFL, fl | O_NONBLOCK) == -1) {
+        err = -errno;
+        goto fail_close;
+    }
+
+    if (io_setup(MAX_LINUX_AIO_EVENTS, &ctx) < 0) {
+        err = -errno;
+        goto fail_close;
+    }
+
+    s = qemu_malloc(sizeof(LinuxAioState));
+    if (s == NULL) {
+        err = -ENOMEM;
+        goto fail_destroy;
+    }
+
+    /* Publish the file-scope state only after every step has succeeded. */
+    aio_efd = efd;
+    aio_ctxt_id = ctx;
+    s->first_aio = NULL;
+    s->fd = efd;
+
+    qemu_aio_set_fd_handler(aio_efd, la_completion, NULL, la_flush, NULL);
+
+    linux_aio_state = s;
+
+    return 0;
+
+fail_destroy:
+    io_destroy(ctx);
+fail_close:
+    close(efd);
+    return err;
+}
+
+/* Driver table for the "linux" backend; returned by linux_aio_init(). */
+static AIODriver linux_aio_drv = {
+    .name   = "linux",
+    .submit = la_submit,
+    .flush  = la_flush,
+    .cancel = la_cancel,
+};
+
+/*
+ * Public entry point: run la_init() and return the backend's driver
+ * table, or NULL when la_init() reports a non-zero result.
+ */
+AIODriver *linux_aio_init(void)
+{
+    return la_init() == 0 ? &linux_aio_drv : NULL;
+}
diff --git a/block-aio.h b/block-aio.h
index b8597d0..b1492d9 100644
--- a/block-aio.h
+++ b/block-aio.h
@@ -21,6 +21,7 @@
 #include "qemu-aio.h"
 #ifdef CONFIG_AIO
 #include <aio.h>
+#include <linux/aio_abi.h>
 #endif
 
 //#define DEBUG_BLOCK_AIO
@@ -33,6 +34,7 @@
 typedef struct RawAIOCB {
     BlockDriverAIOCB common;
     struct aiocb posix_aiocb;
+    struct iocb linux_aiocb;
     struct RawAIOCB *next;
     int ret;
 } RawAIOCB;
@@ -75,4 +77,5 @@ typedef struct AIOState
  
 AIODriver* posix_aio_init(void);
 
+AIODriver* linux_aio_init(void);
 #endif /* QEMU_BLOCK_AIO_H */
diff --git a/block-raw-posix.c b/block-raw-posix.c
index cab7094..80034ac 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -125,8 +125,11 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
         }
     }
 #endif
-    /* init aio driver for this block device */
-    s->aio_dvr = posix_aio_init();
+    /* init aio driver for this block device, linux if O_DIRECT is enabled */
+    if (flags & BDRV_O_DIRECT)
+        s->aio_dvr = linux_aio_init();
+    else
+        s->aio_dvr = posix_aio_init();
     return 0;
 }
 
@@ -756,8 +759,11 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
         s->fd_media_changed = 1;
     }
 #endif
-    /* init aio driver for this block device */
-    s->aio_dvr = posix_aio_init();
+    /* init aio driver for this block device, linux if O_DIRECT is enabled */
+    if (flags & BDRV_O_DIRECT)
+        s->aio_dvr = linux_aio_init();
+    else
+        s->aio_dvr = posix_aio_init();
     return 0;
 }
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]