qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 1/3] Refactor AIO interface to allow other AIO imple


From: Anthony Liguori
Subject: [Qemu-devel] [PATCH 1/3] Refactor AIO interface to allow other AIO implementations
Date: Thu, 17 Apr 2008 14:26:50 -0500

Posix AIO, especially as used by QEMU, is not very efficient for disk IO.
This patch introduces an AIO abstraction to allow multiple AIO implementations
to be used.  We can't simply replace posix-aio with linux-aio because linux-aio
only works on some filesystems and only with files opened with O_DIRECT.

This patch adds a command line option (-aio) to select the AIO implementation
to be used.  It avoids code motion to allow for easy review.  The next patch
separates out the posix-aio implementation.

Signed-off-by: Anthony Liguori <address@hidden>

diff --git a/block-aio.h b/block-aio.h
new file mode 100644
index 0000000..2fe8c58
--- /dev/null
+++ b/block-aio.h
@@ -0,0 +1,71 @@
+/*
+ * QEMU Block AIO API
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_BLOCK_AIO_H
+#define QEMU_BLOCK_AIO_H
+
+#include "qemu-common.h"
+#include "block.h"
+
+/*
+ * Operations table for one asynchronous I/O backend.
+ *
+ * The generic qemu_aio_*() and raw_aio_*() entry points forward to the
+ * driver pointed to by aio_drv.  aio_init, aio_wait_start and aio_wait_end
+ * may be left NULL; the remaining hooks are called unconditionally.
+ */
+typedef struct AIODriver
+{
+    /* Name matched by qemu_set_aio_driver() and listed by "-aio ?". */
+    const char *name;
+    /* Size of the driver's AIOCB; qemu_aio_init() copies it into bdrv_raw
+     * and bdrv_host_device. */
+    size_t aiocb_size;
+    /* Optional one-time setup, run from qemu_aio_init(). */
+    void (*aio_init)(void);
+    /* Optional hook run by qemu_aio_wait_start(). */
+    void (*aio_wait_start)(void);
+    /* Backend of qemu_aio_wait(). */
+    void (*aio_wait)(void);
+    /* Optional hook run by qemu_aio_wait_end(). */
+    void (*aio_wait_end)(void);
+    /* Backend of qemu_aio_flush(). */
+    void (*aio_flush)(void);
+    /*
+     * Queue a transfer of 'sectors' sectors starting at 'sector_num' on 'fd'.
+     * A non-zero 'write' requests a write from 'buf', zero a read into it.
+     * 'cb' is the completion function and 'opaque' its argument.
+     */
+    BlockDriverAIOCB *(*aio_submit)(BlockDriverState *bs, int fd,
+                                   int64_t sector_num, void *buf,
+                                   int sectors, int write,
+                                   BlockDriverCompletionFunc *cb,
+                                   void *opaque);
+    /* Cancel a request previously returned by aio_submit. */
+    void (*aio_cancel)(BlockDriverAIOCB *aiocb);
+    /* Link in the list of registered drivers. */
+    struct AIODriver *next;
+} AIODriver;
+
+/* Add 'drv' to the list of available drivers and make it the current one. */
+int qemu_register_aio(AIODriver *drv);
+
+/* Select the current driver by name; "?" lists the drivers and exits. */
+int qemu_set_aio_driver(const char *name);
+
+/* Driver used by the qemu_aio_*() and raw_aio_*() entry points. */
+extern AIODriver *aio_drv;
+
+/* Register the POSIX AIO driver. */
+int posix_aio_init(void);
+
+#endif /* QEMU_BLOCK_AIO_H */
diff --git a/block-raw-posix.c b/block-raw-posix.c
index 6b0009e..fee8422 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -27,6 +27,7 @@
 #include "exec-all.h"
 #endif
 #include "block_int.h"
+#include "block-aio.h"
 #include <assert.h>
 #include <aio.h>
 
@@ -243,6 +244,11 @@ static int aio_sig_num = SIGUSR2;
 static RawAIOCB *first_aio; /* AIO issued */
 static int aio_initialized = 0;
 
+static void pa_poll(void *opaque);
+static void pa_wait_start(void);
+static void pa_wait(void);
+static void pa_wait_end(void);
+
 static void aio_signal_handler(int signum)
 {
 #ifndef QEMU_IMG
@@ -259,11 +265,13 @@ static void aio_signal_handler(int signum)
 #endif
 }
 
-void qemu_aio_init(void)
+static void pa_init(void)
 {
     struct sigaction act;
 
-    aio_initialized = 1;
+#ifndef QEMU_IMG
+    qemu_register_poll(pa_poll, NULL);
+#endif
 
     sigfillset(&act.sa_mask);
     act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
@@ -284,7 +292,7 @@ void qemu_aio_init(void)
 #endif
 }
 
-void qemu_aio_poll(void)
+static void pa_poll(void *opaque)
 {
     RawAIOCB *acb, **pacb;
     int ret;
@@ -326,31 +334,29 @@ void qemu_aio_poll(void)
 }
 
 /* Wait for all IO requests to complete.  */
-void qemu_aio_flush(void)
+static void pa_flush(void)
 {
-    qemu_aio_wait_start();
-    qemu_aio_poll();
+    pa_wait_start();
+    pa_poll(NULL);
     while (first_aio) {
-        qemu_aio_wait();
+        pa_wait();
     }
-    qemu_aio_wait_end();
+    pa_wait_end();
 }
 
 /* wait until at least one AIO was handled */
 static sigset_t wait_oset;
 
-void qemu_aio_wait_start(void)
+static void pa_wait_start(void)
 {
     sigset_t set;
 
-    if (!aio_initialized)
-        qemu_aio_init();
     sigemptyset(&set);
     sigaddset(&set, aio_sig_num);
     sigprocmask(SIG_BLOCK, &set, &wait_oset);
 }
 
-void qemu_aio_wait(void)
+static void pa_wait(void)
 {
     sigset_t set;
     int nb_sigs;
@@ -362,19 +368,18 @@ void qemu_aio_wait(void)
     sigemptyset(&set);
     sigaddset(&set, aio_sig_num);
     sigwait(&set, &nb_sigs);
-    qemu_aio_poll();
+    pa_poll(NULL);
 }
 
-void qemu_aio_wait_end(void)
+static void pa_wait_end(void)
 {
     sigprocmask(SIG_SETMASK, &wait_oset, NULL);
 }
 
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
+static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int fd,
         int64_t sector_num, uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
-    BDRVRawState *s = bs->opaque;
     RawAIOCB *acb;
 
     if (fd_open(bs) < 0)
@@ -383,7 +388,7 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
     acb = qemu_aio_get(bs, cb, opaque);
     if (!acb)
         return NULL;
-    acb->aiocb.aio_fildes = s->fd;
+    acb->aiocb.aio_fildes = fd;
     acb->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
     acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
     acb->aiocb.aio_buf = buf;
@@ -397,39 +402,32 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
     return acb;
 }
 
-static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *pa_submit(BlockDriverState *bs,
+                                  int fd, int64_t sector_num,
+                                  void *buf, int nb_sectors, int write,
+                                  BlockDriverCompletionFunc *cb,
+                                  void *opaque)
 {
     RawAIOCB *acb;
+    int err;
 
-    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+    acb = raw_aio_setup(bs, fd, sector_num, buf, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
-    if (aio_read(&acb->aiocb) < 0) {
-        qemu_aio_release(acb);
-        return NULL;
-    }
-    return &acb->common;
-}
 
-static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    RawAIOCB *acb;
+    if (write) 
+       err = aio_write(&acb->aiocb);
+    else
+       err = aio_read(&acb->aiocb);
 
-    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
-    if (!acb)
-        return NULL;
-    if (aio_write(&acb->aiocb) < 0) {
+    if (err < 0) {
         qemu_aio_release(acb);
         return NULL;
     }
     return &acb->common;
 }
 
-static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
+static void pa_cancel(BlockDriverAIOCB *blockacb)
 {
     int ret;
     RawAIOCB *acb = (RawAIOCB *)blockacb;
@@ -456,6 +454,111 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
     }
 }
 
+/* POSIX AIO implementation of the AIODriver interface. */
+static AIODriver posix_aio_drv = {
+    .name = "posix",
+    .aiocb_size = sizeof(RawAIOCB),
+    .aio_init = pa_init,
+    .aio_wait_start = pa_wait_start,
+    .aio_wait = pa_wait,
+    .aio_wait_end = pa_wait_end,
+    .aio_flush = pa_flush,
+    .aio_submit = pa_submit,
+    .aio_cancel = pa_cancel,
+};
+
+/* Register the POSIX AIO driver; returns qemu_register_aio()'s result. */
+int posix_aio_init(void)
+{
+    return qemu_register_aio(&posix_aio_drv);
+}
+
+/*
+ * One-time setup of the selected AIO driver.  Only the first call does any
+ * work: it copies the driver's AIOCB size into the raw and host-device block
+ * drivers, then runs the driver's aio_init hook if it has one.
+ */
+void qemu_aio_init(void)
+{
+    if (aio_initialized)
+        return;
+
+    /* Fail with a diagnostic rather than a NULL dereference when no driver
+     * has been registered or selected. */
+    assert(aio_drv != NULL);
+
+    aio_initialized = 1;
+    bdrv_host_device.aiocb_size = aio_drv->aiocb_size;
+    bdrv_raw.aiocb_size = aio_drv->aiocb_size;
+    if (aio_drv->aio_init)
+        aio_drv->aio_init();
+}
+
+/* Run the driver's aio_flush hook, initializing the AIO layer if needed. */
+void qemu_aio_flush(void)
+{
+    qemu_aio_init();
+    aio_drv->aio_flush();
+}
+
+/* Run the driver's optional aio_wait_start hook (initializing if needed). */
+void qemu_aio_wait_start(void)
+{
+    qemu_aio_init();
+    if (aio_drv->aio_wait_start)
+        aio_drv->aio_wait_start();
+}
+
+/* Run the driver's aio_wait hook (initializing if needed). */
+void qemu_aio_wait(void)
+{
+    qemu_aio_init();
+    aio_drv->aio_wait();
+}
+
+/* Run the driver's optional aio_wait_end hook. */
+void qemu_aio_wait_end(void)
+{
+    if (aio_drv->aio_wait_end)
+        aio_drv->aio_wait_end();
+}
+
+/* Submit an asynchronous read on the image's fd through the AIO driver.
+ * Returns NULL if fd_open() fails, otherwise the driver's aio_submit result. */
+static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
+        int64_t sector_num, uint8_t *buf, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (fd_open(bs) < 0)
+        return NULL;
+
+    return aio_drv->aio_submit(bs, s->fd, sector_num, buf, nb_sectors, 0,
+                               cb, opaque);
+}
+
+/* Write counterpart of raw_aio_read().  The const is cast away only because
+ * aio_submit shares one buffer parameter between reads and writes. */
+static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
+        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (fd_open(bs) < 0)
+        return NULL;
+
+    return aio_drv->aio_submit(bs, s->fd, sector_num, (void *)buf, nb_sectors,
+                               1, cb, opaque);
+}
+
+/* Forward cancellation of a request to the AIO driver. */
+static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    aio_drv->aio_cancel(blockacb);
+}
+
 static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -559,7 +642,6 @@ BlockDriver bdrv_raw = {
     .bdrv_aio_read = raw_aio_read,
     .bdrv_aio_write = raw_aio_write,
     .bdrv_aio_cancel = raw_aio_cancel,
-    .aiocb_size = sizeof(RawAIOCB),
     .protocol_name = "file",
     .bdrv_pread = raw_pread,
     .bdrv_pwrite = raw_pwrite,
@@ -911,7 +993,6 @@ BlockDriver bdrv_host_device = {
     .bdrv_aio_read = raw_aio_read,
     .bdrv_aio_write = raw_aio_write,
     .bdrv_aio_cancel = raw_aio_cancel,
-    .aiocb_size = sizeof(RawAIOCB),
     .bdrv_pread = raw_pread,
     .bdrv_pwrite = raw_pwrite,
     .bdrv_getlength = raw_getlength,
diff --git a/block-raw-win32.c b/block-raw-win32.c
index 43d3f6c..6b40a27 100644
--- a/block-raw-win32.c
+++ b/block-raw-win32.c
@@ -350,10 +350,6 @@ void qemu_aio_init(void)
 {
 }
 
-void qemu_aio_poll(void)
-{
-}
-
 void qemu_aio_flush(void)
 {
 }
diff --git a/block.c b/block.c
index eb610e0..44cb747 100644
--- a/block.c
+++ b/block.c
@@ -26,6 +26,7 @@
 #include "console.h"
 #endif
 #include "block_int.h"
+#include "block-aio.h"
 
 #ifdef _BSD
 #include <sys/types.h>
@@ -1347,6 +1348,9 @@ void bdrv_init(void)
     bdrv_register(&bdrv_vvfat);
     bdrv_register(&bdrv_qcow2);
     bdrv_register(&bdrv_parallels);
+#ifndef _WIN32
+    posix_aio_init();
+#endif
 }
 
 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
@@ -1378,6 +1382,55 @@ void qemu_aio_release(void *p)
     drv->free_aiocb = acb;
 }
 
+/* All registered AIO drivers, most recently registered first. */
+static AIODriver *aio_driver_list;
+/* Driver currently in use. */
+AIODriver *aio_drv;
+
+/*
+ * Add 'drv' to the head of the driver list and make it the current driver,
+ * so the last driver registered is the default.  Always returns 0.
+ */
+int qemu_register_aio(AIODriver *drv)
+{
+    drv->next = aio_driver_list;
+    aio_driver_list = drv;
+    aio_drv = aio_driver_list;
+
+    return 0;
+}
+
+/*
+ * Select the AIO driver called 'name'.
+ *
+ * The special name "?" prints every registered driver and exits.
+ * Returns 0 on success.  If no registered driver has that name, the current
+ * driver is left in place, a message is printed and -1 is returned.
+ */
+int qemu_set_aio_driver(const char *name)
+{
+    AIODriver *drv;
+
+    if (!strcmp(name, "?")) {
+        printf("Available aio drivers:\n");
+        for (drv = aio_driver_list; drv; drv = drv->next) {
+            printf("%s\n", drv->name);
+        }
+        exit(0);
+    }
+
+    for (drv = aio_driver_list; drv; drv = drv->next) {
+        if (!strcmp(name, drv->name)) {
+            aio_drv = drv;
+            return 0;
+        }
+    }
+
+    /* Never store NULL in aio_drv: its users dereference it unchecked. */
+    fprintf(stderr, "qemu: unknown aio driver '%s'\n", name);
+    return -1;
+}
+
 /**************************************************************/
 /* removable device support */
 
diff --git a/block.h b/block.h
index 9d30db2..ff19425 100644
--- a/block.h
+++ b/block.h
@@ -94,7 +94,6 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 void qemu_aio_init(void);
-void qemu_aio_poll(void);
 void qemu_aio_flush(void);
 void qemu_aio_wait_start(void);
 void qemu_aio_wait(void);
diff --git a/sysemu.h b/sysemu.h
index 0078190..9931139 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -41,6 +41,8 @@ void qemu_system_powerdown(void);
 #endif
 void qemu_system_reset(void);
 
+void qemu_register_poll(IOHandler *poll, void *opaque);
+
 void cpu_save(QEMUFile *f, void *opaque);
 int cpu_load(QEMUFile *f, void *opaque, int version_id);
 
diff --git a/vl.c b/vl.c
index cc328b0..cebcdc3 100644
--- a/vl.c
+++ b/vl.c
@@ -36,6 +36,7 @@
 #include "qemu-timer.h"
 #include "qemu-char.h"
 #include "block.h"
+#include "block-aio.h"
 #include "audio/audio.h"
 #include "balloon.h"
 
@@ -7371,6 +7372,41 @@ void qemu_bh_delete(QEMUBH *bh)
     qemu_free(bh);
 }
 
+/***********************************************************/
+/* poll handlers */
+
+/* One callback run on every pass of main_loop_wait(). */
+typedef struct PollHandler
+{
+    IOHandler *func;            /* callback */
+    void *opaque;               /* argument passed to func */
+    struct PollHandler *next;   /* next registered handler */
+} PollHandler;
+
+/* Registered handlers, most recently registered first. */
+static PollHandler *poll_handlers;
+
+/*
+ * Register 'poll' to be called with 'opaque' from main_loop_wait().
+ *
+ * Handlers are pushed on the head of the list.  If the list node cannot be
+ * allocated the handler is silently not registered.
+ */
+void qemu_register_poll(IOHandler *poll, void *opaque)
+{
+    PollHandler *p;
+
+    p = qemu_mallocz(sizeof(*p));
+    if (p == NULL)
+        return;
+
+    p->func = poll;
+    p->opaque = opaque;
+    p->next = poll_handlers;
+
+    poll_handlers = p;
+}
+
 /***********************************************************/
 /* machine registration */
 
@@ -7689,7 +7717,12 @@ void main_loop_wait(int timeout)
         slirp_select_poll(&rfds, &wfds, &xfds);
     }
 #endif
-    qemu_aio_poll();
+    if (poll_handlers) {
+       PollHandler *poll;
+
+       for (poll = poll_handlers; poll; poll = poll->next)
+           poll->func(poll->opaque);
+    }
 
     if (vm_running) {
         qemu_run_timers(&active_timers[QEMU_TIMER_VIRTUAL],
@@ -7928,6 +7961,8 @@ static void help(int exitcode)
            "-clock          force the use of the given methods for timer 
alarm.\n"
            "                To see what timers are available use -clock ?\n"
            "-startdate      select initial date of the clock\n"
+          "-aio string     Force aio type `string'\n"
+          "                Use -aio ? to see available aio types.\n"
            "\n"
            "During emulation, the following keys are useful:\n"
            "ctrl-alt-f      toggle full screen\n"
@@ -8031,6 +8066,7 @@ enum {
     QEMU_OPTION_old_param,
     QEMU_OPTION_clock,
     QEMU_OPTION_startdate,
+    QEMU_OPTION_aio,
 };
 
 typedef struct QEMUOption {
@@ -8142,6 +8178,7 @@ const QEMUOption qemu_options[] = {
 #endif
     { "clock", HAS_ARG, QEMU_OPTION_clock },
     { "startdate", HAS_ARG, QEMU_OPTION_startdate },
+    { "aio", HAS_ARG, QEMU_OPTION_aio },
     { NULL },
 };
 
@@ -8417,6 +8454,7 @@ int main(int argc, char **argv)
     int fds[2];
     const char *pid_file = NULL;
     VLANState *vlan;
+    const char *aio_opt = NULL;
 
     LIST_INIT (&vm_change_state_head);
 #ifndef _WIN32
@@ -8991,6 +9029,9 @@ int main(int argc, char **argv)
                     }
                 }
                 break;
+           case QEMU_OPTION_aio:
+               aio_opt = optarg;
+               break;
             }
         }
     }
@@ -9075,7 +9116,6 @@ int main(int argc, char **argv)
 
     init_timers();
     init_timer_alarm();
-    qemu_aio_init();
 
 #ifdef _WIN32
     socket_init();
@@ -9146,6 +9186,11 @@ int main(int argc, char **argv)
 
     bdrv_init();
 
+    if (aio_opt)
+       qemu_set_aio_driver(aio_opt);
+
+    qemu_aio_init();
+
     /* we always create the cdrom drive, even if no disk is there */
 
     if (nb_drives_opt < MAX_DRIVES)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]