

From: Peter Lieven
Subject: Re: [Qemu-devel] [RFC PATCH 2/3] raw-posix: Convert Linux AIO submission to coroutines
Date: Thu, 27 Nov 2014 10:50:41 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.2.0

On 26.11.2014 15:46, Kevin Wolf wrote:
This improves the performance of requests because an ACB doesn't need to
be allocated on the heap any more. It also makes the code nicer and
smaller.

As a side effect, the codepath taken by aio=threads is changed to use
paio_submit_co(). This doesn't change the performance at this point.
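
The heart of the change is the standard coroutine pattern: per-request state
moves from a heap-allocated ACB to the submitting coroutine's stack, the
submitter yields after io_submit(), and the completion handler wakes it up
again. A simplified sketch using the names from the patch below (the wrapper
name is illustrative, and queueing and error handling are omitted):

    static int coroutine_fn laio_submit_co_sketch(struct qemu_laio_state *s,
                                                  struct iocb *iocb)
    {
        struct qemu_laiocb laiocb = {
            /* Per-request state on the coroutine stack, no qemu_aio_get() */
            .co = qemu_coroutine_self(),
        };
        /* ... fill in the iocb and io_submit() it ... */
        qemu_coroutine_yield();    /* sleep until the request completes */
        return laiocb.ret;         /* the completion handler stored ret and
                                    * resumed us via qemu_coroutine_enter() */
    }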

Results of qemu-img bench -t none -c 10000000 [-n] /dev/loop0:

      |      aio=native       |     aio=threads
      | before   | with patch | before   | with patch
------+----------+------------+----------+------------
run 1 | 29.921s  | 26.932s    | 35.286s  | 35.447s
run 2 | 29.793s  | 26.252s    | 35.276s  | 35.111s
run 3 | 30.186s  | 27.114s    | 35.042s  | 34.921s
run 4 | 30.425s  | 26.600s    | 35.169s  | 34.968s
run 5 | 30.041s  | 26.263s    | 35.224s  | 35.000s

TODO: Do some more serious benchmarking in VMs with less variance.
Results of a quick fio run are vaguely positive.
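
For reference, a quick run of this kind can be driven with something like the
following fio job; the device path and parameters here are placeholders
rather than the exact setup used:

    fio --name=bench --filename=/dev/vdb --direct=1 --ioengine=libaio \
        --rw=randread --bs=4k --iodepth=64 --runtime=60 --time_based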

I still see the main-loop spun warning with these patches applied to master.
It wasn't there with the original patch from August.

~/git/qemu$ ./qemu-img bench -t none -c 10000000 -n /dev/ram1
Sending 10000000 requests, 4096 bytes each, 64 in parallel
main-loop: WARNING: I/O thread spun for 1000 iterations
Run completed in 31.947 seconds.

Peter


Signed-off-by: Kevin Wolf <address@hidden>
---
  block/linux-aio.c | 70 +++++++++++++++++--------------------------------------
  block/raw-aio.h   |  5 ++--
  block/raw-posix.c | 62 ++++++++++++++++++++++--------------------------
  3 files changed, 52 insertions(+), 85 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index d92513b..99b259d 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -12,6 +12,7 @@
  #include "qemu/queue.h"
  #include "block/raw-aio.h"
  #include "qemu/event_notifier.h"
+#include "block/coroutine.h"

  #include <libaio.h>

@@ -28,7 +29,7 @@
  #define MAX_QUEUED_IO  128
struct qemu_laiocb {
-    BlockAIOCB common;
+    Coroutine *co;
      struct qemu_laio_state *ctx;
      struct iocb iocb;
      ssize_t ret;
@@ -86,9 +87,9 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
              }
          }
      }
-    laiocb->common.cb(laiocb->common.opaque, ret);
-
-    qemu_aio_unref(laiocb);
+    laiocb->ret = ret;
+    qemu_coroutine_enter(laiocb->co, NULL);
  }

/* The completion BH fetches completed I/O requests and invokes their
@@ -146,30 +147,6 @@ static void qemu_laio_completion_cb(EventNotifier *e)
      }
  }
-static void laio_cancel(BlockAIOCB *blockacb)
-{
-    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
-    struct io_event event;
-    int ret;
-
-    if (laiocb->ret != -EINPROGRESS) {
-        return;
-    }
-    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    laiocb->ret = -ECANCELED;
-    if (ret != 0) {
-        /* iocb is not cancelled, cb will be called by the event loop later */
-        return;
-    }
-
-    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
-}
-
-static const AIOCBInfo laio_aiocb_info = {
-    .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel_async       = laio_cancel,
-};
-
  static void ioq_init(LaioQueue *io_q)
  {
      io_q->size = MAX_QUEUED_IO;
@@ -243,23 +220,21 @@ int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
      return ret;
  }

-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
  {
      struct qemu_laio_state *s = aio_ctx;
-    struct qemu_laiocb *laiocb;
-    struct iocb *iocbs;
      off_t offset = sector_num * 512;

-    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
-    laiocb->nbytes = nb_sectors * 512;
-    laiocb->ctx = s;
-    laiocb->ret = -EINPROGRESS;
-    laiocb->is_read = (type == QEMU_AIO_READ);
-    laiocb->qiov = qiov;
-
-    iocbs = &laiocb->iocb;
+    struct qemu_laiocb laiocb = {
+        .co = qemu_coroutine_self(),
+        .nbytes = nb_sectors * 512,
+        .ctx = s,
+        .is_read = (type == QEMU_AIO_READ),
+        .qiov = qiov,
+    };
+    struct iocb *iocbs = &laiocb.iocb;
+    int ret;

      switch (type) {
      case QEMU_AIO_WRITE:
@@ -272,22 +247,21 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
      default:
          fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
                          __func__, type);
-        goto out_free_aiocb;
+        return -EIO;
      }
-    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
+    io_set_eventfd(&laiocb.iocb, event_notifier_get_fd(&s->e));

      if (!s->io_q.plugged) {
-        if (io_submit(s->ctx, 1, &iocbs) < 0) {
-            goto out_free_aiocb;
+        ret = io_submit(s->ctx, 1, &iocbs);
+        if (ret < 0) {
+            return ret;
          }
      } else {
          ioq_enqueue(s, iocbs);
      }
-    return &laiocb->common;
-out_free_aiocb:
-    qemu_aio_unref(laiocb);
-    return NULL;
+    qemu_coroutine_yield();
+    return laiocb.ret;
  }

  void laio_detach_aio_context(void *s_, AioContext *old_context)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 80681ce..b83c8af 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -35,9 +35,8 @@
  #ifdef CONFIG_LINUX_AIO
  void *laio_init(void);
  void laio_cleanup(void *s);
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
+int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type);
  void laio_detach_aio_context(void *s, AioContext *old_context);
  void laio_attach_aio_context(void *s, AioContext *new_context);
  void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index b1af77e..aa16b53 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1075,14 +1075,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
      return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
  }

-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
+                                  int nb_sectors, QEMUIOVector *qiov, int type)
  {
      BDRVRawState *s = bs->opaque;

      if (fd_open(bs) < 0)
-        return NULL;
+        return -EIO;

      /*
       * Check if the underlying device requires requests to be aligned,
@@ -1095,14 +1094,25 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
              type |= QEMU_AIO_MISALIGNED;
  #ifdef CONFIG_LINUX_AIO
          } else if (s->use_aio) {
-            return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
-                               nb_sectors, cb, opaque, type);
+            return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov,
+                               nb_sectors, type);
  #endif
          }
      }

-    return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
-                       cb, opaque, type);
+    return paio_submit_co(bs, s->fd, sector_num, qiov, nb_sectors, type);
+}
+
+static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
+                                     int nb_sectors, QEMUIOVector *qiov)
+{
+    return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *qiov)
+{
+    return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
  }

  static void raw_aio_plug(BlockDriverState *bs)
@@ -1135,22 +1145,6 @@ static void raw_aio_flush_io_queue(BlockDriverState *bs)
  #endif
  }

-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_WRITE);
-}
-
  static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
          BlockCompletionFunc *cb, void *opaque)
  {
@@ -1701,8 +1695,8 @@ static BlockDriver bdrv_file = {
      .bdrv_co_get_block_status = raw_co_get_block_status,
      .bdrv_co_write_zeroes = raw_co_write_zeroes,

-    .bdrv_aio_readv = raw_aio_readv,
-    .bdrv_aio_writev = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
      .bdrv_aio_flush = raw_aio_flush,
      .bdrv_aio_discard = raw_aio_discard,
      .bdrv_refresh_limits = raw_refresh_limits,
@@ -2103,8 +2097,8 @@ static BlockDriver bdrv_host_device = {
      .create_opts         = &raw_create_opts,
      .bdrv_co_write_zeroes = hdev_co_write_zeroes,

-    .bdrv_aio_readv    = raw_aio_readv,
-    .bdrv_aio_writev   = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
      .bdrv_aio_flush   = raw_aio_flush,
      .bdrv_aio_discard   = hdev_aio_discard,
      .bdrv_refresh_limits = raw_refresh_limits,
@@ -2252,8 +2246,8 @@ static BlockDriver bdrv_host_floppy = {
      .bdrv_create         = hdev_create,
      .create_opts         = &raw_create_opts,

-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
      .bdrv_aio_flush   = raw_aio_flush,
      .bdrv_refresh_limits = raw_refresh_limits,
      .bdrv_io_plug = raw_aio_plug,
@@ -2383,8 +2377,8 @@ static BlockDriver bdrv_host_cdrom = {
      .bdrv_create         = hdev_create,
      .create_opts         = &raw_create_opts,

-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
      .bdrv_aio_flush   = raw_aio_flush,
      .bdrv_refresh_limits = raw_refresh_limits,
      .bdrv_io_plug = raw_aio_plug,
@@ -2520,8 +2514,8 @@ static BlockDriver bdrv_host_cdrom = {
      .bdrv_create        = hdev_create,
      .create_opts        = &raw_create_opts,

-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_co_readv          = raw_co_readv,
+    .bdrv_co_writev         = raw_co_writev,
      .bdrv_aio_flush   = raw_aio_flush,
      .bdrv_refresh_limits = raw_refresh_limits,
      .bdrv_io_plug = raw_aio_plug,


--

Kind regards

Peter Lieven

...........................................................

  KAMP Netzwerkdienste GmbH
  Vestische Str. 89-91 | 46117 Oberhausen
  Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40
  address@hidden | http://www.kamp.de

  Managing directors: Heiner Lante | Michael Lante
  Registered at Amtsgericht Duisburg | HRB No. 12154
  VAT ID No.: DE 120607556

...........................................................



