qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-block] [PATCH 3/3] aio: Introduce aio-epoll.c


From: Fam Zheng
Subject: [Qemu-block] [PATCH 3/3] aio: Introduce aio-epoll.c
Date: Mon, 12 Oct 2015 17:55:31 +0800

To minimize code duplication, epoll is hooked into aio-posix's
aio_poll() instead of rolling its own. This approach also has the
advantage that it is possible to switch between the two at both
compile time and run time:

1) If the configure script doesn't find epoll, the libqemustub.a no-op
functions are used, which selects the usual ppoll.

2) When QEMU starts with a small number of fds in the event loop, ppoll
is used.

3) When QEMU starts with a big number of fds, or when more devices are
hot plugged after starting up, epoll automatically kicks in after the
number of fds hits the threshold.

4) Some fds may not support epoll, such as tty based stdio. In this
case, we can fall back to ppoll.

Signed-off-by: Fam Zheng <address@hidden>
---
 Makefile.objs                |   1 +
 aio-epoll.c                  | 150 +++++++++++++++++++++++++++++++++++++++++++
 aio-posix.c                  |  16 ++++-
 include/block/aio-internal.h |  15 +++++
 include/block/aio.h          |   5 ++
 stubs/Makefile.objs          |   1 +
 stubs/aio-epoll.c            |  37 +++++++++++
 7 files changed, 223 insertions(+), 2 deletions(-)
 create mode 100644 aio-epoll.c
 create mode 100644 stubs/aio-epoll.c

diff --git a/Makefile.objs b/Makefile.objs
index bc43e5c..8f401b7 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -10,6 +10,7 @@ util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o 
qapi-event.o
 block-obj-y = async.o thread-pool.o
 block-obj-y += nbd.o block.o blockjob.o
 block-obj-y += main-loop.o iohandler.o qemu-timer.o
+block-obj-$(CONFIG_EPOLL) += aio-epoll.o
 block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
diff --git a/aio-epoll.c b/aio-epoll.c
new file mode 100644
index 0000000..4557dcb
--- /dev/null
+++ b/aio-epoll.c
@@ -0,0 +1,150 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright Red Hat, Inc, 2015
+ *
+ * Authors:
+ *  Fam Zheng <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block/block.h"
+#include "qemu/queue.h"
+#include "block/aio-internal.h"
+#include <sys/epoll.h>
+
+/* The fd number threshold to switch to epoll */
+#define EPOLL_ENABLE_THRESHOLD 64
+
+/*
+ * Permanently turn epoll off for @ctx.
+ *
+ * Clears both epoll_available and epoll_enabled and closes the epoll
+ * descriptor.  The descriptor is closed even when epoll was never enabled:
+ * once epoll_available is false no function in this file uses it again, and
+ * it may still hold handlers registered by an aio_epoll_try_enable() call
+ * that failed half way through.
+ */
+static void aio_epoll_disable(AioContext *ctx)
+{
+    ctx->epoll_available = false;
+    ctx->epoll_enabled = false;
+    if (ctx->epollfd >= 0) {
+        close(ctx->epollfd);
+        /* Make sure a stale descriptor number is never reused by mistake. */
+        ctx->epollfd = -1;
+    }
+}
+
+/*
+ * Translate a GLib poll event mask (G_IO_*) into the matching epoll mask.
+ * Only the IN, OUT, HUP and ERR bits are carried over.
+ */
+static inline int epoll_events_from_pfd(int pfd_events)
+{
+    int epoll_events = 0;
+
+    if (pfd_events & G_IO_IN) {
+        epoll_events |= EPOLLIN;
+    }
+    if (pfd_events & G_IO_OUT) {
+        epoll_events |= EPOLLOUT;
+    }
+    if (pfd_events & G_IO_HUP) {
+        epoll_events |= EPOLLHUP;
+    }
+    if (pfd_events & G_IO_ERR) {
+        epoll_events |= EPOLLERR;
+    }
+    return epoll_events;
+}
+
+/*
+ * Try to switch @ctx over to epoll by registering every live handler that
+ * has a non-empty event mask with the epoll descriptor.
+ *
+ * Returns true and sets epoll_enabled when all registrations succeeded.
+ * Returns false when epoll is not available or when any epoll_ctl() call
+ * fails; in the latter case handlers registered so far are left in place.
+ */
+static bool aio_epoll_try_enable(AioContext *ctx)
+{
+    AioHandler *handler;
+
+    if (!ctx->epoll_available) {
+        return false;
+    }
+
+    QLIST_FOREACH(handler, &ctx->aio_handlers, node) {
+        struct epoll_event ev;
+
+        if (handler->deleted || !handler->pfd.events) {
+            continue;
+        }
+        ev.events = epoll_events_from_pfd(handler->pfd.events);
+        ev.data.ptr = handler;
+        if (epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD,
+                      handler->pfd.fd, &ev) != 0) {
+            return false;
+        }
+    }
+
+    ctx->epoll_enabled = true;
+    return true;
+}
+
+/*
+ * Bring the epoll set of @ctx in sync with @node after its event mask
+ * changed.
+ *
+ * Does nothing unless epoll is currently enabled.  A node with an empty
+ * event mask is removed from the set; otherwise it is added when @is_new is
+ * true and modified when it is false.
+ *
+ * If the kernel rejects the change, epoll is disabled for @ctx rather than
+ * asserting, so that polling does not continue on an epoll set that no
+ * longer matches the handler list.
+ */
+void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
+{
+    struct epoll_event event = { 0 };
+    int op;
+
+    /* NOTE(review): the aio_set_fd_handler() call site passes on whatever
+     * find_aio_handler() returned; the NULL check covers the case where no
+     * handler exists for the fd -- confirm against the caller. */
+    if (!ctx->epoll_enabled || !node) {
+        return;
+    }
+
+    if (!node->pfd.events) {
+        op = EPOLL_CTL_DEL;
+    } else {
+        event.data.ptr = node;
+        event.events = epoll_events_from_pfd(node->pfd.events);
+        op = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+    }
+
+    if (epoll_ctl(ctx->epollfd, op, node->pfd.fd, &event)) {
+        aio_epoll_disable(ctx);
+    }
+}
+
+/*
+ * Wait for events on the epoll set of @ctx and store them in the revents
+ * field of the handlers that fired.
+ *
+ * @pfds/@npfd must describe exactly one entry, the epoll descriptor itself.
+ * @timeout is handed to qemu_poll_ns() when positive (presumably
+ * nanoseconds, going by that function's name); 0 means "do not block" and a
+ * negative value means "block until something happens".
+ *
+ * Returns the result of the last wait call: the number of handlers with
+ * pending events, 0 if nothing became ready, or a negative value on error.
+ */
+int aio_epoll(AioContext *ctx, GPollFD *pfds, unsigned npfd, int64_t timeout)
+{
+    struct epoll_event events[128];
+    int i, ret = 0;
+
+    assert(npfd == 1);
+    assert(pfds[0].fd == ctx->epollfd);
+
+    /* A positive timeout is served by polling the epoll descriptor itself. */
+    if (timeout > 0) {
+        ret = qemu_poll_ns(pfds, npfd, timeout);
+        if (ret <= 0) {
+            return ret;
+        }
+    }
+
+    /*
+     * epoll_wait() takes an int timeout in milliseconds, so the raw @timeout
+     * must not be passed through.  By now the wait has either already
+     * happened (fetch what is ready without blocking), or the caller asked
+     * for no wait (0) or an unbounded one (-1).
+     */
+    ret = epoll_wait(ctx->epollfd, events,
+                     sizeof(events) / sizeof(events[0]),
+                     timeout < 0 ? -1 : 0);
+    for (i = 0; i < ret; i++) {
+        AioHandler *node = events[i].data.ptr;
+        int ev = events[i].events;
+
+        node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
+            (ev & EPOLLOUT ? G_IO_OUT : 0) |
+            (ev & EPOLLHUP ? G_IO_HUP : 0) |
+            (ev & EPOLLERR ? G_IO_ERR : 0);
+    }
+    return ret;
+}
+
+/*
+ * Decide whether the caller should wait with epoll for this iteration.
+ *
+ * Returns true when epoll is already enabled, or when @npfd has reached
+ * EPOLL_ENABLE_THRESHOLD and every handler could be registered with epoll.
+ * Returns false otherwise.
+ *
+ * Staying below the threshold leaves epoll available so that a later call
+ * with more fds can still switch over.  Only a failed attempt to enable it
+ * disables epoll for good.  @pfds and @timeout are not used.
+ */
+bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+                          unsigned npfd, int64_t timeout)
+{
+    if (!ctx->epoll_available) {
+        return false;
+    }
+    if (ctx->epoll_enabled) {
+        return true;
+    }
+    if (npfd < EPOLL_ENABLE_THRESHOLD) {
+        /* Too few fds for now; keep epoll available for later. */
+        return false;
+    }
+    if (aio_epoll_try_enable(ctx)) {
+        return true;
+    }
+    /* Some fd cannot be handled by epoll: give up on it for this context. */
+    aio_epoll_disable(ctx);
+    return false;
+}
+
+/*
+ * Create the epoll descriptor for @ctx and record in epoll_available
+ * whether that worked.  The descriptor field must still be zero on entry.
+ * @errp is not used; a creation failure is reported only through
+ * epoll_available being false.
+ */
+void aio_context_setup_epoll(AioContext *ctx, Error **errp)
+{
+    assert(!ctx->epollfd);
+    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
+    ctx->epoll_available = (ctx->epollfd != -1);
+}
diff --git a/aio-posix.c b/aio-posix.c
index 4fd2383..2c0fe29 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -39,6 +39,7 @@ void aio_set_fd_handler(AioContext *ctx,
                         void *opaque)
 {
     AioHandler *node;
+    bool is_new = false;
 
     node = find_aio_handler(ctx, fd);
 
@@ -68,6 +69,7 @@ void aio_set_fd_handler(AioContext *ctx,
             QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
 
             g_source_add_poll(&ctx->source, &node->pfd);
+            is_new = true;
         }
         /* Update handler with latest information */
         node->io_read = io_read;
@@ -78,6 +80,7 @@ void aio_set_fd_handler(AioContext *ctx,
         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
     }
 
+    aio_epoll_update(ctx, node, is_new);
     aio_notify(ctx);
 }
 
@@ -248,7 +251,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* fill pollfds */
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->pfd.events) {
+        if (!node->deleted && node->pfd.events && !ctx->epoll_enabled) {
             add_pollfd(node);
         }
     }
@@ -259,7 +262,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
     if (timeout) {
         aio_context_release(ctx);
     }
-    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
+    if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
+        npfd = 0;
+        AioHandler epoll_handler = { 0 };
+        aio_epoll_fill(ctx, &epoll_handler);
+        add_pollfd(&epoll_handler);
+        ret = aio_epoll(ctx, pollfds, npfd, timeout);
+    } else  {
+        ret = qemu_poll_ns(pollfds, npfd, timeout);
+    }
     if (blocking) {
         atomic_sub(&ctx->notify_me, 2);
     }
@@ -291,4 +302,5 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
 void aio_context_setup(AioContext *ctx, Error **errp)
 {
+    aio_context_setup_epoll(ctx, errp);
 }
diff --git a/include/block/aio-internal.h b/include/block/aio-internal.h
index f50a37c..330647c 100644
--- a/include/block/aio-internal.h
+++ b/include/block/aio-internal.h
@@ -29,4 +29,19 @@ struct AioHandler {
 
 void aio_context_setup(AioContext *ctx, Error **errp);
 
+void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new);
+
+int aio_epoll(AioContext *ctx, GPollFD *pfds, unsigned npfd, int64_t timeout);
+
+bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+                          unsigned npfd, int64_t timeout);
+
+void aio_context_setup_epoll(AioContext *ctx, Error **errp);
+
+/*
+ * Set up @node as a stand-in handler for the epoll descriptor of @ctx:
+ * its poll entry watches ctx->epollfd for input, output, hang-up and error
+ * events.
+ */
+static inline void aio_epoll_fill(AioContext *ctx, AioHandler *node)
+{
+    GPollFD *pfd = &node->pfd;
+
+    pfd->events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
+    pfd->fd = ctx->epollfd;
+}
+
 #endif
diff --git a/include/block/aio.h b/include/block/aio.h
index 400b1b0..e4e0563 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -122,6 +122,11 @@ struct AioContext {
 
     /* TimerLists for calling timers - one per clock type */
     QEMUTimerListGroup tlg;
+
+    /* Fields below are used by aio-epoll. */
+    int epollfd;
+    bool epoll_enabled;
+    bool epoll_available;
 };
 
 /**
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 85e4e81..c2c90a2 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -39,3 +39,4 @@ stub-obj-y += cpus.o
 stub-obj-y += kvm.o
 stub-obj-y += qmp_pc_dimm_device_list.o
 stub-obj-y += target-monitor-defs.o
+stub-obj-y += aio-epoll.o
diff --git a/stubs/aio-epoll.c b/stubs/aio-epoll.c
new file mode 100644
index 0000000..b482897
--- /dev/null
+++ b/stubs/aio-epoll.c
@@ -0,0 +1,37 @@
+/*
+ * QEMU aio epoll stub functions
+ *
+ * Copyright Red Hat, Inc, 2015
+ *
+ * Authors:
+ *  Fam Zheng <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block/aio-internal.h"
+
+/* Stub used when aio-epoll.c is not built: handler changes need no work. */
+void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
+{
+    /* Nothing to update. */
+}
+
+/*
+ * Stub used when aio-epoll.c is not built.  The stub
+ * aio_epoll_check_poll() always returns false, so this is not expected to
+ * be called.  The explicit return keeps the function well defined when
+ * assertions are compiled out.
+ */
+int aio_epoll(AioContext *ctx, GPollFD *pfds, unsigned npfd, int64_t timeout)
+{
+    assert(false);
+    return -1;
+}
+
+/* Stub used when aio-epoll.c is not built: epoll is never selected. */
+bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+                          unsigned npfd, int64_t timeout)
+{
+    /* Always tell the caller not to use epoll. */
+    return false;
+}
+
+/* Stub used when aio-epoll.c is not built: there is nothing to set up. */
+void aio_context_setup_epoll(AioContext *ctx, Error **errp)
+{
+    /* No epoll state to initialise. */
+}
+
-- 
2.6.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]