qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 2/2] iohandler: Add Linux implementation of iohandler GSource


From: Fam Zheng
Subject: [Qemu-devel] [PATCH 2/2] iohandler: Add Linux implementation of iohandler GSource
Date: Fri, 26 Sep 2014 01:21:49 +0800

The Linux-specific epoll(7) interface has constant complexity, whereas
ppoll/g_poll has linear complexity in the number of fds.

The event loop is more efficient with epoll, because the main loop now only
needs to poll a few fds: a single epoll fd stands in for all iohandler fds.

Sometimes EPOLL_CTL_ADD returns -1 with errno = EPERM, which means the target
file descriptor doesn't support epoll. Such fds are always ready for
read/write, so we mark them and always dispatch their handlers.

Signed-off-by: Fam Zheng <address@hidden>
---
 Makefile.objs            |   4 +-
 include/qemu/iohandler.h |  13 +++
 iohandler-linux.c        | 213 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 229 insertions(+), 1 deletion(-)
 create mode 100644 iohandler-linux.c

diff --git a/Makefile.objs b/Makefile.objs
index 55dbc36..3244c65 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -8,7 +8,9 @@ util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o qapi-event.o
 
 block-obj-y = async.o thread-pool.o
 block-obj-y += nbd.o block.o blockjob.o
-block-obj-y += main-loop.o iohandler.o qemu-timer.o iohandler-posix.o
+block-obj-y += main-loop.o iohandler.o qemu-timer.o
+block-obj-$(call lnot,$(CONFIG_LINUX)) += iohandler-posix.o
+block-obj-$(CONFIG_LINUX) += iohandler-linux.o
 block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
diff --git a/include/qemu/iohandler.h b/include/qemu/iohandler.h
index e2af47d..a879796 100644
--- a/include/qemu/iohandler.h
+++ b/include/qemu/iohandler.h
@@ -27,7 +27,11 @@
 #ifndef QEMU_IOHANDLER_H
 #define QEMU_IOHANDLER_H
 
+#include "config-host.h"
 #include "qemu/main-loop.h"
+#ifdef CONFIG_LINUX
+#include <sys/epoll.h>
+#endif
 
 typedef struct IOHandlerRecord {
     IOCanReadHandler *fd_read_poll;
@@ -39,11 +43,20 @@ typedef struct IOHandlerRecord {
     bool deleted;
     GPollFD gpfd;
     bool attached;
+#ifdef CONFIG_LINUX
+    struct epoll_event epoll_event;
+    bool fallback;
+#endif
+
 } IOHandlerRecord;
 
 typedef struct {
     GSource source;
 
+#ifdef CONFIG_LINUX
+    GPollFD epollfd;
+#endif
+
     QLIST_HEAD(, IOHandlerRecord) io_handlers;
 } IOHandlerSource;
 
diff --git a/iohandler-linux.c b/iohandler-linux.c
new file mode 100644
index 0000000..61f569b
--- /dev/null
+++ b/iohandler-linux.c
@@ -0,0 +1,213 @@
+/*
+ * I/O Handler Linux (epoll) implementation
+ *
+ * Copyright (c) 2014 Red Hat, Inc.
+ *
+ * Author: Fam Zheng <address@hidden>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+#include "qemu/iohandler.h"
+
+static int iohandler_get_events(IOHandlerSource *s, IOHandlerRecord *ioh)
+{
+    int events = 0;
+
+    if (!ioh->deleted) {
+        if (ioh->fd_read &&
+                (!ioh->fd_read_poll ||
+                 ioh->fd_read_poll(ioh->opaque) != 0)) {
+            events |= EPOLLIN | EPOLLHUP | EPOLLERR;
+        }
+        if (ioh->fd_write) {
+            events |= EPOLLOUT | EPOLLERR;
+        }
+    }
+
+    return events;
+}
+
+static gboolean iohandler_source_prepare(GSource *source, gint *timeout)
+{
+    IOHandlerRecord *ioh;
+    IOHandlerSource *s = (IOHandlerSource *)source;
+    int old_events, new_events, r;
+
+    QLIST_FOREACH(ioh, &s->io_handlers, next) {
+        old_events = ioh->epoll_event.events;
+        new_events = iohandler_get_events(s, ioh) & (EPOLLIN | EPOLLOUT);
+        ioh->epoll_event.events = new_events;
+        ioh->epoll_event.data.ptr = ioh;
+        if (old_events != new_events) {
+            if (!old_events) {
+                r = epoll_ctl(s->epollfd.fd, EPOLL_CTL_ADD, ioh->fd,
+                              &ioh->epoll_event);
+                if (r) {
+                    if (errno == EPERM) {
+                        /* Some fds don't work with epoll, let's mark it as
+                         * always ready. */
+                        ioh->fallback = true;
+                    } else {
+                        perror("epoll_ctl add");
+                        abort();
+                    }
+                }
+            } else if (new_events) {
+                /* Modify could fail when the fd is not available any more.
+                 * */
+                r = epoll_ctl(s->epollfd.fd, EPOLL_CTL_MOD, ioh->fd,
+                              &ioh->epoll_event);
+            }
+        }
+    }
+
+    *timeout = -1;
+    return false;
+}
+
+static gboolean iohandler_source_check(GSource *source)
+{
+    IOHandlerRecord *ioh;
+    IOHandlerSource *s = (IOHandlerSource *)source;
+
+    QLIST_FOREACH(ioh, &s->io_handlers, next) {
+        int events;
+        events = s->epollfd.revents;
+        if (ioh->fd_read &&
+                (events & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
+                (!ioh->fd_read_poll || ioh->fd_read_poll(ioh->opaque) != 0)) {
+            return true;
+        }
+        if (ioh->fd_write && (events & (G_IO_OUT | G_IO_ERR))) {
+            return true;
+        }
+        if (ioh->fallback) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static inline void iohandler_dispatch_event(IOHandlerSource *s,
+                                            struct epoll_event *ev)
+{
+    IOHandlerRecord *ioh = ev->data.ptr;
+    int revents;
+
+    if (!ioh->deleted) {
+        revents = iohandler_get_events(s, ioh) & ev->events;
+
+        if (ioh->fd_read && (revents & (EPOLLIN | EPOLLHUP | EPOLLERR))) {
+            ioh->fd_read(ioh->opaque);
+        }
+        if (ioh->fd_write && (revents & (EPOLLOUT | EPOLLERR))) {
+            ioh->fd_write(ioh->opaque);
+        }
+    }
+
+    /* Do this last in case read/write handlers marked it for deletion */
+    if (ioh->deleted) {
+        /* Delete could fail when the fd is not available any more.
+         * */
+        epoll_ctl(s->epollfd.fd, EPOLL_CTL_DEL, ioh->fd,
+                      &ioh->epoll_event);
+        QLIST_REMOVE(ioh, next);
+        g_free(ioh);
+    }
+}
+
+#define MAX_EVENTS 10
+
+static gboolean iohandler_source_dispatch(GSource *source,
+                                          GSourceFunc callback,
+                                          gpointer data)
+{
+    IOHandlerSource *s = (IOHandlerSource *)source;
+    struct epoll_event events[MAX_EVENTS];
+    int i, r, revents;
+    sigset_t origmask;
+    IOHandlerRecord *ioh;
+
+    assert(callback == NULL);
+
+    while (true) {
+        r = epoll_pwait(s->epollfd.fd, events, MAX_EVENTS, 0, &origmask);
+        if (r < 0) {
+            break;
+        } else if (r == 0) {
+            break;
+        } else {
+            for (i = 0; i < r; i++) {
+                iohandler_dispatch_event(s, &events[i]);
+            }
+            if (r < MAX_EVENTS) {
+                break;
+            }
+        }
+    }
+
+    QLIST_FOREACH(ioh, &s->io_handlers, next) {
+        if (!ioh->fallback) {
+            continue;
+        }
+        revents = iohandler_get_events(s, ioh);
+
+        if (ioh->fd_read && (revents & (EPOLLIN | EPOLLHUP | EPOLLERR))) {
+            ioh->fd_read(ioh->opaque);
+        }
+        if (ioh->fd_write && (revents & (EPOLLOUT | EPOLLERR))) {
+            ioh->fd_write(ioh->opaque);
+        }
+    }
+
+    return true;
+}
+
+static GSourceFuncs iohandler_source_funcs = {
+    iohandler_source_prepare,
+    iohandler_source_check,
+    iohandler_source_dispatch,
+    /* finalize */ NULL
+};
+
+GSource *qemu_iohandler_get_source(void)
+{
+    static IOHandlerSource *ioh_source;
+    if (!ioh_source) {
+        int epollfd;
+        GSource *source = g_source_new(&iohandler_source_funcs,
+                                       sizeof(IOHandlerSource));
+        ioh_source = (IOHandlerSource *)source;
+        QLIST_INIT(&ioh_source->io_handlers);
+        epollfd = epoll_create(1);
+        if (epollfd == -1) {
+            perror("epoll_create");
+            exit(1);
+        }
+        ioh_source->epollfd = (GPollFD) {
+            .fd = epollfd,
+            .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
+        };
+        g_source_add_poll(source, &ioh_source->epollfd);
+    }
+    return &ioh_source->source;
+}
-- 
1.9.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]