From: Stefan Hajnoczi
Subject: [Qemu-devel] [PULL v2 21/24] test-aio-multithread: add performance comparison with thread-based mutexes
Date: Tue, 21 Feb 2017 11:56:41 +0000
From: Paolo Bonzini <address@hidden>
Add two implementations of the same benchmark as the previous patch,
but using pthreads. One uses a normal QemuMutex, the other is Linux
only and implements a fair mutex based on MCS locks and futexes.
This shows that the slower performance of the 5-thread case is due to
the fairness of CoMutex, rather than to coroutines. If fairness does
not matter, as is the case with two threads, CoMutex can actually be
faster than pthreads.
Signed-off-by: Paolo Bonzini <address@hidden>
Reviewed-by: Fam Zheng <address@hidden>
Message-id: address@hidden
Signed-off-by: Stefan Hajnoczi <address@hidden>
---
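A note on the futex dependency: qemu/futex.h, included below under
CONFIG_LINUX, is a thin wrapper around the Linux futex(2) system call, and
the fair mutex relies on the usual futex wait/wake pattern.  The following
minimal standalone sketch of that pattern (an illustration, not QEMU code:
the futex() helper, the "flag" word and the timing are invented for the
example) shows why a wake that races with the call to wait is not lost:
FUTEX_WAIT atomically rechecks the watched word before sleeping.

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int flag = 1;                /* 1 = blocked, 0 = released */

static long futex(atomic_int *uaddr, int op, int val)
{
    return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void *waiter(void *arg)
{
    /* FUTEX_WAIT sleeps only while *uaddr still equals val; if the wake
     * already happened it returns immediately with EAGAIN, and the loop
     * rechecks the flag.  */
    while (atomic_load(&flag) == 1) {
        futex(&flag, FUTEX_WAIT, 1);
    }
    printf("woken\n");
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, waiter, NULL);
    usleep(100 * 1000);                    /* let the waiter block */
    atomic_store(&flag, 0);                /* publish the new state first... */
    futex(&flag, FUTEX_WAKE, 1);           /* ...then wake one sleeper */
    pthread_join(t, NULL);
    return 0;
}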
tests/test-aio-multithread.c | 164 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 164 insertions(+)
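The MCS lock in the patch below is compact but subtle, so here is a
self-contained user-space rendering of the same algorithm, with C11 atomics
standing in for QEMU's atomic_xchg()/atomic_cmpxchg()/atomic_set()/
atomic_read() macros.  The thread ids, the iteration count and the futex()
helper are invented for the sketch, and the wait loop is written defensively
where the patch calls qemu_futex_wait() once:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>
#include <stdint.h>
#include <pthread.h>
#include <assert.h>
#include <stdio.h>

#define NTHREADS 5
#define ITERS    100000

/* One queue node per thread, padded to a cache line as in the patch. */
static struct {
    atomic_int next, locked;
    int padding[14];
} nodes[NTHREADS] __attribute__((__aligned__(64)));

static atomic_int mutex_head = -1;
static _Thread_local int id;
static long counter;

static long futex(atomic_int *uaddr, int op, int val)
{
    return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void mcs_lock(void)
{
    int prev;

    atomic_store(&nodes[id].next, -1);
    atomic_store(&nodes[id].locked, 1);
    prev = atomic_exchange(&mutex_head, id);     /* become the tail */
    if (prev != -1) {
        /* Link behind the old tail, then sleep until it hands over. */
        atomic_store(&nodes[prev].next, id);
        while (atomic_load(&nodes[id].locked) == 1) {
            futex(&nodes[id].locked, FUTEX_WAIT, 1);
        }
    }
}

static void mcs_unlock(void)
{
    int next, expected = id;

    if (atomic_load(&nodes[id].next) == -1) {
        /* No successor visible: try to mark the queue empty and leave. */
        if (atomic_compare_exchange_strong(&mutex_head, &expected, -1)) {
            return;
        }
        /* A successor did the exchange but has not linked itself yet. */
        while (atomic_load(&nodes[id].next) == -1) {
        }
    }
    next = atomic_load(&nodes[id].next);
    atomic_store(&nodes[next].locked, 0);
    futex(&nodes[next].locked, FUTEX_WAKE, 1);
}

static void *worker(void *arg)
{
    int i;

    id = (int)(intptr_t)arg;
    for (i = 0; i < ITERS; i++) {
        mcs_lock();
        counter++;            /* plain increment, protected by the lock */
        mcs_unlock();
    }
    return NULL;
}

int main(void)
{
    pthread_t t[NTHREADS];
    int i;

    for (i = 0; i < NTHREADS; i++) {
        pthread_create(&t[i], NULL, worker, (void *)(intptr_t)i);
    }
    for (i = 0; i < NTHREADS; i++) {
        pthread_join(t[i], NULL);
    }
    assert(counter == (long)NTHREADS * ITERS);
    printf("counter = %ld, as expected\n", counter);
    return 0;
}

After a build, the benchmarks registered by this patch can presumably be run
directly, e.g. tests/test-aio-multithread -m quick for the 1-second variants
or -m thorough for the 10-second ones.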
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
index 4fa2e9b..f11e990 100644
--- a/tests/test-aio-multithread.c
+++ b/tests/test-aio-multithread.c
@@ -278,6 +278,162 @@ static void test_multi_co_mutex_2_30(void)
     test_multi_co_mutex(2, 30);
 }
 
+/* Same test with fair mutexes, for performance comparison. */
+
+#ifdef CONFIG_LINUX
+#include "qemu/futex.h"
+
+/* The nodes for the mutex reside in this structure (on which we try to avoid
+ * false sharing). The head of the mutex is in the "mutex_head" variable.
+ */
+static struct {
+    int next, locked;
+    int padding[14];
+} nodes[NUM_CONTEXTS] __attribute__((__aligned__(64)));
+
+static int mutex_head = -1;
+
+static void mcs_mutex_lock(void)
+{
+    int prev;
+
+    nodes[id].next = -1;
+    nodes[id].locked = 1;
+    prev = atomic_xchg(&mutex_head, id);
+    if (prev != -1) {
+        atomic_set(&nodes[prev].next, id);
+        qemu_futex_wait(&nodes[id].locked, 1);
+    }
+}
+
+static void mcs_mutex_unlock(void)
+{
+    int next;
+    if (nodes[id].next == -1) {
+        if (atomic_read(&mutex_head) == id &&
+            atomic_cmpxchg(&mutex_head, id, -1) == id) {
+            /* Last item in the list, exit. */
+            return;
+        }
+        while (atomic_read(&nodes[id].next) == -1) {
+            /* mcs_mutex_lock did the xchg, but has not updated
+             * nodes[prev].next yet.
+             */
+        }
+    }
+
+    /* Wake up the next in line. */
+    next = nodes[id].next;
+    nodes[next].locked = 0;
+    qemu_futex_wake(&nodes[next].locked, 1);
+}
+
+static void test_multi_fair_mutex_entry(void *opaque)
+{
+    while (!atomic_mb_read(&now_stopping)) {
+        mcs_mutex_lock();
+        counter++;
+        mcs_mutex_unlock();
+        atomic_inc(&atomic_counter);
+    }
+    atomic_dec(&running);
+}
+
+static void test_multi_fair_mutex(int threads, int seconds)
+{
+    int i;
+
+    assert(mutex_head == -1);
+    counter = 0;
+    atomic_counter = 0;
+    now_stopping = false;
+
+    create_aio_contexts();
+    assert(threads <= NUM_CONTEXTS);
+    running = threads;
+    for (i = 0; i < threads; i++) {
+        Coroutine *co1 = qemu_coroutine_create(test_multi_fair_mutex_entry,
+                                               NULL);
+        aio_co_schedule(ctx[i], co1);
+    }
+
+    g_usleep(seconds * 1000000);
+
+    atomic_mb_set(&now_stopping, true);
+    while (running > 0) {
+        g_usleep(100000);
+    }
+
+    join_aio_contexts();
+    g_test_message("%d iterations/second\n", counter / seconds);
+    g_assert_cmpint(counter, ==, atomic_counter);
+}
+
+static void test_multi_fair_mutex_1(void)
+{
+    test_multi_fair_mutex(NUM_CONTEXTS, 1);
+}
+
+static void test_multi_fair_mutex_10(void)
+{
+    test_multi_fair_mutex(NUM_CONTEXTS, 10);
+}
+#endif
+
+/* Same test with pthread mutexes, for performance comparison and
+ * portability. */
+
+static QemuMutex mutex;
+
+static void test_multi_mutex_entry(void *opaque)
+{
+    while (!atomic_mb_read(&now_stopping)) {
+        qemu_mutex_lock(&mutex);
+        counter++;
+        qemu_mutex_unlock(&mutex);
+        atomic_inc(&atomic_counter);
+    }
+    atomic_dec(&running);
+}
+
+static void test_multi_mutex(int threads, int seconds)
+{
+    int i;
+
+    qemu_mutex_init(&mutex);
+    counter = 0;
+    atomic_counter = 0;
+    now_stopping = false;
+
+    create_aio_contexts();
+    assert(threads <= NUM_CONTEXTS);
+    running = threads;
+    for (i = 0; i < threads; i++) {
+        Coroutine *co1 = qemu_coroutine_create(test_multi_mutex_entry, NULL);
+        aio_co_schedule(ctx[i], co1);
+    }
+
+    g_usleep(seconds * 1000000);
+
+    atomic_mb_set(&now_stopping, true);
+    while (running > 0) {
+        g_usleep(100000);
+    }
+
+    join_aio_contexts();
+    g_test_message("%d iterations/second\n", counter / seconds);
+    g_assert_cmpint(counter, ==, atomic_counter);
+}
+
+static void test_multi_mutex_1(void)
+{
+    test_multi_mutex(NUM_CONTEXTS, 1);
+}
+
+static void test_multi_mutex_10(void)
+{
+    test_multi_mutex(NUM_CONTEXTS, 10);
+}
+
 /* End of tests. */
 
 int main(int argc, char **argv)
@@ -290,10 +446,18 @@ int main(int argc, char **argv)
         g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
         g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_1);
         g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
+#ifdef CONFIG_LINUX
+        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_1);
+#endif
+        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_1);
     } else {
         g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
         g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_10);
         g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
+#ifdef CONFIG_LINUX
+        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_10);
+#endif
+        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_10);
     }
     return g_test_run();
 }
--
2.9.3