[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 12/12] cpus-common: lock-free fast path for cpu_exec
From: |
Paolo Bonzini |
Subject: |
[Qemu-devel] [PATCH 12/12] cpus-common: lock-free fast path for cpu_exec_start/end |
Date: |
Thu, 1 Sep 2016 12:20:27 +0200 |
Set cpu->running without taking the cpu_list lock, only look at it if
there is a concurrent exclusive section. This requires adding a new
field to CPUState, which records whether a running CPU is being counted
in pending_cpus. When an exclusive section is started concurrently with
cpu_exec_start, cpu_exec_start can use the new field to wait for the end
of the exclusive section.
This a separate patch for easier bisection of issues.
Signed-off-by: Paolo Bonzini <address@hidden>
---
cpus-common.c | 72 +++++++++++++--
docs/tcg-exclusive.promela | 224 +++++++++++++++++++++++++++++++++++++++++++++
include/qom/cpu.h | 5 +-
3 files changed, 289 insertions(+), 12 deletions(-)
create mode 100644 docs/tcg-exclusive.promela
diff --git a/cpus-common.c b/cpus-common.c
index 88cf5ec..443617a 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -170,8 +170,12 @@ void start_exclusive(void)
/* Make all other cpus stop executing. */
pending_cpus = 1;
+
+ /* Write pending_cpus before reading other_cpu->running. */
+ smp_mb();
CPU_FOREACH(other_cpu) {
if (other_cpu->running) {
+ other_cpu->has_waiter = true;
pending_cpus++;
qemu_cpu_kick(other_cpu);
}
@@ -192,25 +196,73 @@ void end_exclusive(void)
/* Wait for exclusive ops to finish, and begin cpu execution. */
void cpu_exec_start(CPUState *cpu)
{
- qemu_mutex_lock(&qemu_cpu_list_mutex);
- exclusive_idle();
cpu->running = true;
- qemu_mutex_unlock(&qemu_cpu_list_mutex);
+
+ /* Write cpu->running before reading pending_cpus. */
+ smp_mb();
+
+ /* 1. start_exclusive saw cpu->running == true and pending_cpus >= 1.
+ * After taking the lock we'll see cpu->has_waiter == true and run---not
+ * for long because start_exclusive kicked us. cpu_exec_end will
+ * decrement pending_cpus and signal the waiter.
+ *
+ * 2. start_exclusive saw cpu->running == false but pending_cpus >= 1.
+ * This includes the case when an exclusive item is running now.
+ * Then we'll see cpu->has_waiter == false and wait for the item to
+ * complete.
+ *
+ * 3. pending_cpu == 0. Then start_exclusive is definitely going to
+ * see cpu->running == true, and it will kick the CPU.
+ */
+ if (pending_cpus) {
+ qemu_mutex_lock(&qemu_cpu_list_mutex);
+ if (!cpu->has_waiter) {
+ /* Not counted in pending_cpus, let the exclusive item
+ * run. Since we have the lock, set cpu->running to true
+ * while holding it instead of retrying.
+ */
+ cpu->running = false;
+ exclusive_idle();
+ cpu->running = true;
+ } else {
+ /* Counted in pending_cpus, go ahead. */
+ }
+ qemu_mutex_unlock(&qemu_cpu_list_mutex);
+ }
}
/* Mark cpu as not executing, and release pending exclusive ops. */
void cpu_exec_end(CPUState *cpu)
{
- qemu_mutex_lock(&qemu_cpu_list_mutex);
cpu->running = false;
- if (pending_cpus > 1) {
- pending_cpus--;
- if (pending_cpus == 1) {
- qemu_cond_signal(&exclusive_cond);
+
+ /* Write cpu->running before reading pending_cpus. */
+ smp_mb();
+
+ /* 1. start_exclusive saw cpu->running == true. Then it will increment
+ * pending_cpus and wait for exclusive_cond. After taking the lock
+ * we'll see cpu->has_waiter == true.
+ *
+ * 2. start_exclusive saw cpu->running == false but here pending_cpus >= 1.
+ * This includes the case when an exclusive item is running now.
+ * Then we'll see cpu->has_waiter == false and not touch pending_cpus,
+ * but will run exclusive_idle to wait for the item to complete.
+ *
+ * 3. pending_cpu == 0. Then start_exclusive is definitely going to
+ * see cpu->running == false, and it can ignore this CPU until the
+ * next cpu_exec_start.
+ */
+ if (pending_cpus) {
+ qemu_mutex_lock(&qemu_cpu_list_mutex);
+ if (cpu->has_waiter) {
+ cpu->has_waiter = false;
+ if (--pending_cpus == 1) {
+ qemu_cond_signal(&exclusive_cond);
+ }
+ exclusive_idle();
}
+ qemu_mutex_unlock(&qemu_cpu_list_mutex);
}
- exclusive_idle();
- qemu_mutex_unlock(&qemu_cpu_list_mutex);
}
static void async_safe_run_on_cpu_fn(CPUState *cpu, void *data)
diff --git a/docs/tcg-exclusive.promela b/docs/tcg-exclusive.promela
new file mode 100644
index 0000000..293b530
--- /dev/null
+++ b/docs/tcg-exclusive.promela
@@ -0,0 +1,224 @@
+/*
+ * This model describes the implementation of exclusive sections in
+ * cpus-common.c (start_exclusive, end_exclusive, cpu_exec_start,
+ * cpu_exec_end).
+ *
+ * Author: Paolo Bonzini <address@hidden>
+ *
+ * This file is in the public domain. If you really want a license,
+ * the WTFPL will do.
+ *
+ * To verify it:
+ * spin -a docs/event.promela
+ * ./a.out -a
+ *
+ * Tunable processor macros: N_CPUS, N_EXCLUSIVE, N_CYCLES, USE_MUTEX,
+ * TEST_EXPENSIVE.
+ */
+
+// Define the missing parameters for the model
+#ifndef N_CPUS
+#define N_CPUS 2
+#warning defaulting to 2 CPU processes
+#endif
+
+// the expensive test is not so expensive for <= 2 CPUs
+// If the mutex is used, it's also cheap (300 MB / 4 seconds) for 3 CPUs
+// For 3 CPUs and the lock-free option it needs 1.5 GB of RAM
+#if N_CPUS <= 2 || (N_CPUS <= 3 && defined USE_MUTEX)
+#define TEST_EXPENSIVE
+#endif
+
+#ifndef N_EXCLUSIVE
+# if !defined N_CYCLES || N_CYCLES <= 1 || defined TEST_EXPENSIVE
+# define N_EXCLUSIVE 2
+# warning defaulting to 2 concurrent exclusive sections
+# else
+# define N_EXCLUSIVE 1
+# warning defaulting to 1 concurrent exclusive sections
+# endif
+#endif
+#ifndef N_CYCLES
+# if N_EXCLUSIVE <= 1 || defined TEST_EXPENSIVE
+# define N_CYCLES 2
+# warning defaulting to 2 CPU cycles
+# else
+# define N_CYCLES 1
+# warning defaulting to 1 CPU cycles
+# endif
+#endif
+
+
+// synchronization primitives. condition variables require a
+// process-local "cond_t saved;" variable.
+
+#define mutex_t byte
+#define MUTEX_LOCK(m) atomic { m == 0 -> m = 1 }
+#define MUTEX_UNLOCK(m) m = 0
+
+#define cond_t int
+#define COND_WAIT(c, m) { \
+ saved = c; \
+ MUTEX_UNLOCK(m); \
+ c != saved -> MUTEX_LOCK(m); \
+ }
+#define COND_BROADCAST(c) c++
+
+// this is the logic from cpus-common.c
+
+mutex_t mutex;
+cond_t exclusive_cond;
+cond_t exclusive_resume;
+byte pending_cpus;
+
+byte running[N_CPUS];
+byte has_waiter[N_CPUS];
+
+#define exclusive_idle() \
+ do \
+ :: pending_cpus -> COND_WAIT(exclusive_resume, mutex); \
+ :: else -> break; \
+ od
+
+#define start_exclusive() \
+ MUTEX_LOCK(mutex); \
+ exclusive_idle(); \
+ pending_cpus = 1; \
+ \
+ i = 0; \
+ do \
+ :: i < N_CPUS -> { \
+ if \
+ :: running[i] -> has_waiter[i] = 1; pending_cpus++; \
+ :: else -> skip; \
+ fi; \
+ i++; \
+ } \
+ :: else -> break; \
+ od; \
+ \
+ do \
+ :: pending_cpus > 1 -> COND_WAIT(exclusive_cond, mutex); \
+ :: else -> break; \
+ od;
+
+#define end_exclusive() \
+ pending_cpus = 0; \
+ COND_BROADCAST(exclusive_resume); \
+ MUTEX_UNLOCK(mutex);
+
+#ifdef USE_MUTEX
+// Simple version using mutexes
+#define cpu_exec_start(id) \
+ MUTEX_LOCK(mutex); \
+ exclusive_idle(); \
+ running[id] = 1; \
+ MUTEX_UNLOCK(mutex);
+
+#define cpu_exec_end(id) \
+ MUTEX_LOCK(mutex); \
+ running[id] = 0; \
+ if \
+ :: pending_cpus -> { \
+ pending_cpus--; \
+ if \
+ :: pending_cpus == 1 -> COND_BROADCAST(exclusive_cond); \
+ :: else -> skip; \
+ fi; \
+ exclusive_idle(); \
+ } \
+ :: else -> skip; \
+ fi; \
+ MUTEX_UNLOCK(mutex);
+#else
+// Wait-free fast path, only needs mutex when concurrent with
+// an exclusive section
+#define cpu_exec_start(id) \
+ running[id] = 1; \
+ if \
+ :: pending_cpus -> { \
+ MUTEX_LOCK(mutex); \
+ if \
+ :: !has_waiter[id] -> { \
+ running[id] = 0; \
+ exclusive_idle(); \
+ running[id] = 1; \
+ } \
+ :: else -> skip; \
+ fi; \
+ MUTEX_UNLOCK(mutex); \
+ } \
+ :: else -> skip; \
+ fi;
+
+#define cpu_exec_end(id) \
+ running[id] = 0; \
+ if \
+ :: pending_cpus -> { \
+ MUTEX_LOCK(mutex); \
+ if \
+ :: has_waiter[id] -> { \
+ has_waiter[id] = 0; \
+ pending_cpus--; \
+ if \
+ :: pending_cpus == 1 ->
COND_BROADCAST(exclusive_cond); \
+ :: else -> skip; \
+ fi; \
+ exclusive_idle(); \
+ } \
+ :: else -> skip; \
+ fi; \
+ MUTEX_UNLOCK(mutex); \
+ } \
+ :: else -> skip; \
+ fi
+#endif
+
+// Promela processes
+
+byte done_cpu;
+byte in_cpu;
+active[N_CPUS] proctype cpu()
+{
+ byte id = _pid % N_CPUS;
+ byte cycles = 0;
+ cond_t saved;
+
+ do
+ :: cycles == N_CYCLES -> break;
+ :: else -> {
+ cycles++;
+ cpu_exec_start(id)
+ in_cpu++;
+ done_cpu++;
+ in_cpu--;
+ cpu_exec_end(id)
+ }
+ od;
+}
+
+byte done_exclusive;
+byte in_exclusive;
+active[N_EXCLUSIVE] proctype exclusive()
+{
+ cond_t saved;
+ byte i;
+
+ start_exclusive();
+ in_exclusive = 1;
+ done_exclusive++;
+ in_exclusive = 0;
+ end_exclusive();
+}
+
+#define LIVENESS (done_cpu == N_CPUS * N_CYCLES && done_exclusive ==
N_EXCLUSIVE)
+#define SAFETY !(in_exclusive && in_cpu)
+
+never { /* ! ([] SAFETY && <> [] LIVENESS) */
+ do
+ // once the liveness property is satisfied, this is not executable
+ // and the never clause is not accepted
+ :: ! LIVENESS -> accept_liveness: skip
+ :: 1 -> assert(SAFETY)
+ od;
+}
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 3817a98..14cf97c 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -249,7 +249,8 @@ struct qemu_work_item {
* @nr_threads: Number of threads within this CPU.
* @numa_node: NUMA node this CPU is belonging to.
* @host_tid: Host thread ID.
- * @running: #true if CPU is currently running;
+ * @running: #true if CPU is currently running (lockless).
+ * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end;
* valid under cpu_list_lock.
* @created: Indicates whether the CPU thread has been successfully created.
* @interrupt_request: Indicates a pending interrupt request.
@@ -303,7 +304,7 @@ struct CPUState {
#endif
int thread_id;
uint32_t host_tid;
- bool running;
+ bool running, has_waiter;
struct QemuCond *halt_cond;
bool thread_kicked;
bool created;
--
2.7.4
- [Qemu-devel] [PATCH 02/12] cpus: Move common code out of {async_, }run_on_cpu(), (continued)
- [Qemu-devel] [PATCH 02/12] cpus: Move common code out of {async_, }run_on_cpu(), Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 01/12] cpus: pass CPUState to run_on_cpu helpers, Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 08/12] cpus-common: move exclusive work infrastructure from, Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 03/12] cpus: Rename flush_queued_work(), Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 07/12] cpus-common: move CPU work item management to common, Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 06/12] cpus-common: move CPU list management to common code, Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 12/12] cpus-common: lock-free fast path for cpu_exec_start/end,
Paolo Bonzini <=
- [Qemu-devel] [PATCH 11/12] tcg: Make tb_flush() thread safe, Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 09/12] cpus-common: always defer async_run_on_cpu work items, Paolo Bonzini, 2016/09/01
- [Qemu-devel] [PATCH 10/12] cpus-common: Introduce async_safe_run_on_cpu(), Paolo Bonzini, 2016/09/01
- Re: [Qemu-devel] [PATCH 10/12] cpus-common: Introduce async_safe_run_on_cpu(), Pranith Kumar, 2016/09/12