qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC 7/8] cpu-exec-common: Introduce async_safe_run_on_cpu()


From: Sergey Fedorov
Subject: [Qemu-devel] [RFC 7/8] cpu-exec-common: Introduce async_safe_run_on_cpu()
Date: Mon, 20 Jun 2016 01:28:32 +0300

From: Sergey Fedorov <address@hidden>

This patch is based on the ideas found in work of KONRAD Frederic [1],
Alex Bennée [2], and Alvise Rigo [3].

This mechanism allows performing an operation safely in a quiescent
state. Quiescent state means: (1) no vCPU is running and (2) BQL in
system-mode or 'exclusive_lock' in user-mode emulation is held while
performing the operation. This functionality is required e.g. for
performing translation buffer flush safely in multi-threaded user-mode
emulation.

The existing CPU work queue is used to schedule such safe operations. A
new 'safe' flag is added into struct qemu_work_item to designate the
special requirements of the safe work. An operation in a quiescent state
can be scheduled by using the async_safe_run_on_cpu() function, which is
actually the same as async_run_on_cpu() except that it marks the queued
work item with the 'safe' flag set to true. Given this flag set,
queue_work_on_cpu() atomically increments 'safe_work_pending' global
counter and kicks all the CPUs instead of just the target CPU as in case
of normal CPU work. This forces the other CPUs to exit their
execution loops and wait in wait_safe_cpu_work() function for the safe
work to finish. When a CPU drains its work queue, if it encounters a
work item marked as safe, it first waits for other CPUs to exit their
execution loops, then calls the work item function, and finally
decrements the 'safe_work_pending' counter, signalling other CPUs to let
them continue execution as soon as all pending safe work items have been
processed. The 'tcg_pending_cpus' protected by 'exclusive_lock' in
user-mode or by 'qemu_global_mutex' in system-mode emulation is used to
determine whether any CPU is running and to wait for it to exit the execution
loop. The fairness of all the CPU work queues is ensured by draining all
the pending safe work items before any CPU can run.

[1] http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg01128.html
[2] http://lists.nongnu.org/archive/html/qemu-devel/2016-04/msg02531.html
[3] http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg04792.html

Signed-off-by: Sergey Fedorov <address@hidden>
Signed-off-by: Sergey Fedorov <address@hidden>
---
 cpu-exec-common.c       | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 cpus.c                  | 16 ++++++++++++++++
 include/exec/exec-all.h |  2 ++
 include/qom/cpu.h       | 14 ++++++++++++++
 linux-user/main.c       |  2 +-
 5 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index 8184e0662cbd..3056324738f8 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -25,6 +25,7 @@
 
 bool exit_request;
 CPUState *tcg_current_cpu;
+int tcg_pending_cpus;
 
 /* exit the current TB, but without causing any exception to be raised */
 void cpu_loop_exit_noexc(CPUState *cpu)
@@ -78,6 +79,15 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
     siglongjmp(cpu->jmp_env, 1);
 }
 
+static int safe_work_pending;
+
+void wait_safe_cpu_work(void)
+{
+    while (atomic_mb_read(&safe_work_pending) > 0) {
+        wait_cpu_work();
+    }
+}
+
 static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
 {
     qemu_mutex_lock(&cpu->work_mutex);
@@ -89,9 +99,18 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
     cpu->queued_work_last = wi;
     wi->next = NULL;
     wi->done = false;
+    if (wi->safe) {
+        atomic_inc(&safe_work_pending);
+    }
     qemu_mutex_unlock(&cpu->work_mutex);
 
-    qemu_cpu_kick(cpu);
+    if (!wi->safe) {
+        qemu_cpu_kick(cpu);
+    } else {
+        CPU_FOREACH(cpu) {
+            qemu_cpu_kick(cpu);
+        }
+    }
 }
 
 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
@@ -106,6 +125,7 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
     wi.func = func;
     wi.data = data;
     wi.free = false;
+    wi.safe = false;
 
     queue_work_on_cpu(cpu, &wi);
     while (!atomic_mb_read(&wi.done)) {
@@ -129,6 +149,20 @@ void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
     wi->func = func;
     wi->data = data;
     wi->free = true;
+    wi->safe = false;
+
+    queue_work_on_cpu(cpu, wi);
+}
+
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+{
+    struct qemu_work_item *wi;
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+    wi->safe = true;
 
     queue_work_on_cpu(cpu, wi);
 }
@@ -148,9 +182,18 @@ void flush_queued_work(CPUState *cpu)
         if (!cpu->queued_work_first) {
             cpu->queued_work_last = NULL;
         }
+        if (wi->safe) {
+            while (tcg_pending_cpus) {
+                wait_cpu_work();
+            }
+        }
         qemu_mutex_unlock(&cpu->work_mutex);
         wi->func(cpu, wi->data);
         qemu_mutex_lock(&cpu->work_mutex);
+        if (wi->safe) {
+            atomic_dec(&safe_work_pending);
+            signal_cpu_work();
+        }
         if (wi->free) {
             g_free(wi);
         } else {
diff --git a/cpus.c b/cpus.c
index 98f60f6f98f5..bb6bd8615cfc 100644
--- a/cpus.c
+++ b/cpus.c
@@ -932,6 +932,18 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
 }
 
+static void tcg_cpu_exec_start(CPUState *cpu)
+{
+    tcg_pending_cpus++;
+}
+
+static void tcg_cpu_exec_end(CPUState *cpu)
+{
+    if (--tcg_pending_cpus) {
+        signal_cpu_work();
+    }
+}
+
 static void qemu_wait_io_event_common(CPUState *cpu)
 {
     if (cpu->stop) {
@@ -956,6 +968,8 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
     }
+
+    wait_safe_cpu_work();
 }
 
 static void qemu_kvm_wait_io_event(CPUState *cpu)
@@ -1491,7 +1505,9 @@ static void tcg_exec_all(void)
                           (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
 
         if (cpu_can_run(cpu)) {
+            tcg_cpu_exec_start(cpu);
             r = tcg_cpu_exec(cpu);
+            tcg_cpu_exec_end(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
                 break;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 23b4b50e0a45..3bc44ed81473 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -405,10 +405,12 @@ extern int singlestep;
 
 /* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
 extern CPUState *tcg_current_cpu;
+extern int tcg_pending_cpus;
 extern bool exit_request;
 
 void wait_cpu_work(void);
 void signal_cpu_work(void);
 void flush_queued_work(CPUState *cpu);
+void wait_safe_cpu_work(void);
 
 #endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 4e688f645b4a..5128fcc1745a 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -231,6 +231,7 @@ struct qemu_work_item {
     void *data;
     int done;
     bool free;
+    bool safe;
 };
 
 /**
@@ -625,6 +626,19 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
 void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
 
 /**
+ * async_safe_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously
+ * and in quiescent state. Quiescent state means: (1) all other vCPUs are
+ * halted and (2) #qemu_global_mutex (a.k.a. BQL) in system-mode or
+ * #exclusive_lock in user-mode emulation is held while @func is executing.
+ */
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
+
+/**
  * qemu_get_cpu:
  * @index: The CPUState@cpu_index value of the CPU to obtain.
  *
diff --git a/linux-user/main.c b/linux-user/main.c
index 5a68651159c2..6da3bb32186b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -113,7 +113,6 @@ static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
 static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
 static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER;
 static bool exclusive_pending;
-static int tcg_pending_cpus;
 
 /* Make sure everything is in a consistent state for calling fork().  */
 void fork_start(void)
@@ -219,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu)
     }
     exclusive_idle();
     flush_queued_work(cpu);
+    wait_safe_cpu_work();
     pthread_mutex_unlock(&exclusive_lock);
 }
 
-- 
1.9.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]