qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH V5 4/6] cpu_exec: Add sleeping algorithm


From: Sebastian Tanase
Subject: [Qemu-devel] [PATCH V5 4/6] cpu_exec: Add sleeping algorithm
Date: Fri, 25 Jul 2014 11:56:31 +0200

The goal is to sleep qemu whenever the guest clock
is in advance compared to the host clock (we use
the monotonic clocks). The amount of time to sleep
is calculated in the execution loop in cpu_exec.

At first, we tried to approximate at each for loop the real time elapsed
while searching for a TB (generating or retrieving from cache) and
executing it. We would then approximate the virtual time corresponding
to the number of virtual instructions executed. The difference between
these 2 values would allow us to know if the guest is in advance or delayed.
However, the function used for measuring the real time
(qemu_clock_get_ns(QEMU_CLOCK_REALTIME)) proved to be very expensive.
We had an added overhead of 13% of the total run time.

Therefore, we modified the algorithm and only take into account the
difference between the 2 clocks at the beginning of the cpu_exec function.
During the for loop we try to reduce the advance of the guest only by
computing the virtual time elapsed and sleeping if necessary. The overhead
is thus reduced to 3%. Even though this method still has a noticeable
overhead, it no longer is a bottleneck in trying to achieve a better
guest frequency for which the guest clock is faster than the host one.

As for the alignment of the 2 clocks, with the first algorithm
the guest clock was oscillating between -1 and 1ms compared to the host clock.
Using the second algorithm we notice that the guest is 5ms behind the host,
which is still acceptable for our use case.

The tests were conducted using fio and stress. The host machine is an i5 CPU at
3.10GHz running Debian Jessie (kernel 3.12). The guest machine is an arm
versatile-pb built with buildroot.

Currently, on our test machine, the lowest icount we can achieve that is
suitable for aligning the 2 clocks is 6. However, we observe that the IO tests
(using fio) are slower than the cpu tests (using stress).

Signed-off-by: Sebastian Tanase <address@hidden>
Tested-by: Camille Bégué <address@hidden>
Signed-off-by: Paolo Bonzini <address@hidden>
---
 cpu-exec.c           | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 cpus.c               | 17 ++++++++++
 include/qemu/timer.h |  1 +
 3 files changed, 109 insertions(+)

diff --git a/cpu-exec.c b/cpu-exec.c
index 38e5f02..1a725b6 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -22,6 +22,84 @@
 #include "tcg.h"
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"
+#include "qemu/timer.h"
+
+/* -icount align implementation. */
+
+typedef struct SyncClocks {
+    int64_t diff_clk;
+    int64_t original_instr_counter;
+} SyncClocks;
+
+#if !defined(CONFIG_USER_ONLY)
+/* Allow the guest to have a max 3ms advance.
+ * The difference between the 2 clocks could therefore
+ * oscillate around 0.
+ */
+#define VM_CLOCK_ADVANCE 3000000
+
+static int64_t delay_host(int64_t diff_clk)
+{
+    if (diff_clk > VM_CLOCK_ADVANCE) {
+#ifndef _WIN32
+        struct timespec sleep_delay, rem_delay;
+        sleep_delay.tv_sec = diff_clk / 1000000000LL;
+        sleep_delay.tv_nsec = diff_clk % 1000000000LL;
+        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
+            diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL;
+            diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec;
+        } else {
+            diff_clk = 0;
+        }
+#else
+        Sleep(diff_clk / SCALE_MS);
+        diff_clk = 0;
+#endif
+    }
+    return diff_clk;
+}
+
+static int64_t instr_to_vtime(int64_t instr_counter, const CPUState *cpu)
+{
+    int64_t instr_exec_time;
+    instr_exec_time = instr_counter -
+                      (cpu->icount_extra +
+                       cpu->icount_decr.u16.low);
+    instr_exec_time = instr_exec_time << icount_time_shift;
+
+    return instr_exec_time;
+}
+
+static void align_clocks(SyncClocks *sc, const CPUState *cpu)
+{
+    if (!icount_align_option) {
+        return;
+    }
+    sc->diff_clk += instr_to_vtime(sc->original_instr_counter, cpu);
+    sc->original_instr_counter = cpu->icount_extra + cpu->icount_decr.u16.low;
+    sc->diff_clk = delay_host(sc->diff_clk);
+}
+
+static void init_delay_params(SyncClocks *sc,
+                              const CPUState *cpu)
+{
+    if (!icount_align_option) {
+        return;
+    }
+    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) -
+                   qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+                   cpu_get_clock_offset();
+    sc->original_instr_counter = cpu->icount_extra + cpu->icount_decr.u16.low;
+}
+#else
+static void align_clocks(SyncClocks *sc, const CPUState *cpu)
+{
+}
+
+static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
+{
+}
+#endif /* CONFIG USER ONLY */
 
 void cpu_loop_exit(CPUState *cpu)
 {
@@ -227,6 +305,8 @@ int cpu_exec(CPUArchState *env)
     TranslationBlock *tb;
     uint8_t *tc_ptr;
     uintptr_t next_tb;
+    SyncClocks sc;
+
     /* This must be volatile so it is not trashed by longjmp() */
     volatile bool have_tb_lock = false;
 
@@ -283,6 +363,13 @@ int cpu_exec(CPUArchState *env)
 #endif
     cpu->exception_index = -1;
 
+    /* Calculate difference between guest clock and host clock.
+     * This delay includes the delay of the last cycle, so
+     * what we have to do is sleep until it is 0. As for the
+     * advance/delay we gain here, we try to fix it next time.
+     */
+    init_delay_params(&sc, cpu);
+
     /* prepare setjmp context for exception handling */
     for(;;) {
         if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@@ -672,6 +759,7 @@ int cpu_exec(CPUArchState *env)
                             if (insns_left > 0) {
                                 /* Execute remaining instructions.  */
                                 cpu_exec_nocache(env, insns_left, tb);
+                                align_clocks(&sc, cpu);
                             }
                             cpu->exception_index = EXCP_INTERRUPT;
                             next_tb = 0;
@@ -684,6 +772,9 @@ int cpu_exec(CPUArchState *env)
                     }
                 }
                 cpu->current_tb = NULL;
+                /* Try to align the host and virtual clocks
+                   if the guest is in advance */
+                align_clocks(&sc, cpu);
                 /* reset soft MMU for next block (it can currently
                    only be set by a memory fault) */
             } /* for(;;) */
diff --git a/cpus.c b/cpus.c
index de0a8b2..fcc5308 100644
--- a/cpus.c
+++ b/cpus.c
@@ -218,6 +218,23 @@ int64_t cpu_get_clock(void)
     return ti;
 }
 
+/* return the offset between the host clock and virtual CPU clock */
+int64_t cpu_get_clock_offset(void)
+{
+    int64_t ti;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        ti = timers_state.cpu_clock_offset;
+        if (!timers_state.cpu_ticks_enabled) {
+            ti -= get_clock();
+        }
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return -ti;
+}
+
 /* enable cpu_get_ticks()
  * Caller must hold BQL which server as mutex for vm_clock_seqlock.
  */
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 7f9a074..102f442 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -745,6 +745,7 @@ static inline int64_t get_clock(void)
 /* icount */
 int64_t cpu_get_icount(void);
 int64_t cpu_get_clock(void);
+int64_t cpu_get_clock_offset(void);
 
 /*******************************************/
 /* host CPU ticks (if available) */
-- 
2.0.0.rc2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]