From: Alvise Rigo
Subject: [Qemu-devel] [mttcg RFC v4 4/6] softmmu_llsc_template.h: move to multithreading
Date: Fri, 14 Aug 2015 17:55:30 +0200

Update the TCG LL/SC helpers to work in a multi-threaded environment.
The basic idea remains untouched, but the whole mechanism now takes into
account the execution of multiple, concurrent vCPUs.

In essence, when a vCPU performs a LL, it checks which vCPUs do not have
the excl bit set for the accessed page. For each of those vCPUs it then:
- sets the excl bit
- requests a TLB flush

Doing so, we make sure that all the vCPUs will have the EXCL flag set in
the TLB entry for that specific page *before* entering the next TB, as
sketched below.
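
Condensed sketch of the flush-request path (identifiers taken from the
diff below; an illustration of the control flow, not the literal helper
body):

    CPUState *cpu;

    /* This vCPU now tracks the page exclusively. */
    cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);

    /* If some other vCPU has not set the EXCL bit for this page yet,
     * claim the global flag, set the bit on its behalf and request a
     * TLB flush, so that it installs the TLB_EXCL entry before entering
     * its next TB. */
    if (!atomic_xchg(&exit_flush_request, 1) &&
        cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
        CPU_FOREACH(cpu) {
            if (cpu->thread_id != qemu_get_thread_id() &&
                cpu_physical_memory_excl_is_dirty(hw_addr, cpu->cpu_index)) {
                cpu_physical_memory_clear_excl_dirty(hw_addr, cpu->cpu_index);
                tlb_query_flush_cpu(cpu, 1);
            }
        }
        atomic_set(&exit_flush_request, 0);
    }

The atomic_xchg() on exit_flush_request makes sure that only one vCPU at
a time walks the CPU list and issues flush requests.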

Changes from v3:
- The rendezvous mechanism has been removed, since the reworked
  TLB flush query serves the same purpose.

Suggested-by: Jani Kokkonen <address@hidden>
Suggested-by: Claudio Fontana <address@hidden>
Signed-off-by: Alvise Rigo <address@hidden>
---
 cputlb.c                |  4 ++++
 softmmu_llsc_template.h | 59 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 7cbaaca..08949df 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -36,6 +36,10 @@
 /* statistics */
 int tlb_flush_count;
 
+/* For atomic instruction handling. */
+int exit_flush_request = 0;
+QemuMutex tcg_excl_access_lock;
+
 /* NOTE:
  * If flush_global is true (the usual case), flush all tlb entries.
  * If flush_global is false, flush (at least) all tlb entries not
diff --git a/softmmu_llsc_template.h b/softmmu_llsc_template.h
index d2e92b4..9486385 100644
--- a/softmmu_llsc_template.h
+++ b/softmmu_llsc_template.h
@@ -33,25 +33,39 @@
 
 #define helper_ldlink_name  glue(glue(helper_be_ldlink, USUFFIX), MMUSUFFIX)
 #define helper_stcond_name  glue(glue(helper_be_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
 
 #else /* LE helpers + 8bit helpers (generated only once for both LE end BE) */
 
 #if DATA_SIZE > 1
 #define helper_ldlink_name  glue(glue(helper_le_ldlink, USUFFIX), MMUSUFFIX)
 #define helper_stcond_name  glue(glue(helper_le_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
 #else /* DATA_SIZE <= 1 */
 #define helper_ldlink_name  glue(glue(helper_ret_ldlink, USUFFIX), MMUSUFFIX)
 #define helper_stcond_name  glue(glue(helper_ret_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
 #endif
 
 #endif
 
+#define is_read_tlb_entry_set(env, addr, index)                              \
+({                                                                           \
+    (addr & TARGET_PAGE_MASK)                                                \
+         == ((env->tlb_table[mmu_idx][index].addr_read) &                    \
+                 (TARGET_PAGE_MASK | TLB_INVALID_MASK));                     \
+})
+/* Whenever an SC operation fails, we add a small delay to reduce
+ * contention in the atomic instruction emulation code. Without this
+ * delay, in very congested situations where plain stores make all the
+ * pending LLs fail, the code could reach a livelock in which all the
+ * SCs keep failing.
+ */
+#define TCG_ATOMIC_INSN_EMUL_DELAY 100
+
 WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
 {
@@ -61,11 +75,13 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
     hwaddr hw_addr;
     unsigned mmu_idx = get_mmuidx(oi);
 
-    /* Use the proper load helper from cpu_ldst.h */
-    ret = helper_ld_legacy(env, addr, mmu_idx, retaddr);
-
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
 
+    if (!is_read_tlb_entry_set(env, addr, index) &&
+                        !VICTIM_TLB_HIT(addr_read)) {
+        tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+    }
+
     /* hw_addr = hwaddr of the page (i.e. section->mr->ram_addr + xlat)
      * plus the offset (i.e. addr & ~TARGET_PAGE_MASK) */
     hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) + addr;
@@ -73,22 +89,34 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
     cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
     /* If all the vCPUs have the EXCL bit set for this page there is no need
      * to request any flush. */
-    if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
+    if (unlikely(!atomic_xchg(&exit_flush_request, 1) &&
+        cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus))) {
         CPU_FOREACH(cpu) {
-            if (current_cpu != cpu) {
+            if (cpu->thread_id != qemu_get_thread_id()) {
                 if (cpu_physical_memory_excl_is_dirty(hw_addr,
                                                     cpu->cpu_index)) {
                     cpu_physical_memory_clear_excl_dirty(hw_addr,
                                                          cpu->cpu_index);
-                    tlb_flush(cpu, 1);
+                    tlb_query_flush_cpu(cpu, 1);
                 }
             }
         }
+
+        atomic_set(&exit_flush_request, 0);
     }
 
+    env->ll_sc_context = true;
+
+    qemu_mutex_lock(&tcg_excl_access_lock);
+
+    /* Use the proper load helper from cpu_ldst.h */
+    ret = helper_ld(env, addr, mmu_idx, retaddr);
+
     env->excl_protected_range.begin = hw_addr;
     env->excl_protected_range.end = hw_addr + DATA_SIZE;
 
+    qemu_mutex_unlock(&tcg_excl_access_lock);
+
     /* For this vCPU, just update the TLB entry, no need to flush. */
     env->tlb_table[mmu_idx][index].addr_write |= TLB_EXCL;
 
@@ -106,12 +134,13 @@ WORD_TYPE helper_stcond_name(CPUArchState *env, target_ulong addr,
      * access as one made by the store conditional wrapper. If the store
      * conditional does not succeed, the value will be set to 0.*/
     env->excl_succeeded = 1;
-    helper_st_legacy(env, addr, val, mmu_idx, retaddr);
+    helper_st(env, addr, val, mmu_idx, retaddr);
 
     if (env->excl_succeeded) {
         env->excl_succeeded = 0;
         ret = 0;
     } else {
+        g_usleep(TCG_ATOMIC_INSN_EMUL_DELAY);
         ret = 1;
     }
 
@@ -120,5 +149,5 @@ WORD_TYPE helper_stcond_name(CPUArchState *env, target_ulong addr,
 
 #undef helper_ldlink_name
 #undef helper_stcond_name
-#undef helper_ld_legacy
-#undef helper_st_legacy
+#undef helper_ld
+#undef helper_st
-- 
2.5.0
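
A note on tlb_query_flush_cpu(): it is introduced elsewhere in this
series, so only its call site is visible above. A minimal sketch of its
presumed semantics, assuming a hypothetical pending_tlb_flush field on
CPUState (both the field and the body below are illustrative, not the
series' actual implementation):

    /* Record a pending flush for @cpu and kick it out of its current TB,
     * so that the flush is serviced before the next TB executes. */
    static void tlb_query_flush_cpu(CPUState *cpu, int flush_global)
    {
        atomic_set(&cpu->pending_tlb_flush, flush_global); /* hypothetical */
        cpu_exit(cpu); /* existing API: request exit from the exec loop */
    }

This deferred-flush behaviour is what lets the commit message claim that
every vCPU has the EXCL flag set in its TLB entry *before* entering the
next TB.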