[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [mttcg RFC v4 4/6] softmmu_llsc_template.h: move to multithreading
From: |
Alvise Rigo |
Subject: |
[Qemu-devel] [mttcg RFC v4 4/6] softmmu_llsc_template.h: move to multithreading |
Date: |
Fri, 14 Aug 2015 17:55:30 +0200 |
Update the TCG LL/SC helpers so that they work with multi-threaded TCG.
The basic idea remains untouched, but the whole mechanism now takes into
account the concurrent execution of multiple vCPUs.
In essence, when a vCPU performs a LL, it checks which vCPUs do not have the
excl bit set for the accessed page. For each of those vCPUs it then:
- sets the excl bit
- requests (queries) a TLB flush
By doing so, we make sure that all the vCPUs have the EXCL flag set in the
TLB entry for that specific page *before* entering the next TB.
Changes from v3:
- The rendez-vous mechanism has been removed, since the reworked
TLB flush query serves the same purpose.
Suggested-by: Jani Kokkonen <address@hidden>
Suggested-by: Claudio Fontana <address@hidden>
Signed-off-by: Alvise Rigo <address@hidden>
---
cputlb.c | 4 ++++
softmmu_llsc_template.h | 59 ++++++++++++++++++++++++++++++++++++-------------
2 files changed, 48 insertions(+), 15 deletions(-)
diff --git a/cputlb.c b/cputlb.c
index 7cbaaca..08949df 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -36,6 +36,10 @@
/* statistics */
int tlb_flush_count;
+/* For atomic instruction handling. */
+int exit_flush_request = 0;
+QemuMutex tcg_excl_access_lock;
+
/* NOTE:
* If flush_global is true (the usual case), flush all tlb entries.
* If flush_global is false, flush (at least) all tlb entries not
diff --git a/softmmu_llsc_template.h b/softmmu_llsc_template.h
index d2e92b4..9486385 100644
--- a/softmmu_llsc_template.h
+++ b/softmmu_llsc_template.h
@@ -33,25 +33,39 @@
#define helper_ldlink_name glue(glue(helper_be_ldlink, USUFFIX), MMUSUFFIX)
#define helper_stcond_name glue(glue(helper_be_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
#else /* LE helpers + 8bit helpers (generated only once for both LE end BE) */
#if DATA_SIZE > 1
#define helper_ldlink_name glue(glue(helper_le_ldlink, USUFFIX), MMUSUFFIX)
#define helper_stcond_name glue(glue(helper_le_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
#else /* DATA_SIZE <= 1 */
#define helper_ldlink_name glue(glue(helper_ret_ldlink, USUFFIX), MMUSUFFIX)
#define helper_stcond_name glue(glue(helper_ret_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
#endif
#endif
+#define is_read_tlb_entry_set(env, page, index) \
+({ \
+ (addr & TARGET_PAGE_MASK) \
+ == ((env->tlb_table[mmu_idx][index].addr_read) & \
+ (TARGET_PAGE_MASK | TLB_INVALID_MASK)); \
+})
+/* Whenever a SC operation fails, we add a small delay to reduce contention
+ * among the vCPUs emulating atomic instructions. Without this delay, in very
+ * congested situations where plain stores make all the pending LLs fail, the
+ * code could reach a stalling situation in which all the SCs keep failing.
+ * The value is passed to g_usleep(), i.e. it is expressed in microseconds.
+ */
+#define TCG_ATOMIC_INSN_EMUL_DELAY 100
+
WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
TCGMemOpIdx oi, uintptr_t retaddr)
{
@@ -61,11 +75,13 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env,
target_ulong addr,
hwaddr hw_addr;
unsigned mmu_idx = get_mmuidx(oi);
- /* Use the proper load helper from cpu_ldst.h */
- ret = helper_ld_legacy(env, addr, mmu_idx, retaddr);
-
index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ if (!is_read_tlb_entry_set(env, addr, index) ||
+ !VICTIM_TLB_HIT(addr_read)) {
+ tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ }
+
/* hw_addr = hwaddr of the page (i.e. section->mr->ram_addr + xlat)
* plus the offset (i.e. addr & ~TARGET_PAGE_MASK) */
hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) + addr;
@@ -73,22 +89,34 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env,
target_ulong addr,
cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
/* If all the vCPUs have the EXCL bit set for this page there is no need
* to request any flush. */
- if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
+ if (unlikely(!atomic_xchg(&exit_flush_request, 1) &&
+ cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus))) {
CPU_FOREACH(cpu) {
- if (current_cpu != cpu) {
+ if (cpu->thread_id != qemu_get_thread_id()) {
if (cpu_physical_memory_excl_is_dirty(hw_addr,
cpu->cpu_index)) {
cpu_physical_memory_clear_excl_dirty(hw_addr,
cpu->cpu_index);
- tlb_flush(cpu, 1);
+ tlb_query_flush_cpu(cpu, 1);
}
}
}
+
+ atomic_set(&exit_flush_request, 0);
}
+ env->ll_sc_context = true;
+
+ qemu_mutex_lock(&tcg_excl_access_lock);
+
+ /* Use the proper load helper from cpu_ldst.h */
+ ret = helper_ld(env, addr, mmu_idx, retaddr);
+
env->excl_protected_range.begin = hw_addr;
env->excl_protected_range.end = hw_addr + DATA_SIZE;
+ qemu_mutex_unlock(&tcg_excl_access_lock);
+
/* For this vCPU, just update the TLB entry, no need to flush. */
env->tlb_table[mmu_idx][index].addr_write |= TLB_EXCL;
@@ -106,12 +134,13 @@ WORD_TYPE helper_stcond_name(CPUArchState *env,
target_ulong addr,
* access as one made by the store conditional wrapper. If the store
* conditional does not succeed, the value will be set to 0.*/
env->excl_succeeded = 1;
- helper_st_legacy(env, addr, val, mmu_idx, retaddr);
+ helper_st(env, addr, val, mmu_idx, retaddr);
if (env->excl_succeeded) {
env->excl_succeeded = 0;
ret = 0;
} else {
+ g_usleep(TCG_ATOMIC_INSN_EMUL_DELAY);
ret = 1;
}
@@ -120,5 +149,5 @@ WORD_TYPE helper_stcond_name(CPUArchState *env,
target_ulong addr,
#undef helper_ldlink_name
#undef helper_stcond_name
-#undef helper_ld_legacy
-#undef helper_st_legacy
+#undef helper_ld
+#undef helper_st
--
2.5.0
- [Qemu-devel] [mttcg RFC v4 0/6] Atomic slow-path for mttcg, Alvise Rigo, 2015/08/14
- [Qemu-devel] [mttcg RFC v4 4/6] softmmu_llsc_template.h: move to multithreading,
Alvise Rigo <=
- [Qemu-devel] [mttcg RFC v4 1/6] cpus: async_run_on_cpu: kick only if needed, Alvise Rigo, 2015/08/14
- [Qemu-devel] [mttcg RFC v4 2/6] cputlb: wrap tlb_flush with the a new function, Alvise Rigo, 2015/08/14
- [Qemu-devel] [mttcg RFC v4 3/6] exec: ram_addr: Fix exclusive bitmap accessor, Alvise Rigo, 2015/08/14
- [Qemu-devel] [mttcg RFC v4 6/6] target-arm: Use a runtime helper for excl accesses, Alvise Rigo, 2015/08/14
- [Qemu-devel] [mttcg RFC v4 5/6] softmmu_template.h: move to multithreading, Alvise Rigo, 2015/08/14