[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC 27/38] cpu-exec: convert tb_invalidated_flag into a per-TB flag
From: Emilio G. Cota
Subject: [Qemu-devel] [RFC 27/38] cpu-exec: convert tb_invalidated_flag into a per-TB flag
Date: Sun, 23 Aug 2015 20:23:56 -0400
This will allow us to safely look up TBs without taking any locks.
Note however that tb_lock protects the valid field, so if chaining
is an option then we'll have to acquire the lock.
Signed-off-by: Emilio G. Cota <address@hidden>
---
cpu-exec.c | 23 +++++++---------------
include/exec/exec-all.h | 3 +--
translate-all.c | 51 +++++++++++++++++--------------------------------
3 files changed, 25 insertions(+), 52 deletions(-)
diff --git a/cpu-exec.c b/cpu-exec.c
index 5ad578d..826ec25 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -239,9 +239,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
tb_lock();
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles | CF_NOCACHE);
- tb->orig_tb = (atomic_mb_read(&tcg_ctx.tb_ctx.tb_invalidated_flag)
- ? NULL
- : orig_tb);
+ tb->orig_tb = orig_tb->valid ? orig_tb : NULL;
cpu->current_tb = tb;
tb_unlock();
@@ -268,8 +266,6 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
tb_page_addr_t phys_pc, phys_page1;
target_ulong virt_page2;
- atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 0);
-
/* find translated block using physical mappings */
phys_pc = get_page_addr_code(env, pc);
phys_page1 = phys_pc & TARGET_PAGE_MASK;
@@ -536,15 +532,6 @@ int cpu_exec(CPUState *cpu)
cpu_loop_exit(cpu);
}
tb = tb_find_fast(cpu);
- /* Note: we do it here to avoid a gcc bug on Mac OS X when
- doing it in tb_find_slow */
- if (atomic_mb_read(&tcg_ctx.tb_ctx.tb_invalidated_flag)) {
- /* as some TB could have been invalidated because
- of memory exceptions while generating the code, we
- must recompute the hash index here */
- next_tb = 0;
- atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 0);
- }
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
@@ -553,9 +540,13 @@ int cpu_exec(CPUState *cpu)
spans two pages, we cannot safely do a direct
jump. */
if (next_tb != 0 && tb->page_addr[1] == -1) {
+ TranslationBlock *next;
+
tb_lock_recursive();
- tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
- next_tb & TB_EXIT_MASK, tb);
+ next = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+ if (tb->valid && next->valid) {
+ tb_add_jump(next, next_tb & TB_EXIT_MASK, tb);
+ }
}
/* The lock may not be taken if we went through the
* fast lookup path and did not have to do any patching.
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 3b8399a..7e4aea7 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -178,6 +178,7 @@ struct TranslationBlock {
jmp_first */
struct TranslationBlock *jmp_next[2];
struct TranslationBlock *jmp_first;
+ bool valid; /* protected by tb_lock */
};
#include "qemu/thread.h"
@@ -195,8 +196,6 @@ struct TBContext {
/* statistics */
int tb_flush_count;
int tb_phys_invalidate_count;
-
- int tb_invalidated_flag;
};
void tb_free(TranslationBlock *tb);
diff --git a/translate-all.c b/translate-all.c
index 668b43a..94adcd0 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -791,6 +791,17 @@ static inline void invalidate_page_bitmap(PageDesc *p)
#endif
}
+static void tb_invalidate_all(void)
+{
+ int i;
+
+ for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
+ TranslationBlock *tb = &tcg_ctx.tb_ctx.tbs[i];
+
+ tb->valid = false;
+ }
+}
+
/* Set to NULL all the 'first_tb' fields in all PageDescs. */
static void page_flush_tb_1(int level, void **lp)
{
@@ -866,6 +877,7 @@ void tb_flush(CPUState *cpu)
cpu_tb_jmp_cache_clear(cpu);
}
+ tb_invalidate_all();
memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
page_flush_tb();
@@ -1021,11 +1033,6 @@ void tb_phys_invalidate(TranslationBlock *tb,
tb_page_addr_t page_addr)
tb_page_addr_t phys_pc;
TranslationBlock *tb1, *tb2;
- /* Set the invalidated_flag first, to block patching a
- * jump to tb. FIXME: invalidated_flag should be per TB.
- */
- atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 1);
-
/* Now remove the TB from the hash list, so that tb_find_slow
* cannot find it anymore.
*/
@@ -1045,8 +1052,6 @@ void tb_phys_invalidate(TranslationBlock *tb,
tb_page_addr_t page_addr)
invalidate_page_bitmap(p);
}
- tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
-
/* remove the TB from the hash list */
CPU_FOREACH(cpu) {
tb_jmp_cache_entry_clear(cpu, tb);
@@ -1070,33 +1075,7 @@ void tb_phys_invalidate(TranslationBlock *tb,
tb_page_addr_t page_addr)
}
tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
-#if 0
- /* TODO: I think this barrier is not necessary. On the
- * cpu_exec side, it is okay if the read from tb_jmp_cache
- * comes after the read from tb_phys_hash. This is because
- * the read would be bleeding into the tb_lock critical
- * section, hence there cannot be any concurrent tb_invalidate.
- * And if you don't need a barrier there, you shouldn't need
- * one here, either.
- */
- smp_wmb();
-#endif
-
- /* Finally, remove the TB from the per-CPU cache that is
- * accessed without tb_lock. The tb can still be executed
- * once after returning, if the cache was accessed before
- * this point, but that's it.
- *
- * The cache cannot be filled with this tb anymore, because
- * the lists are accessed with tb_lock held.
- */
- h = tb_jmp_cache_hash_func(tb->pc);
- CPU_FOREACH(cpu) {
- if (cpu->tb_jmp_cache[h] == tb) {
- cpu->tb_jmp_cache[h] = NULL;
- }
- }
-
+ tb->valid = false;
tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
}
@@ -1157,12 +1136,16 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_flush_safe(cpu);
#endif
cpu_loop_exit(cpu);
+ tb_flush(cpu);
+ /* cannot fail at this point */
+ tb = tb_alloc(pc);
}
tb->tc_ptr = tcg_ctx.code_gen_ptr;
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
+ tb->valid = true;
cpu_gen_code(env, tb, &code_gen_size);
tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
--
1.9.1
- Re: [Qemu-devel] [RFC 35/38] cputlb: use cpu_tcg_sched_work for tlb_flush_all, (continued)
- [Qemu-devel] [RFC 31/38] cpu: protect l1_map with tb_lock in full-system mode, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 27/38] cpu-exec: convert tb_invalidated_flag into a per-TB flag,
Emilio G. Cota <=
- [Qemu-devel] [RFC 33/38] cpu: introduce cpu_tcg_sched_work to run work while other CPUs sleep, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 21/38] target-i386: emulate atomic instructions + barriers using AIE, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 38/38] Revert "target-i386: yield to another VCPU on PAUSE", Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 37/38] cpus: remove async_run_safe_work_on_cpu, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 32/38] cpu list: convert to RCU QLIST, Emilio G. Cota, 2015/08/23
- [Qemu-devel] [RFC 28/38] cpu-exec: use RCU to perform lockless TB lookups, Emilio G. Cota, 2015/08/23
- Re: [Qemu-devel] [RFC 00/38] MTTCG: i386, user+system mode, Paolo Bonzini, 2015/08/24