[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH V6 05/18] protect TBContext with tb_lock.
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [RFC PATCH V6 05/18] protect TBContext with tb_lock. |
Date: |
Tue, 07 Jul 2015 13:22:40 +0100 |
address@hidden writes:
> From: KONRAD Frederic <address@hidden>
>
> This protects TBContext with tb_lock to make tb_* thread safe.
>
> We can still have an issue with tb_flush in the case of multithreaded TCG:
> Another CPU can be executing code during a flush.
>
> This can be fixed later by making all other TCG threads exit before calling
> tb_flush().
>
> tb_find_slow is separated into tb_find_slow and tb_find_physical as the whole
> of tb_find_slow doesn't require holding the tb lock.
>
> Signed-off-by: KONRAD Frederic <address@hidden>
So my comments from earlier about the different locking between
CONFIG_USER and system emulation still stand. Ultimately we need a good
reason (or an abstraction) before sprinkling #ifdefs in the code if only
for ease of reading.
>
> Changes:
> V1 -> V2:
> * Drop a tb_lock around tb_find_fast in cpu-exec.c.
> ---
> cpu-exec.c | 60 ++++++++++++++--------
> target-arm/translate.c | 5 ++
> tcg/tcg.h | 7 +++
> translate-all.c | 137
> ++++++++++++++++++++++++++++++++++++++-----------
> 4 files changed, 158 insertions(+), 51 deletions(-)
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index d6336d9..5d9b518 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -130,6 +130,8 @@ static void init_delay_params(SyncClocks *sc, const
> CPUState *cpu)
> void cpu_loop_exit(CPUState *cpu)
> {
> cpu->current_tb = NULL;
> + /* Release those mutex before long jump so other thread can work. */
> + tb_lock_reset();
> siglongjmp(cpu->jmp_env, 1);
> }
>
> @@ -142,6 +144,8 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
> /* XXX: restore cpu registers saved in host registers */
>
> cpu->exception_index = -1;
> + /* Release those mutex before long jump so other thread can work. */
> + tb_lock_reset();
> siglongjmp(cpu->jmp_env, 1);
> }
>
> @@ -253,12 +257,9 @@ static void cpu_exec_nocache(CPUArchState *env, int
> max_cycles,
> tb_free(tb);
> }
>
> -static TranslationBlock *tb_find_slow(CPUArchState *env,
> - target_ulong pc,
> - target_ulong cs_base,
> - uint64_t flags)
> +static TranslationBlock *tb_find_physical(CPUArchState *env, target_ulong pc,
> + target_ulong cs_base, uint64_t
> flags)
> {
As Paolo has already mentioned comments on functions expecting to have
locks held when called.
> - CPUState *cpu = ENV_GET_CPU(env);
> TranslationBlock *tb, **ptb1;
> unsigned int h;
> tb_page_addr_t phys_pc, phys_page1;
> @@ -273,8 +274,9 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
> ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
> for(;;) {
> tb = *ptb1;
> - if (!tb)
> - goto not_found;
> + if (!tb) {
> + return tb;
> + }
> if (tb->pc == pc &&
> tb->page_addr[0] == phys_page1 &&
> tb->cs_base == cs_base &&
> @@ -282,28 +284,43 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
> /* check next page if needed */
> if (tb->page_addr[1] != -1) {
> tb_page_addr_t phys_page2;
> -
> virt_page2 = (pc & TARGET_PAGE_MASK) +
> TARGET_PAGE_SIZE;
> phys_page2 = get_page_addr_code(env, virt_page2);
> - if (tb->page_addr[1] == phys_page2)
> - goto found;
> + if (tb->page_addr[1] == phys_page2) {
> + return tb;
> + }
> } else {
> - goto found;
> + return tb;
> }
> }
> ptb1 = &tb->phys_hash_next;
> }
> - not_found:
> - /* if no translated code available, then translate it now */
> - tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
> -
> - found:
> - /* Move the last found TB to the head of the list */
> - if (likely(*ptb1)) {
> - *ptb1 = tb->phys_hash_next;
> - tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
> - tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
> + return tb;
> +}
> +
> +static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
> + target_ulong cs_base, uint64_t flags)
> +{
> + /*
> + * First try to get the tb if we don't find it we need to lock and
> compile
> + * it.
> + */
> + CPUState *cpu = ENV_GET_CPU(env);
> + TranslationBlock *tb;
> +
> + tb = tb_find_physical(env, pc, cs_base, flags);
> + if (!tb) {
> + tb_lock();
> + /*
> + * Retry to get the TB in case a CPU just translate it to avoid
> having
> + * duplicated TB in the pool.
> + */
> + tb = tb_find_physical(env, pc, cs_base, flags);
> + if (!tb) {
> + tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
> + }
> + tb_unlock();
> }
> /* we add the TB in the virtual pc hash table */
> cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
> @@ -326,6 +343,7 @@ static inline TranslationBlock *tb_find_fast(CPUArchState
> *env)
> tb->flags != flags)) {
> tb = tb_find_slow(env, pc, cs_base, flags);
> }
> +
> return tb;
> }
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 971b6db..47345aa 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -11162,6 +11162,8 @@ static inline void
> gen_intermediate_code_internal(ARMCPU *cpu,
>
> dc->tb = tb;
>
> + tb_lock();
> +
> dc->is_jmp = DISAS_NEXT;
> dc->pc = pc_start;
> dc->singlestep_enabled = cs->singlestep_enabled;
> @@ -11499,6 +11501,7 @@ done_generating:
> tb->size = dc->pc - pc_start;
> tb->icount = num_insns;
> }
> + tb_unlock();
> }
>
> void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
> @@ -11567,6 +11570,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f,
> fprintf_function cpu_fprintf,
>
> void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
> {
> + tb_lock();
> if (is_a64(env)) {
> env->pc = tcg_ctx.gen_opc_pc[pc_pos];
> env->condexec_bits = 0;
> @@ -11574,4 +11578,5 @@ void restore_state_to_opc(CPUARMState *env,
> TranslationBlock *tb, int pc_pos)
> env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
> env->condexec_bits = gen_opc_condexec_bits[pc_pos];
> }
> + tb_unlock();
> }
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 41e4869..032fe10 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -592,17 +592,24 @@ void *tcg_malloc_internal(TCGContext *s, int size);
> void tcg_pool_reset(TCGContext *s);
> void tcg_pool_delete(TCGContext *s);
>
> +void tb_lock(void);
> +void tb_unlock(void);
> +void tb_lock_reset(void);
> +
> static inline void *tcg_malloc(int size)
> {
> TCGContext *s = &tcg_ctx;
> uint8_t *ptr, *ptr_end;
> + tb_lock();
> size = (size + sizeof(long) - 1) & ~(sizeof(long) - 1);
> ptr = s->pool_cur;
> ptr_end = ptr + size;
> if (unlikely(ptr_end > s->pool_end)) {
> + tb_unlock();
> return tcg_malloc_internal(&tcg_ctx, size);
If the purpose of the lock is to protect the global tcg_ctx then we
shouldn't be unlocking before calling the _internal function which also
messes with the context.
> } else {
> s->pool_cur = ptr_end;
> + tb_unlock();
> return ptr;
> }
> }
> diff --git a/translate-all.c b/translate-all.c
> index b6b0e1c..c25b79b 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -127,6 +127,34 @@ static void *l1_map[V_L1_SIZE];
> /* code generation context */
> TCGContext tcg_ctx;
>
> +/* translation block context */
> +__thread volatile int have_tb_lock;
> +
> +void tb_lock(void)
> +{
> + if (!have_tb_lock) {
> + qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
> + }
> + have_tb_lock++;
> +}
> +
> +void tb_unlock(void)
> +{
> + assert(have_tb_lock > 0);
> + have_tb_lock--;
> + if (!have_tb_lock) {
> + qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
> + }
> +}
> +
> +void tb_lock_reset(void)
> +{
> + if (have_tb_lock) {
> + qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
> + }
> + have_tb_lock = 0;
> +}
> +
> static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
> tb_page_addr_t phys_page2);
> static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
> @@ -215,6 +243,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu,
> TranslationBlock *tb,
> #ifdef CONFIG_PROFILER
> ti = profile_getclock();
> #endif
> + tb_lock();
> tcg_func_start(s);
>
> gen_intermediate_code_pc(env, tb);
> @@ -228,8 +257,10 @@ static int cpu_restore_state_from_tb(CPUState *cpu,
> TranslationBlock *tb,
>
> /* find opc index corresponding to search_pc */
> tc_ptr = (uintptr_t)tb->tc_ptr;
> - if (searched_pc < tc_ptr)
> + if (searched_pc < tc_ptr) {
> + tb_unlock();
> return -1;
> + }
>
> s->tb_next_offset = tb->tb_next_offset;
> #ifdef USE_DIRECT_JUMP
> @@ -241,8 +272,10 @@ static int cpu_restore_state_from_tb(CPUState *cpu,
> TranslationBlock *tb,
> #endif
> j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
> searched_pc - tc_ptr);
> - if (j < 0)
> + if (j < 0) {
> + tb_unlock();
> return -1;
> + }
> /* now find start of instruction before */
> while (s->gen_opc_instr_start[j] == 0) {
> j--;
> @@ -255,6 +288,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu,
> TranslationBlock *tb,
> s->restore_time += profile_getclock() - ti;
> s->restore_count++;
> #endif
> +
> + tb_unlock();
> return 0;
> }
>
> @@ -672,6 +707,7 @@ static inline void code_gen_alloc(size_t tb_size)
> CODE_GEN_AVG_BLOCK_SIZE;
> tcg_ctx.tb_ctx.tbs =
> g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
> + qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
> }
>
> /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -696,16 +732,22 @@ bool tcg_enabled(void)
> return tcg_ctx.code_gen_buffer != NULL;
> }
>
> -/* Allocate a new translation block. Flush the translation buffer if
> - too many translation blocks or too much generated code. */
> +/*
> + * Allocate a new translation block. Flush the translation buffer if
> + * too many translation blocks or too much generated code.
> + * tb_alloc is not thread safe but tb_gen_code is protected by a mutex so
> this
> + * function is called only by one thread.
maybe: "..is not thread safe but tb_gen_code is protected by tb_lock so
only one thread calls it at a time."?
> + */
> static TranslationBlock *tb_alloc(target_ulong pc)
> {
> - TranslationBlock *tb;
> + TranslationBlock *tb = NULL;
>
> if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
> (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
> tcg_ctx.code_gen_buffer_max_size) {
> - return NULL;
> + tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
> + tb->pc = pc;
> + tb->cflags = 0;
> }
> tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
> tb->pc = pc;
That looks weird.
if (cond) return
&tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++] then return
&tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];?
Also rendering the setting of tb = NULL pointless as it will always be
from the array?
> @@ -718,11 +760,16 @@ void tb_free(TranslationBlock *tb)
> /* In practice this is mostly used for single use temporary TB
> Ignore the hard cases and just back up if this TB happens to
> be the last one generated. */
> +
> + tb_lock();
> +
> if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
> tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
> tcg_ctx.code_gen_ptr = tb->tc_ptr;
> tcg_ctx.tb_ctx.nb_tbs--;
> }
> +
> + tb_unlock();
> }
>
> static inline void invalidate_page_bitmap(PageDesc *p)
> @@ -773,6 +820,8 @@ void tb_flush(CPUArchState *env1)
> {
> CPUState *cpu = ENV_GET_CPU(env1);
>
> + tb_lock();
> +
> #if defined(DEBUG_FLUSH)
> printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
> (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
> @@ -797,6 +846,8 @@ void tb_flush(CPUArchState *env1)
> /* XXX: flush processor icache at this point if cache flush is
> expensive */
> tcg_ctx.tb_ctx.tb_flush_count++;
> +
> + tb_unlock();
> }
>
> #ifdef DEBUG_TB_CHECK
> @@ -806,6 +857,8 @@ static void tb_invalidate_check(target_ulong address)
> TranslationBlock *tb;
> int i;
>
> + tb_lock();
> +
> address &= TARGET_PAGE_MASK;
> for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
> for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb =
> tb->phys_hash_next) {
> @@ -817,6 +870,8 @@ static void tb_invalidate_check(target_ulong address)
> }
> }
> }
> +
> + tb_unlock();
> }
>
> /* verify that all the pages have correct rights for code */
> @@ -825,6 +880,8 @@ static void tb_page_check(void)
> TranslationBlock *tb;
> int i, flags1, flags2;
>
> + tb_lock();
> +
> for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
> for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
> tb = tb->phys_hash_next) {
> @@ -836,6 +893,8 @@ static void tb_page_check(void)
> }
> }
> }
> +
> + tb_unlock();
> }
>
> #endif
> @@ -916,6 +975,8 @@ void tb_phys_invalidate(TranslationBlock *tb,
> tb_page_addr_t page_addr)
> tb_page_addr_t phys_pc;
> TranslationBlock *tb1, *tb2;
>
> + tb_lock();
> +
> /* remove the TB from the hash list */
> phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
> h = tb_phys_hash_func(phys_pc);
> @@ -963,6 +1024,7 @@ void tb_phys_invalidate(TranslationBlock *tb,
> tb_page_addr_t page_addr)
> tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
>
> tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
> + tb_unlock();
> }
>
> static void build_page_bitmap(PageDesc *p)
> @@ -1004,6 +1066,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> target_ulong virt_page2;
> int code_gen_size;
>
> + tb_lock();
> +
> phys_pc = get_page_addr_code(env, pc);
> if (use_icount) {
> cflags |= CF_USE_ICOUNT;
> @@ -1032,6 +1096,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> phys_page2 = get_page_addr_code(env, virt_page2);
> }
> tb_link_page(tb, phys_pc, phys_page2);
> +
> + tb_unlock();
> return tb;
> }
>
> @@ -1330,13 +1396,15 @@ static inline void tb_alloc_page(TranslationBlock *tb,
> }
>
> /* add a new TB and link it to the physical page tables. phys_page2 is
> - (-1) to indicate that only one page contains the TB. */
> + * (-1) to indicate that only one page contains the TB. */
> static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
> tb_page_addr_t phys_page2)
> {
> unsigned int h;
> TranslationBlock **ptb;
>
> + tb_lock();
> +
> /* Grab the mmap lock to stop another thread invalidating this TB
> before we are done. */
> mmap_lock();
> @@ -1370,6 +1438,8 @@ static void tb_link_page(TranslationBlock *tb,
> tb_page_addr_t phys_pc,
> tb_page_check();
> #endif
> mmap_unlock();
> +
> + tb_unlock();
> }
>
> /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
> @@ -1378,31 +1448,34 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
> {
> int m_min, m_max, m;
> uintptr_t v;
> - TranslationBlock *tb;
> -
> - if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
> - return NULL;
> - }
> - if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
> - tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) {
> - return NULL;
> - }
> - /* binary search (cf Knuth) */
> - m_min = 0;
> - m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
> - while (m_min <= m_max) {
> - m = (m_min + m_max) >> 1;
> - tb = &tcg_ctx.tb_ctx.tbs[m];
> - v = (uintptr_t)tb->tc_ptr;
> - if (v == tc_ptr) {
> - return tb;
> - } else if (tc_ptr < v) {
> - m_max = m - 1;
> - } else {
> - m_min = m + 1;
> + TranslationBlock *tb = NULL;
> +
> + tb_lock();
> +
> + if ((tcg_ctx.tb_ctx.nb_tbs > 0)
> + && (tc_ptr >= (uintptr_t)tcg_ctx.code_gen_buffer &&
> + tc_ptr < (uintptr_t)tcg_ctx.code_gen_ptr)) {
> + /* binary search (cf Knuth) */
> + m_min = 0;
> + m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
> + while (m_min <= m_max) {
> + m = (m_min + m_max) >> 1;
> + tb = &tcg_ctx.tb_ctx.tbs[m];
> + v = (uintptr_t)tb->tc_ptr;
> + if (v == tc_ptr) {
> + tb_unlock();
> + return tb;
> + } else if (tc_ptr < v) {
> + m_max = m - 1;
> + } else {
> + m_min = m + 1;
> + }
> }
> + tb = &tcg_ctx.tb_ctx.tbs[m_max];
> }
> - return &tcg_ctx.tb_ctx.tbs[m_max];
> +
> + tb_unlock();
> + return tb;
> }
>
> #if !defined(CONFIG_USER_ONLY)
> @@ -1564,6 +1637,8 @@ void dump_exec_info(FILE *f, fprintf_function
> cpu_fprintf)
> int direct_jmp_count, direct_jmp2_count, cross_page;
> TranslationBlock *tb;
>
> + tb_lock();
> +
> target_code_size = 0;
> max_target_code_size = 0;
> cross_page = 0;
> @@ -1619,6 +1694,8 @@ void dump_exec_info(FILE *f, fprintf_function
> cpu_fprintf)
> tcg_ctx.tb_ctx.tb_phys_invalidate_count);
> cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
> tcg_dump_info(f, cpu_fprintf);
> +
> + tb_unlock();
> }
>
> void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
--
Alex Bennée
- Re: [Qemu-devel] [RFC PATCH V6 05/18] protect TBContext with tb_lock.,
Alex Bennée <=