[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] tcg: consistently access cpu->tb_jmp_cache atomically
From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH] tcg: consistently access cpu->tb_jmp_cache atomically
Date: Thu, 15 Jun 2017 10:00:16 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.1.0
On 15/06/2017 02:36, Emilio G. Cota wrote:
> Some code paths can lead to atomic accesses racing with memset()
> on cpu->tb_jmp_cache, which can result in torn reads/writes
> and is undefined behaviour in C11.
>
> These torn accesses are unlikely to show up as bugs, but from code
> inspection they seem possible. For example, tb_phys_invalidate does:
> /* remove the TB from the hash list */
> h = tb_jmp_cache_hash_func(tb->pc);
> CPU_FOREACH(cpu) {
> if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
> atomic_set(&cpu->tb_jmp_cache[h], NULL);
> }
> }
> Here atomic_set might race with a concurrent memset (such as the
> ones scheduled via "unsafe" async work, e.g. tlb_flush_page) and
> therefore we might end up with a torn pointer (or who knows what,
> because we are under undefined behaviour).
Also atomic_read could, which is worse (though in this case it would
only lead to a useless NULL write).
Reviewed-by: Paolo Bonzini <address@hidden>
>
> This patch converts parallel accesses to cpu->tb_jmp_cache to use
> atomic primitives, thereby bringing these accesses back to defined
> behaviour. The price to pay is to potentially execute more instructions
> when clearing cpu->tb_jmp_cache, but given how infrequently they happen
> and the small size of the cache, the performance impact I have measured
> is within noise range when booting debian-arm.
>
> Note that under "safe async" work (e.g. do_tb_flush) we could use memset
> because no other vcpus are running. However I'm keeping these accesses
> atomic as well to keep things simple and to avoid confusing analysis
> tools such as ThreadSanitizer.
>
> Signed-off-by: Emilio G. Cota <address@hidden>
> ---
> cputlb.c | 4 ++--
> include/qom/cpu.h | 11 ++++++++++-
> qom/cpu.c | 5 +----
> translate-all.c | 25 ++++++++++++-------------
> 4 files changed, 25 insertions(+), 20 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index 743776a..e85b6d3 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -118,7 +118,7 @@ static void tlb_flush_nocheck(CPUState *cpu)
>
> memset(env->tlb_table, -1, sizeof(env->tlb_table));
> memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
> - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
> + cpu_tb_jmp_cache_clear(cpu);
>
> env->vtlb_index = 0;
> env->tlb_flush_addr = -1;
> @@ -183,7 +183,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu,
> run_on_cpu_data data)
> }
> }
>
> - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
> + cpu_tb_jmp_cache_clear(cpu);
>
> tlb_debug("done\n");
>
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 89ddb68..2fe7cff 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -346,7 +346,7 @@ struct CPUState {
>
> void *env_ptr; /* CPUArchState */
>
> - /* Writes protected by tb_lock, reads not thread-safe */
> + /* Accessed in parallel; all accesses must be atomic */
> struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
>
> struct GDBRegisterState *gdb_regs;
> @@ -422,6 +422,15 @@ extern struct CPUTailQ cpus;
>
> extern __thread CPUState *current_cpu;
>
> +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
> + atomic_set(&cpu->tb_jmp_cache[i], NULL);
> + }
> +}
> +
> /**
> * qemu_tcg_mttcg_enabled:
> * Check whether we are running MultiThread TCG or not.
> diff --git a/qom/cpu.c b/qom/cpu.c
> index 5069876..585419b 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -274,7 +274,6 @@ void cpu_reset(CPUState *cpu)
> static void cpu_common_reset(CPUState *cpu)
> {
> CPUClass *cc = CPU_GET_CLASS(cpu);
> - int i;
>
> if (qemu_loglevel_mask(CPU_LOG_RESET)) {
> qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
> @@ -292,9 +291,7 @@ static void cpu_common_reset(CPUState *cpu)
> cpu->crash_occurred = false;
>
> if (tcg_enabled()) {
> - for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
> - atomic_set(&cpu->tb_jmp_cache[i], NULL);
> - }
> + cpu_tb_jmp_cache_clear(cpu);
>
> #ifdef CONFIG_SOFTMMU
> tlb_flush(cpu, 0);
> diff --git a/translate-all.c b/translate-all.c
> index b3ee876..af3c4df 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -923,11 +923,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data
> tb_flush_count)
> }
>
> CPU_FOREACH(cpu) {
> - int i;
> -
> - for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
> - atomic_set(&cpu->tb_jmp_cache[i], NULL);
> - }
> + cpu_tb_jmp_cache_clear(cpu);
> }
>
> tcg_ctx.tb_ctx.nb_tbs = 0;
> @@ -1806,19 +1802,22 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t
> retaddr)
> cpu_loop_exit_noexc(cpu);
> }
>
> -void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
> +static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
> {
> + unsigned int i0 = tb_jmp_cache_hash_page(page_addr);
> unsigned int i;
>
> + for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
> + atomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
> + }
> +}
> +
> +void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
> +{
> /* Discard jump cache entries for any tb which might potentially
> overlap the flushed page. */
> - i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
> - memset(&cpu->tb_jmp_cache[i], 0,
> - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
> -
> - i = tb_jmp_cache_hash_page(addr);
> - memset(&cpu->tb_jmp_cache[i], 0,
> - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
> + tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
> + tb_jmp_cache_clear_page(cpu, addr);
> }
>
> static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
>