[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] tcg: consistently access cpu->tb_jmp_cache atomically
From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH] tcg: consistently access cpu->tb_jmp_cache atomically
Date: Thu, 15 Jun 2017 10:00:16 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.1.0
On 15/06/2017 02:36, Emilio G. Cota wrote:
> Some code paths can lead to atomic accesses racing with memset()
> on cpu->tb_jmp_cache, which can result in torn reads/writes
> and is undefined behaviour in C11.
>
> These torn accesses are unlikely to show up as bugs, but from code
> inspection they seem possible. For example, tb_phys_invalidate does:
> /* remove the TB from the hash list */
> h = tb_jmp_cache_hash_func(tb->pc);
> CPU_FOREACH(cpu) {
> if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
> atomic_set(&cpu->tb_jmp_cache[h], NULL);
> }
> }
> Here atomic_set might race with a concurrent memset (such as the
> ones scheduled via "unsafe" async work, e.g. tlb_flush_page) and
> therefore we might end up with a torn pointer (or who knows what,
> because we are under undefined behaviour).
Also atomic_read could, which is worse (though in this case it would
only lead to a useless NULL write).
Reviewed-by: Paolo Bonzini <address@hidden>
>
> This patch converts parallel accesses to cpu->tb_jmp_cache to use
> atomic primitives, thereby bringing these accesses back to defined
> behaviour. The price to pay is to potentially execute more instructions
> when clearing cpu->tb_jmp_cache, but given how infrequently they happen
> and the small size of the cache, the performance impact I have measured
> is within noise range when booting debian-arm.
>
> Note that under "safe async" work (e.g. do_tb_flush) we could use memset
> because no other vcpus are running. However I'm keeping these accesses
> atomic as well to keep things simple and to avoid confusing analysis
> tools such as ThreadSanitizer.
>
> Signed-off-by: Emilio G. Cota <address@hidden>
> ---
> cputlb.c | 4 ++--
> include/qom/cpu.h | 11 ++++++++++-
> qom/cpu.c | 5 +----
> translate-all.c | 25 ++++++++++++-------------
> 4 files changed, 25 insertions(+), 20 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index 743776a..e85b6d3 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -118,7 +118,7 @@ static void tlb_flush_nocheck(CPUState *cpu)
>
> memset(env->tlb_table, -1, sizeof(env->tlb_table));
> memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
> - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
> + cpu_tb_jmp_cache_clear(cpu);
>
> env->vtlb_index = 0;
> env->tlb_flush_addr = -1;
> @@ -183,7 +183,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu,
> run_on_cpu_data data)
> }
> }
>
> - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
> + cpu_tb_jmp_cache_clear(cpu);
>
> tlb_debug("done\n");
>
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 89ddb68..2fe7cff 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -346,7 +346,7 @@ struct CPUState {
>
> void *env_ptr; /* CPUArchState */
>
> - /* Writes protected by tb_lock, reads not thread-safe */
> + /* Accessed in parallel; all accesses must be atomic */
> struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
>
> struct GDBRegisterState *gdb_regs;
> @@ -422,6 +422,15 @@ extern struct CPUTailQ cpus;
>
> extern __thread CPUState *current_cpu;
>
> +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
> + atomic_set(&cpu->tb_jmp_cache[i], NULL);
> + }
> +}
> +
> /**
> * qemu_tcg_mttcg_enabled:
> * Check whether we are running MultiThread TCG or not.
> diff --git a/qom/cpu.c b/qom/cpu.c
> index 5069876..585419b 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -274,7 +274,6 @@ void cpu_reset(CPUState *cpu)
> static void cpu_common_reset(CPUState *cpu)
> {
> CPUClass *cc = CPU_GET_CLASS(cpu);
> - int i;
>
> if (qemu_loglevel_mask(CPU_LOG_RESET)) {
> qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
> @@ -292,9 +291,7 @@ static void cpu_common_reset(CPUState *cpu)
> cpu->crash_occurred = false;
>
> if (tcg_enabled()) {
> - for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
> - atomic_set(&cpu->tb_jmp_cache[i], NULL);
> - }
> + cpu_tb_jmp_cache_clear(cpu);
>
> #ifdef CONFIG_SOFTMMU
> tlb_flush(cpu, 0);
> diff --git a/translate-all.c b/translate-all.c
> index b3ee876..af3c4df 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -923,11 +923,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data
> tb_flush_count)
> }
>
> CPU_FOREACH(cpu) {
> - int i;
> -
> - for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
> - atomic_set(&cpu->tb_jmp_cache[i], NULL);
> - }
> + cpu_tb_jmp_cache_clear(cpu);
> }
>
> tcg_ctx.tb_ctx.nb_tbs = 0;
> @@ -1806,19 +1802,22 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t
> retaddr)
> cpu_loop_exit_noexc(cpu);
> }
>
> -void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
> +static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
> {
> + unsigned int i0 = tb_jmp_cache_hash_page(page_addr);
> unsigned int i;
>
> + for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
> + atomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
> + }
> +}
> +
> +void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
> +{
> /* Discard jump cache entries for any tb which might potentially
> overlap the flushed page. */
> - i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
> - memset(&cpu->tb_jmp_cache[i], 0,
> - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
> -
> - i = tb_jmp_cache_hash_page(addr);
> - memset(&cpu->tb_jmp_cache[i], 0,
> - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
> + tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
> + tb_jmp_cache_clear_page(cpu, addr);
> }
>
> static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
>