From: Pranith Kumar
Subject: Re: [Qemu-devel] [PATCH v6 13/19] cputlb: atomically update tlb fields used by tlb_reset_dirty
Date: Wed, 09 Nov 2016 14:36:37 -0500
Hi Alex,
This patch is causing some build errors on a 32-bit box:
In file included from /home/pranith/qemu/include/exec/exec-all.h:44:0,
from /home/pranith/qemu/cputlb.c:23:
/home/pranith/qemu/cputlb.c: In function ‘tlb_flush_page_by_mmuidx_async_work’:
/home/pranith/qemu/cputlb.c:54:36: error: format ‘%x’ expects argument of type
‘unsigned int’, but argument 5 has type ‘long unsigned int’ [-Werror=format=]
qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
^
/home/pranith/qemu/include/qemu/log.h:94:22: note: in definition of macro
‘qemu_log_mask’
qemu_log(FMT, ## __VA_ARGS__); \
^~~
/home/pranith/qemu/cputlb.c:286:5: note: in expansion of macro ‘tlb_debug’
tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx%" PRIxPTR "\n",
^~~~~~~~~
/home/pranith/qemu/cputlb.c:57:25: error: format ‘%x’ expects argument of type
‘unsigned int’, but argument 6 has type ‘long unsigned int’ [-Werror=format=]
fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
^
/home/pranith/qemu/cputlb.c:286:5: note: in expansion of macro ‘tlb_debug’
tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx%" PRIxPTR "\n",
^~~~~~~~~
cc1: all warnings being treated as errors
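
One way to silence these on the 32-bit box (an untested sketch against your patch, assuming mmu_idx_bitmap stays an unsigned long) would be to print it with the matching length modifier instead of PRIxPTR, which expands to a plain "x" on that host:

    /* untested sketch: match the format to the unsigned long argument */
    tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
              page, addr, mmu_idx_bitmap);

Casting the bitmap to uintptr_t and keeping PRIxPTR would presumably also do the trick.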
Thanks,
Alex Bennée writes:
> The main use case for tlb_reset_dirty is to set the TLB_NOTDIRTY flags
> in TLB entries to force the slow-path on writes. This is used to mark
> page ranges containing code which has been translated so it can be
> invalidated if written to. To do this safely we need to ensure the TLB
> entries in question for all vCPUs are updated before we attempt to run
> the code otherwise a race could be introduced.
>
> To achieve this we atomically set the flag in tlb_reset_dirty_range and
> take care when setting it when the TLB entry is filled.
>
> The helper function is made static as it isn't used outside of cputlb.
>
> Signed-off-by: Alex Bennée <address@hidden>
>
> ---
> v6
> - use TARGET_PAGE_BITS_MIN
> - use run_on_cpu helpers
> ---
> cputlb.c | 250 +++++++++++++++++++++++++++++++++++++-------------
> include/exec/cputlb.h | 2 -
> include/qom/cpu.h | 12 +--
> 3 files changed, 194 insertions(+), 70 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index cd1ff71..ae94b7f 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -68,6 +68,11 @@
> * target_ulong even on 32 bit builds */
> QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
>
> +/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
> + */
> +QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
> +#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
> +
> /* statistics */
> int tlb_flush_count;
>
> @@ -98,7 +103,7 @@ static void tlb_flush_nocheck(CPUState *cpu, int flush_global)
>
> tb_unlock();
>
> - atomic_mb_set(&cpu->pending_tlb_flush, false);
> + atomic_mb_set(&cpu->pending_tlb_flush, 0);
> }
>
> static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
> @@ -121,7 +126,8 @@ static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
> void tlb_flush(CPUState *cpu, int flush_global)
> {
> if (cpu->created && !qemu_cpu_is_self(cpu)) {
> - if (atomic_cmpxchg(&cpu->pending_tlb_flush, false, true) == true) {
> + if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
> + atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
> async_run_on_cpu(cpu, tlb_flush_global_async_work,
> RUN_ON_CPU_HOST_INT(flush_global));
> }
> @@ -130,39 +136,78 @@ void tlb_flush(CPUState *cpu, int flush_global)
> }
> }
>
> -static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
> +static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
> {
> CPUArchState *env = cpu->env_ptr;
> + unsigned long mmu_idx_bitmask = data.host_ulong;
> + int mmu_idx;
>
> assert_cpu_is_self(cpu);
> - tlb_debug("start\n");
>
> tb_lock();
>
> - for (;;) {
> - int mmu_idx = va_arg(argp, int);
> + tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
>
> - if (mmu_idx < 0) {
> - break;
> - }
> + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
>
> - tlb_debug("%d\n", mmu_idx);
> + if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
> + tlb_debug("%d\n", mmu_idx);
>
> - memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
> - memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
> + memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
> + memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
> + }
> }
>
> memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
>
> + tlb_debug("done\n");
> +
> tb_unlock();
> }
>
> +/* Helper function to slurp va_args list into a bitmap
> + */
> +static inline unsigned long make_mmu_index_bitmap(va_list args)
> +{
> + unsigned long bitmap = 0;
> + int mmu_index = va_arg(args, int);
> +
> + /* An empty va_list would be a bad call */
> + g_assert(mmu_index > 0);
> +
> + do {
> + set_bit(mmu_index, &bitmap);
> + mmu_index = va_arg(args, int);
> + } while (mmu_index >= 0);
> +
> + return bitmap;
> +}
> +
> void tlb_flush_by_mmuidx(CPUState *cpu, ...)
> {
> va_list argp;
> + unsigned long mmu_idx_bitmap;
> +
> va_start(argp, cpu);
> - v_tlb_flush_by_mmuidx(cpu, argp);
> + mmu_idx_bitmap = make_mmu_index_bitmap(argp);
> va_end(argp);
> +
> + tlb_debug("mmu_idx: 0x%04lx\n", mmu_idx_bitmap);
> +
> + if (!qemu_cpu_is_self(cpu)) {
> + uint16_t pending_flushes =
> + mmu_idx_bitmap & ~atomic_mb_read(&cpu->pending_tlb_flush);
> + if (pending_flushes) {
> + tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
> +
> + atomic_or(&cpu->pending_tlb_flush, pending_flushes);
> + async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
> + RUN_ON_CPU_HOST_INT(pending_flushes));
> + }
> + } else {
> + tlb_flush_by_mmuidx_async_work(cpu,
> + RUN_ON_CPU_HOST_ULONG(mmu_idx_bitmap));
> + }
> }
>
> static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
> @@ -227,16 +272,50 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
> }
> }
>
> -void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
> +/* As we are going to hijack the bottom bits of the page address for a
> + * mmuidx bit mask we need to fail to build if we can't do that
> + */
> +QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
> +
> +static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
> + run_on_cpu_data data)
> {
> CPUArchState *env = cpu->env_ptr;
> - int i, k;
> - va_list argp;
> -
> - va_start(argp, addr);
> + target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
> + target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
> + unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
> + int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> + int mmu_idx;
> + int i;
>
> assert_cpu_is_self(cpu);
> - tlb_debug("addr "TARGET_FMT_lx"\n", addr);
> +
> + tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx%" PRIxPTR "\n",
> + page, addr, mmu_idx_bitmap);
> +
> + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> + if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
> + tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
> +
> + /* check whether there are vltb entries that need to be flushed */
> + for (i = 0; i < CPU_VTLB_SIZE; i++) {
> + tlb_flush_entry(&env->tlb_v_table[mmu_idx][i], addr);
> + }
> + }
> + }
> +
> + tb_flush_jmp_cache(cpu, addr);
> +}
> +
> +static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
> + run_on_cpu_data data)
> +{
> + CPUArchState *env = cpu->env_ptr;
> + target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
> + target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
> + unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
> +
> + tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
>
> /* Check if we need to flush due to large pages. */
> if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
> @@ -244,33 +323,35 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
> TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
> env->tlb_flush_addr, env->tlb_flush_mask);
>
> - v_tlb_flush_by_mmuidx(cpu, argp);
> - va_end(argp);
> - return;
> + tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_ULONG(mmu_idx_bitmap));
> + } else {
> + tlb_flush_page_by_mmuidx_async_work(cpu, data);
> }
> +}
>
> - addr &= TARGET_PAGE_MASK;
> - i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> -
> - for (;;) {
> - int mmu_idx = va_arg(argp, int);
> +void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
> +{
> + unsigned long mmu_idx_bitmap;
> + target_ulong addr_and_mmu_idx;
> + va_list argp;
>
> - if (mmu_idx < 0) {
> - break;
> - }
> + va_start(argp, addr);
> + mmu_idx_bitmap = make_mmu_index_bitmap(argp);
> + va_end(argp);
>
> - tlb_debug("idx %d\n", mmu_idx);
> + tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%lx\n", addr, mmu_idx_bitmap);
>
> - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
> + /* This should already be page aligned */
> + addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
> + addr_and_mmu_idx |= mmu_idx_bitmap;
>
> - /* check whether there are vltb entries that need to be flushed */
> - for (k = 0; k < CPU_VTLB_SIZE; k++) {
> - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
> - }
> + if (!qemu_cpu_is_self(cpu)) {
> + async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
> + RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
> + } else {
> + tlb_check_page_and_flush_by_mmuidx_async_work(
> + cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
> }
> - va_end(argp);
> -
> - tb_flush_jmp_cache(cpu, addr);
> }
>
> void tlb_flush_page_all(target_ulong addr)
> @@ -298,32 +379,50 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
> cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
> }
>
> -static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
> -{
> - return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
> -}
>
> -void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
> +/*
> + * Dirty write flag handling
> + *
> + * When the TCG code writes to a location it looks up the address in
> + * the TLB and uses that data to compute the final address. If any of
> + * the lower bits of the address are set then the slow path is forced.
> + * There are a number of reasons to do this but for normal RAM the
> + * most usual is detecting writes to code regions which may invalidate
> + * generated code.
> + *
> + * Because we want other vCPUs to respond to changes straight away we
> + * update the te->addr_write field atomically. If the TLB entry has
> + * been changed by the vCPU in the mean time we skip the update.
> + */
> +
> +static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
> uintptr_t length)
> {
> - uintptr_t addr;
> + /* paired with atomic_mb_set in tlb_set_page_with_attrs */
> + uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
> + uintptr_t addr = orig_addr;
>
> - if (tlb_is_dirty_ram(tlb_entry)) {
> - addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
> + if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
> + addr &= TARGET_PAGE_MASK;
> + addr += atomic_read(&tlb_entry->addend);
> if ((addr - start) < length) {
> - tlb_entry->addr_write |= TLB_NOTDIRTY;
> + uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
> + atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
> }
> }
> }
>
> +/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
> + * the target vCPU). As such care needs to be taken that we don't
> + * dangerously race with another vCPU update. The only thing actually
> + * updated is the target TLB entry ->addr_write flags.
> + */
> void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
> {
> CPUArchState *env;
>
> int mmu_idx;
>
> - assert_cpu_is_self(cpu);
> -
> env = cpu->env_ptr;
> for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> unsigned int i;
> @@ -409,9 +508,9 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> MemoryRegionSection *section;
> unsigned int index;
> target_ulong address;
> - target_ulong code_address;
> + target_ulong code_address, write_address;
> uintptr_t addend;
> - CPUTLBEntry *te;
> + CPUTLBEntry *te, *tv;
> hwaddr iotlb, xlat, sz;
> unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
> int asidx = cpu_asidx_from_attrs(cpu, attrs);
> @@ -446,15 +545,21 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>
> index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> te = &env->tlb_table[mmu_idx][index];
> -
> /* do not discard the translation in te, evict it into a victim tlb */
> - env->tlb_v_table[mmu_idx][vidx] = *te;
> + tv = &env->tlb_v_table[mmu_idx][vidx];
> +
> + /* addr_write can race with tlb_reset_dirty_range_all */
> + tv->addr_read = te->addr_read;
> + atomic_set(&tv->addr_write, atomic_read(&te->addr_write));
> + tv->addr_code = te->addr_code;
> + atomic_set(&tv->addend, atomic_read(&te->addend));
> +
> env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
>
> /* refill the tlb */
> env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
> env->iotlb[mmu_idx][index].attrs = attrs;
> - te->addend = addend - vaddr;
> + atomic_set(&te->addend, addend - vaddr);
> if (prot & PAGE_READ) {
> te->addr_read = address;
> } else {
> @@ -466,21 +571,24 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> } else {
> te->addr_code = -1;
> }
> +
> + write_address = -1;
> if (prot & PAGE_WRITE) {
> if ((memory_region_is_ram(section->mr) && section->readonly)
> || memory_region_is_romd(section->mr)) {
> /* Write access calls the I/O callback. */
> - te->addr_write = address | TLB_MMIO;
> + write_address = address | TLB_MMIO;
> } else if (memory_region_is_ram(section->mr)
> && cpu_physical_memory_is_clean(
> memory_region_get_ram_addr(section->mr) + xlat)) {
> - te->addr_write = address | TLB_NOTDIRTY;
> + write_address = address | TLB_NOTDIRTY;
> } else {
> - te->addr_write = address;
> + write_address = address;
> }
> - } else {
> - te->addr_write = -1;
> }
> +
> + /* Pairs with flag setting in tlb_reset_dirty_range */
> + atomic_mb_set(&te->addr_write, write_address);
> }
>
> /* Add a new TLB entry, but without specifying the memory
> @@ -643,10 +751,28 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
> if (cmp == page) {
> /* Found entry in victim tlb, swap tlb and iotlb. */
> CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
> +
> + /* tmptlb = *tlb; */
> + /* addr_write can race with tlb_reset_dirty_range_all */
> + tmptlb.addr_read = tlb->addr_read;
> + tmptlb.addr_write = atomic_read(&tlb->addr_write);
> + tmptlb.addr_code = tlb->addr_code;
> + tmptlb.addend = atomic_read(&tlb->addend);
> +
> + /* *tlb = *vtlb; */
> + tlb->addr_read = vtlb->addr_read;
> + atomic_set(&tlb->addr_write, atomic_read(&vtlb->addr_write));
> + tlb->addr_code = vtlb->addr_code;
> + atomic_set(&tlb->addend, atomic_read(&vtlb->addend));
> +
> + /* *vtlb = tmptlb; */
> + vtlb->addr_read = tmptlb.addr_read;
> + atomic_set(&vtlb->addr_write, tmptlb.addr_write);
> + vtlb->addr_code = tmptlb.addr_code;
> + atomic_set(&vtlb->addend, tmptlb.addend);
> +
> CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
> CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
> -
> - tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
> tmpio = *io; *io = *vio; *vio = tmpio;
> return true;
> }
> diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
> index d454c00..3f94178 100644
> --- a/include/exec/cputlb.h
> +++ b/include/exec/cputlb.h
> @@ -23,8 +23,6 @@
> /* cputlb.c */
> void tlb_protect_code(ram_addr_t ram_addr);
> void tlb_unprotect_code(ram_addr_t ram_addr);
> -void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
> - uintptr_t length);
> extern int tlb_flush_count;
>
> #endif
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 880ba42..d945221 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -388,17 +388,17 @@ struct CPUState {
> */
> bool throttle_thread_scheduled;
>
> + /* The pending_tlb_flush flag is set and cleared atomically to
> + * avoid potential races. The aim of the flag is to avoid
> + * unnecessary flushes.
> + */
> + uint16_t pending_tlb_flush;
> +
> /* Note that this is accessed at the start of every TB via a negative
> offset from AREG0. Leave this field at the end so as to make the
> (absolute value) offset as small as possible. This reduces code
> size, especially for hosts without large memory offsets. */
> uint32_t tcg_exit_req;
> -
> - /* The pending_tlb_flush flag is set and cleared atomically to
> - * avoid potential races. The aim of the flag is to avoid
> - * unnecessary flushes.
> - */
> - bool pending_tlb_flush;
> };
>
> QTAILQ_HEAD(CPUTailQ, CPUState);
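
As an aside, since the varargs list is now folded into a bitmap by make_mmu_index_bitmap, call sites keep the existing convention of terminating the list with a negative index, i.e. something along these lines (the index values below are just placeholders, not from a real target):

    /* hypothetical caller: flush MMU indexes 1 and 2; the trailing -1 terminates the list */
    tlb_flush_by_mmuidx(cs, 1, 2, -1);
    tlb_flush_page_by_mmuidx(cs, addr, 1, 2, -1);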
--
Pranith