From: Pranith Kumar
Subject: Re: [Qemu-devel] [PATCH v6 13/19] cputlb: atomically update tlb fields used by tlb_reset_dirty
Date: Wed, 09 Nov 2016 14:36:37 -0500

Hi Alex,

This patch is causing build failures on a 32-bit box (format-string warnings promoted to errors by -Werror):

In file included from /home/pranith/qemu/include/exec/exec-all.h:44:0,
                 from /home/pranith/qemu/cputlb.c:23:
/home/pranith/qemu/cputlb.c: In function ‘tlb_flush_page_by_mmuidx_async_work’:
/home/pranith/qemu/cputlb.c:54:36: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 5 has type ‘long unsigned int’ [-Werror=format=]
         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                                    ^
/home/pranith/qemu/include/qemu/log.h:94:22: note: in definition of macro ‘qemu_log_mask’
             qemu_log(FMT, ## __VA_ARGS__);              \
                      ^~~
/home/pranith/qemu/cputlb.c:286:5: note: in expansion of macro ‘tlb_debug’
     tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx%" PRIxPTR "\n",
     ^~~~~~~~~
/home/pranith/qemu/cputlb.c:57:25: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 6 has type ‘long unsigned int’ [-Werror=format=]
         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
                         ^
/home/pranith/qemu/cputlb.c:286:5: note: in expansion of macro ‘tlb_debug’
     tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx%" PRIxPTR "\n",
     ^~~~~~~~~
cc1: all warnings being treated as errors
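
The failing line seems to be the new tlb_debug() call in
tlb_flush_page_by_mmuidx_async_work(): mmu_idx_bitmap is declared
unsigned long, but it is printed with PRIxPTR, which on this 32-bit
host apparently expands to plain "x" (uintptr_t being unsigned int
here), hence the -Wformat mismatch. A minimal standalone sketch of the
mismatch and of one way to keep both 32-bit and 64-bit builds
warning-free (just an illustration, not a patch against your tree):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        /* stand-in for the bitmap, which cputlb.c declares as unsigned long */
        unsigned long mmu_idx_bitmap = 0x3;

        /* Matching the declared type avoids the -Wformat error on ILP32
         * hosts, where PRIxPTR expands to plain "x". */
        printf("mmu_idx:0x%lx\n", mmu_idx_bitmap);

        /* Alternatively, keep PRIxPTR and cast the argument to uintptr_t. */
        printf("mmu_idx:0x%" PRIxPTR "\n", (uintptr_t)mmu_idx_bitmap);

        return 0;
    }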

Thanks,

Alex Bennée writes:

> The main use case for tlb_reset_dirty is to set the TLB_NOTDIRTY flags
> in TLB entries to force the slow-path on writes. This is used to mark
> page ranges containing code which has been translated so it can be
> invalidated if written to. To do this safely we need to ensure the TLB
> entries in question for all vCPUs are updated before we attempt to run
> the code otherwise a race could be introduced.
>
> To achieve this we atomically set the flag in tlb_reset_dirty_range and
> take care when setting it when the TLB entry is filled.
>
> The helper function is made static as it isn't used outside of cputlb.
>
> Signed-off-by: Alex Bennée <address@hidden>
>
> ---
> v6
>   - use TARGET_PAGE_BITS_MIN
>   - use run_on_cpu helpers
> ---
>  cputlb.c              | 250 +++++++++++++++++++++++++++++++++++++-------------
>  include/exec/cputlb.h |   2 -
>  include/qom/cpu.h     |  12 +--
>  3 files changed, 194 insertions(+), 70 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index cd1ff71..ae94b7f 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -68,6 +68,11 @@
>   * target_ulong even on 32 bit builds */
>  QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
>  
> +/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
> + */
> +QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
> +#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
> +
>  /* statistics */
>  int tlb_flush_count;
>  
> @@ -98,7 +103,7 @@ static void tlb_flush_nocheck(CPUState *cpu, int flush_global)
>  
>      tb_unlock();
>  
> -    atomic_mb_set(&cpu->pending_tlb_flush, false);
> +    atomic_mb_set(&cpu->pending_tlb_flush, 0);
>  }
>  
>  static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
> @@ -121,7 +126,8 @@ static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
>  void tlb_flush(CPUState *cpu, int flush_global)
>  {
>      if (cpu->created && !qemu_cpu_is_self(cpu)) {
> -        if (atomic_cmpxchg(&cpu->pending_tlb_flush, false, true) == true) {
> +        if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
> +            atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
>              async_run_on_cpu(cpu, tlb_flush_global_async_work,
>                               RUN_ON_CPU_HOST_INT(flush_global));
>          }
> @@ -130,39 +136,78 @@ void tlb_flush(CPUState *cpu, int flush_global)
>      }
>  }
>  
> -static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
> +static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
>  {
>      CPUArchState *env = cpu->env_ptr;
> +    unsigned long mmu_idx_bitmask = data.host_ulong;
> +    int mmu_idx;
>  
>      assert_cpu_is_self(cpu);
> -    tlb_debug("start\n");
>  
>      tb_lock();
>  
> -    for (;;) {
> -        int mmu_idx = va_arg(argp, int);
> +    tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
>  
> -        if (mmu_idx < 0) {
> -            break;
> -        }
> +    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
>  
> -        tlb_debug("%d\n", mmu_idx);
> +        if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
> +            tlb_debug("%d\n", mmu_idx);
>  
> -        memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
> -        memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
> +            memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
> +            memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
> +        }
>      }
>  
>      memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
>  
> +    tlb_debug("done\n");
> +
>      tb_unlock();
>  }
>  
> +/* Helper function to slurp va_args list into a bitmap
> + */
> +static inline unsigned long make_mmu_index_bitmap(va_list args)
> +{
> +    unsigned long bitmap = 0;
> +    int mmu_index = va_arg(args, int);
> +
> +    /* An empty va_list would be a bad call */
> +    g_assert(mmu_index > 0);
> +
> +    do {
> +        set_bit(mmu_index, &bitmap);
> +        mmu_index = va_arg(args, int);
> +    } while (mmu_index >= 0);
> +
> +    return bitmap;
> +}
> +
>  void tlb_flush_by_mmuidx(CPUState *cpu, ...)
>  {
>      va_list argp;
> +    unsigned long mmu_idx_bitmap;
> +
>      va_start(argp, cpu);
> -    v_tlb_flush_by_mmuidx(cpu, argp);
> +    mmu_idx_bitmap = make_mmu_index_bitmap(argp);
>      va_end(argp);
> +
> +    tlb_debug("mmu_idx: 0x%04lx\n", mmu_idx_bitmap);
> +
> +    if (!qemu_cpu_is_self(cpu)) {
> +        uint16_t pending_flushes =
> +            mmu_idx_bitmap & ~atomic_mb_read(&cpu->pending_tlb_flush);
> +        if (pending_flushes) {
> +            tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
> +
> +            atomic_or(&cpu->pending_tlb_flush, pending_flushes);
> +            async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
> +                             RUN_ON_CPU_HOST_INT(pending_flushes));
> +        }
> +    } else {
> +        tlb_flush_by_mmuidx_async_work(cpu,
> +                                       RUN_ON_CPU_HOST_ULONG(mmu_idx_bitmap));
> +    }
>  }
>  
>  static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
> @@ -227,16 +272,50 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
>      }
>  }
>  
> -void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
> +/* As we are going to hijack the bottom bits of the page address for a
> + * mmuidx bit mask we need to fail to build if we can't do that
> + */
> +QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
> +
> +static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
> +                                                run_on_cpu_data data)
>  {
>      CPUArchState *env = cpu->env_ptr;
> -    int i, k;
> -    va_list argp;
> -
> -    va_start(argp, addr);
> +    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
> +    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
> +    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
> +    int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> +    int mmu_idx;
> +    int i;
>  
>      assert_cpu_is_self(cpu);
> -    tlb_debug("addr "TARGET_FMT_lx"\n", addr);
> +
> +    tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx%" PRIxPTR "\n",
> +              page, addr, mmu_idx_bitmap);
> +
> +    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> +        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
> +            tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
> +
> +            /* check whether there are vltb entries that need to be flushed */
> +            for (i = 0; i < CPU_VTLB_SIZE; i++) {
> +                tlb_flush_entry(&env->tlb_v_table[mmu_idx][i], addr);
> +            }
> +        }
> +    }
> +
> +    tb_flush_jmp_cache(cpu, addr);
> +}
> +
> +static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
> +                                                          run_on_cpu_data data)
> +{
> +    CPUArchState *env = cpu->env_ptr;
> +    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
> +    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
> +    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
> +
> +    tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
>  
>      /* Check if we need to flush due to large pages.  */
>      if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
> @@ -244,33 +323,35 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
>                    TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
>                    env->tlb_flush_addr, env->tlb_flush_mask);
>  
> -        v_tlb_flush_by_mmuidx(cpu, argp);
> -        va_end(argp);
> -        return;
> +        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_ULONG(mmu_idx_bitmap));
> +    } else {
> +        tlb_flush_page_by_mmuidx_async_work(cpu, data);
>      }
> +}
>  
> -    addr &= TARGET_PAGE_MASK;
> -    i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> -
> -    for (;;) {
> -        int mmu_idx = va_arg(argp, int);
> +void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
> +{
> +    unsigned long mmu_idx_bitmap;
> +    target_ulong addr_and_mmu_idx;
> +    va_list argp;
>  
> -        if (mmu_idx < 0) {
> -            break;
> -        }
> +    va_start(argp, addr);
> +    mmu_idx_bitmap = make_mmu_index_bitmap(argp);
> +    va_end(argp);
>  
> -        tlb_debug("idx %d\n", mmu_idx);
> +    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%lx\n", addr, mmu_idx_bitmap);
>  
> -        tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
> +    /* This should already be page aligned */
> +    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
> +    addr_and_mmu_idx |= mmu_idx_bitmap;
>  
> -        /* check whether there are vltb entries that need to be flushed */
> -        for (k = 0; k < CPU_VTLB_SIZE; k++) {
> -            tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
> -        }
> +    if (!qemu_cpu_is_self(cpu)) {
> +        async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
> +                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
> +    } else {
> +        tlb_check_page_and_flush_by_mmuidx_async_work(
> +            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
>      }
> -    va_end(argp);
> -
> -    tb_flush_jmp_cache(cpu, addr);
>  }
>  
>  void tlb_flush_page_all(target_ulong addr)
> @@ -298,32 +379,50 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
>      cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
>  }
>  
> -static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
> -{
> -    return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
> -}
>  
> -void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
> +/*
> + * Dirty write flag handling
> + *
> + * When the TCG code writes to a location it looks up the address in
> + * the TLB and uses that data to compute the final address. If any of
> + * the lower bits of the address are set then the slow path is forced.
> + * There are a number of reasons to do this but for normal RAM the
> + * most usual is detecting writes to code regions which may invalidate
> + * generated code.
> + *
> + * Because we want other vCPUs to respond to changes straight away we
> + * update the te->addr_write field atomically. If the TLB entry has
> + * been changed by the vCPU in the mean time we skip the update.
> + */
> +
> +static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
>                             uintptr_t length)
>  {
> -    uintptr_t addr;
> +    /* paired with atomic_mb_set in tlb_set_page_with_attrs */
> +    uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
> +    uintptr_t addr = orig_addr;
>  
> -    if (tlb_is_dirty_ram(tlb_entry)) {
> -        addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
> +    if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
> +        addr &= TARGET_PAGE_MASK;
> +        addr += atomic_read(&tlb_entry->addend);
>          if ((addr - start) < length) {
> -            tlb_entry->addr_write |= TLB_NOTDIRTY;
> +            uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
> +            atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
>          }
>      }
>  }
>  
> +/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
> + * the target vCPU). As such care needs to be taken that we don't
> + * dangerously race with another vCPU update. The only thing actually
> + * updated is the target TLB entry ->addr_write flags.
> + */
>  void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
>  {
>      CPUArchState *env;
>  
>      int mmu_idx;
>  
> -    assert_cpu_is_self(cpu);
> -
>      env = cpu->env_ptr;
>      for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
>          unsigned int i;
> @@ -409,9 +508,9 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>      MemoryRegionSection *section;
>      unsigned int index;
>      target_ulong address;
> -    target_ulong code_address;
> +    target_ulong code_address, write_address;
>      uintptr_t addend;
> -    CPUTLBEntry *te;
> +    CPUTLBEntry *te, *tv;
>      hwaddr iotlb, xlat, sz;
>      unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
>      int asidx = cpu_asidx_from_attrs(cpu, attrs);
> @@ -446,15 +545,21 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>  
>      index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
>      te = &env->tlb_table[mmu_idx][index];
> -
>      /* do not discard the translation in te, evict it into a victim tlb */
> -    env->tlb_v_table[mmu_idx][vidx] = *te;
> +    tv = &env->tlb_v_table[mmu_idx][vidx];
> +
> +    /* addr_write can race with tlb_reset_dirty_range_all */
> +    tv->addr_read = te->addr_read;
> +    atomic_set(&tv->addr_write, atomic_read(&te->addr_write));
> +    tv->addr_code = te->addr_code;
> +    atomic_set(&tv->addend, atomic_read(&te->addend));
> +
>      env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
>  
>      /* refill the tlb */
>      env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
>      env->iotlb[mmu_idx][index].attrs = attrs;
> -    te->addend = addend - vaddr;
> +    atomic_set(&te->addend, addend - vaddr);
>      if (prot & PAGE_READ) {
>          te->addr_read = address;
>      } else {
> @@ -466,21 +571,24 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>      } else {
>          te->addr_code = -1;
>      }
> +
> +    write_address = -1;
>      if (prot & PAGE_WRITE) {
>          if ((memory_region_is_ram(section->mr) && section->readonly)
>              || memory_region_is_romd(section->mr)) {
>              /* Write access calls the I/O callback.  */
> -            te->addr_write = address | TLB_MMIO;
> +            write_address = address | TLB_MMIO;
>          } else if (memory_region_is_ram(section->mr)
>                     && cpu_physical_memory_is_clean(
>                          memory_region_get_ram_addr(section->mr) + xlat)) {
> -            te->addr_write = address | TLB_NOTDIRTY;
> +            write_address = address | TLB_NOTDIRTY;
>          } else {
> -            te->addr_write = address;
> +            write_address = address;
>          }
> -    } else {
> -        te->addr_write = -1;
>      }
> +
> +    /* Pairs with flag setting in tlb_reset_dirty_range */
> +    atomic_mb_set(&te->addr_write, write_address);
>  }
>  
>  /* Add a new TLB entry, but without specifying the memory
> @@ -643,10 +751,28 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
>          if (cmp == page) {
>              /* Found entry in victim tlb, swap tlb and iotlb.  */
>              CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
> +
> +            /* tmptlb = *tlb; */
> +            /* addr_write can race with tlb_reset_dirty_range_all */
> +            tmptlb.addr_read = tlb->addr_read;
> +            tmptlb.addr_write = atomic_read(&tlb->addr_write);
> +            tmptlb.addr_code = tlb->addr_code;
> +            tmptlb.addend = atomic_read(&tlb->addend);
> +
> +            /* *tlb = *vtlb; */
> +            tlb->addr_read = vtlb->addr_read;
> +            atomic_set(&tlb->addr_write, atomic_read(&vtlb->addr_write));
> +            tlb->addr_code = vtlb->addr_code;
> +            atomic_set(&tlb->addend, atomic_read(&vtlb->addend));
> +
> +            /* *vtlb = tmptlb; */
> +            vtlb->addr_read = tmptlb.addr_read;
> +            atomic_set(&vtlb->addr_write, tmptlb.addr_write);
> +            vtlb->addr_code = tmptlb.addr_code;
> +            atomic_set(&vtlb->addend, tmptlb.addend);
> +
>              CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
>              CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
> -
> -            tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
>              tmpio = *io; *io = *vio; *vio = tmpio;
>              return true;
>          }
> diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
> index d454c00..3f94178 100644
> --- a/include/exec/cputlb.h
> +++ b/include/exec/cputlb.h
> @@ -23,8 +23,6 @@
>  /* cputlb.c */
>  void tlb_protect_code(ram_addr_t ram_addr);
>  void tlb_unprotect_code(ram_addr_t ram_addr);
> -void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
> -                           uintptr_t length);
>  extern int tlb_flush_count;
>  
>  #endif
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 880ba42..d945221 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -388,17 +388,17 @@ struct CPUState {
>       */
>      bool throttle_thread_scheduled;
>  
> +    /* The pending_tlb_flush flag is set and cleared atomically to
> +     * avoid potential races. The aim of the flag is to avoid
> +     * unnecessary flushes.
> +     */
> +    uint16_t pending_tlb_flush;
> +
>      /* Note that this is accessed at the start of every TB via a negative
>         offset from AREG0.  Leave this field at the end so as to make the
>         (absolute value) offset as small as possible.  This reduces code
>         size, especially for hosts without large memory offsets.  */
>      uint32_t tcg_exit_req;
> -
> -    /* The pending_tlb_flush flag is set and cleared atomically to
> -     * avoid potential races. The aim of the flag is to avoid
> -     * unnecessary flushes.
> -     */
> -    bool pending_tlb_flush;
>  };
>  
>  QTAILQ_HEAD(CPUTailQ, CPUState);

-- 
Pranith


