[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3 04/18] accel/tcg: Add probe_access_flags
From: |
Peter Maydell |
Subject: |
Re: [PATCH v3 04/18] accel/tcg: Add probe_access_flags |
Date: |
Mon, 27 Apr 2020 11:48:53 +0100 |
On Wed, 22 Apr 2020 at 05:33, Richard Henderson
<address@hidden> wrote:
>
> This new interface will allow targets to probe for a page
> and then handle watchpoints themselves. This will be most
> useful for vector predicated memory operations, where one
> page lookup can be used for many operations, and one test
> can avoid many watchpoint checks.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> v2: Fix return of host pointer in softmmu probe_access_flags.
> ---
> include/exec/cpu-all.h | 13 ++-
> include/exec/exec-all.h | 22 +++++
> accel/tcg/cputlb.c | 177 ++++++++++++++++++++--------------------
> accel/tcg/user-exec.c | 36 +++++---
> 4 files changed, 149 insertions(+), 99 deletions(-)
>
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 49384bb66a..43ddcf024c 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -328,7 +328,18 @@ CPUArchState *cpu_copy(CPUArchState *env);
> | CPU_INTERRUPT_TGT_EXT_3 \
> | CPU_INTERRUPT_TGT_EXT_4)
>
> -#if !defined(CONFIG_USER_ONLY)
> +#ifdef CONFIG_USER_ONLY
> +
> +/*
> + * Allow some level of source compatibility with softmmu. We do not
> + * support any of the more exotic features, so only invalid pages may
> + * be signaled by probe_access_flags().
> + */
> +#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS_MIN - 1))
> +#define TLB_MMIO 0
> +#define TLB_WATCHPOINT 0
> +
> +#else
>
> /*
> * Flags stored in the low bits of the TLB virtual address.
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index d656a1f05c..8792bea07a 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -362,6 +362,28 @@ static inline void *probe_read(CPUArchState *env,
> target_ulong addr, int size,
> return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
> }
>
> +/**
> + * probe_access_flags:
> + * @env: CPUArchState
> + * @addr: guest virtual address to look up
> + * @access_type: read, write or execute permission
> + * @mmu_idx: MMU index to use for lookup
> + * @nonfault: suppress the fault
> + * @phost: return value for host address
> + * @retaddr: return address for unwinding
> + *
> + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for
> + * the page, and storing the host address for RAM in @phost.
> + *
> + * If @nonfault is set, do not raise an exception but return
> TLB_INVALID_MASK.
> + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned
> flags.
> + * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags.
"TLB_NOTDIRTY"
> + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO.
> + */
> +int probe_access_flags(CPUArchState *env, target_ulong addr,
> + MMUAccessType access_type, int mmu_idx,
> + bool nonfault, void **phost, uintptr_t retaddr);
> +
> #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache
> line */
>
> /* Estimated block size for TB allocation. */
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index e3b5750c3b..bbe265ce28 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1231,86 +1231,16 @@ static void notdirty_write(CPUState *cpu, vaddr
> mem_vaddr, unsigned size,
> }
> }
>
> -/*
> - * Probe for whether the specified guest access is permitted. If it is not
> - * permitted then an exception will be taken in the same way as if this
> - * were a real access (and we will not return).
> - * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
> - * returns the address of the host page similar to tlb_vaddr_to_host().
> - */
> -void *probe_access(CPUArchState *env, target_ulong addr, int size,
> - MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
> +static int probe_access_internal(CPUArchState *env, target_ulong addr,
> + int fault_size, MMUAccessType access_type,
> + int mmu_idx, bool nonfault,
> + void **phost, uintptr_t retaddr)
> {
> uintptr_t index = tlb_index(env, mmu_idx, addr);
> CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
> - target_ulong tlb_addr;
> - size_t elt_ofs;
> - int wp_access;
> -
> - g_assert(-(addr | TARGET_PAGE_MASK) >= size);
> -
> - switch (access_type) {
> - case MMU_DATA_LOAD:
> - elt_ofs = offsetof(CPUTLBEntry, addr_read);
> - wp_access = BP_MEM_READ;
> - break;
> - case MMU_DATA_STORE:
> - elt_ofs = offsetof(CPUTLBEntry, addr_write);
> - wp_access = BP_MEM_WRITE;
> - break;
> - case MMU_INST_FETCH:
> - elt_ofs = offsetof(CPUTLBEntry, addr_code);
> - wp_access = BP_MEM_READ;
> - break;
> - default:
> - g_assert_not_reached();
> - }
> - tlb_addr = tlb_read_ofs(entry, elt_ofs);
> -
> - if (unlikely(!tlb_hit(tlb_addr, addr))) {
> - if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
> - addr & TARGET_PAGE_MASK)) {
> - tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx,
> retaddr);
> - /* TLB resize via tlb_fill may have moved the entry. */
> - index = tlb_index(env, mmu_idx, addr);
> - entry = tlb_entry(env, mmu_idx, addr);
> - }
> - tlb_addr = tlb_read_ofs(entry, elt_ofs);
> - }
> -
> - if (!size) {
> - return NULL;
> - }
> -
> - if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
> - CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> -
> - /* Reject I/O access, or other required slow-path. */
> - if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
> - return NULL;
> - }
> -
> - /* Handle watchpoints. */
> - if (tlb_addr & TLB_WATCHPOINT) {
> - cpu_check_watchpoint(env_cpu(env), addr, size,
> - iotlbentry->attrs, wp_access, retaddr);
> - }
> -
> - /* Handle clean RAM pages. */
> - if (tlb_addr & TLB_NOTDIRTY) {
> - notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
> - }
> - }
> -
> - return (void *)((uintptr_t)addr + entry->addend);
> -}
> -
> -void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
> - MMUAccessType access_type, int mmu_idx)
> -{
> - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
> - target_ulong tlb_addr, page;
> + target_ulong tlb_addr, page_addr;
> size_t elt_ofs;
> + int flags;
>
> switch (access_type) {
> case MMU_DATA_LOAD:
> @@ -1325,20 +1255,19 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr
> addr,
> default:
> g_assert_not_reached();
> }
> -
> - page = addr & TARGET_PAGE_MASK;
> tlb_addr = tlb_read_ofs(entry, elt_ofs);
>
> - if (!tlb_hit_page(tlb_addr, page)) {
> - uintptr_t index = tlb_index(env, mmu_idx, addr);
> -
> - if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
> + page_addr = addr & TARGET_PAGE_MASK;
> + if (!tlb_hit_page(tlb_addr, page_addr)) {
> + if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
> CPUState *cs = env_cpu(env);
> CPUClass *cc = CPU_GET_CLASS(cs);
>
> - if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
> + if (!cc->tlb_fill(cs, addr, fault_size, access_type,
> + mmu_idx, nonfault, retaddr)) {
> /* Non-faulting page table read failed. */
> - return NULL;
> + *phost = NULL;
> + return TLB_INVALID_MASK;
> }
>
> /* TLB resize via tlb_fill may have moved the entry. */
> @@ -1346,15 +1275,89 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr
> addr,
> }
> tlb_addr = tlb_read_ofs(entry, elt_ofs);
> }
> + flags = tlb_addr & TLB_FLAGS_MASK;
>
> - if (tlb_addr & ~TARGET_PAGE_MASK) {
> - /* IO access */
> + /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
> + if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
> + *phost = NULL;
> + return TLB_MMIO;
> + }
> +
> + /* Everything else is RAM. */
> + *phost = (void *)((uintptr_t)addr + entry->addend);
> + return flags;
> +}
> +
> +int probe_access_flags(CPUArchState *env, target_ulong addr,
> + MMUAccessType access_type, int mmu_idx,
> + bool nonfault, void **phost, uintptr_t retaddr)
> +{
> + int flags;
> +
> + flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
> + nonfault, phost, retaddr);
> +
> + /* Handle clean RAM pages. */
> + if (unlikely(flags & TLB_NOTDIRTY)) {
> + uintptr_t index = tlb_index(env, mmu_idx, addr);
> + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> +
> + notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
> + flags &= ~TLB_NOTDIRTY;
> + }
probe_access() handles watchpoints. Why doesn't probe_access_flags()
have to do that?
> +
> + return flags;
> +}
> +
> +void *probe_access(CPUArchState *env, target_ulong addr, int size,
> + MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
> +{
> + void *host;
> + int flags;
> +
> + g_assert(-(addr | TARGET_PAGE_MASK) >= size);
> +
> + flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
> + false, &host, retaddr);
> +
> + /* Per the interface, size == 0 merely faults the access. */
> + if (size == 0) {
> return NULL;
> }
>
> - return (void *)((uintptr_t)addr + entry->addend);
> + if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
> + uintptr_t index = tlb_index(env, mmu_idx, addr);
> + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> +
> + /* Handle clean RAM pages. */
> + if (flags & TLB_NOTDIRTY) {
> + notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
> + }
> +
> + /* Handle watchpoints. */
> + if (flags & TLB_WATCHPOINT) {
> + int wp_access = (access_type == MMU_DATA_STORE
> + ? BP_MEM_WRITE : BP_MEM_READ);
> + cpu_check_watchpoint(env_cpu(env), addr, size,
> + iotlbentry->attrs, wp_access, retaddr);
> + }
The old code checked for watchpoints first, and then handled notdirty-writes,
which seems like the more correct order. Why has the new
version switched them around?
(Incidentally notdirty_write() is rather misleadingly named,
since it doesn't actually do a write, it just marks a notdirty
page as dirty.)
> + }
> +
> + return host;
The probe_access_internal() doc comment doesn't say that it
guarantees to set host to NULL for the TLB_MMIO/TLB_INVALID_MASK
cases, but we implicitly rely on it here.
> }
>
> +void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
> + MMUAccessType access_type, int mmu_idx)
> +{
> + void *host;
> + int flags;
> +
> + flags = probe_access_internal(env, addr, 0, access_type,
> + mmu_idx, true, &host, 0);
> +
> + /* No combination of flags are expected by the caller. */
> + return flags ? NULL : host;
> +}
>
> #ifdef CONFIG_PLUGIN
> /*
> diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
> index 4be78eb9b3..91c2dc46dd 100644
> --- a/accel/tcg/user-exec.c
> +++ b/accel/tcg/user-exec.c
> @@ -190,13 +190,12 @@ static inline int handle_cpu_signal(uintptr_t pc,
> siginfo_t *info,
> g_assert_not_reached();
> }
>
> -void *probe_access(CPUArchState *env, target_ulong addr, int size,
> - MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
> +int probe_access_flags(CPUArchState *env, target_ulong addr,
> + MMUAccessType access_type, int mmu_idx,
> + bool nonfault, void **phost, uintptr_t ra)
> {
> int flags;
>
> - g_assert(-(addr | TARGET_PAGE_MASK) >= size);
> -
> switch (access_type) {
> case MMU_DATA_STORE:
> flags = PAGE_WRITE;
> @@ -211,15 +210,30 @@ void *probe_access(CPUArchState *env, target_ulong
> addr, int size,
> g_assert_not_reached();
> }
>
> - if (!guest_addr_valid(addr) || page_check_range(addr, size, flags) < 0) {
> - CPUState *cpu = env_cpu(env);
> - CPUClass *cc = CPU_GET_CLASS(cpu);
> - cc->tlb_fill(cpu, addr, size, access_type, MMU_USER_IDX, false,
> - retaddr);
> - g_assert_not_reached();
> + if (!guest_addr_valid(addr) || page_check_range(addr, 1, flags) < 0) {
> + if (nonfault) {
> + *phost = NULL;
> + return TLB_INVALID_MASK;
> + } else {
> + CPUState *cpu = env_cpu(env);
> + CPUClass *cc = CPU_GET_CLASS(cpu);
> + cc->tlb_fill(cpu, addr, 0, access_type, MMU_USER_IDX, false, ra);
> + g_assert_not_reached();
> + }
> }
>
> - return size ? g2h(addr) : NULL;
> + *phost = g2h(addr);
> + return 0;
> +}
> +
> +void *probe_access(CPUArchState *env, target_ulong addr, int size,
> + MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
> +{
> + void *host;
> +
> + g_assert(-(addr | TARGET_PAGE_MASK) >= size);
> + probe_access_flags(env, addr, access_type, mmu_idx, false, &host,
> retaddr);
> + return host;
The old code returned NULL for a zero size; the new version does not.
The old code passed size into cc->tlb_fill; the new version does not.
The old code passed size into page_check_range(); the new version does not.
thanks
-- PMM
- [PATCH v3 00/18] target/arm: sve load/store improvements, Richard Henderson, 2020/04/22
- [PATCH v3 01/18] exec: Add block comments for watchpoint routines, Richard Henderson, 2020/04/22
- [PATCH v3 03/18] accel/tcg: Add block comment for probe_access, Richard Henderson, 2020/04/22
- [PATCH v3 04/18] accel/tcg: Add probe_access_flags, Richard Henderson, 2020/04/22
- Re: [PATCH v3 04/18] accel/tcg: Add probe_access_flags,
Peter Maydell <=
- [PATCH v3 02/18] exec: Fix cpu_watchpoint_address_matches address length, Richard Henderson, 2020/04/22
- [PATCH v3 05/18] accel/tcg: Add endian-specific cpu_{ld, st}* operations, Richard Henderson, 2020/04/22
- [PATCH v3 06/18] target/arm: Use cpu_*_data_ra for sve_ldst_tlb_fn, Richard Henderson, 2020/04/22
- [PATCH v3 09/18] target/arm: Adjust interface of sve_ld1_host_fn, Richard Henderson, 2020/04/22
- [PATCH v3 08/18] target/arm: Add sve infrastructure for page lookup, Richard Henderson, 2020/04/22