[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 09/18] target/arm: Adjust interface of sve_ld1_host_fn
From: Richard Henderson
Subject: [PATCH v3 09/18] target/arm: Adjust interface of sve_ld1_host_fn
Date: Tue, 21 Apr 2020 21:33:00 -0700
The current interface includes a loop; change it to load a
single element. The typedef is renamed from sve_ld1_host_fn to
sve_ldst1_host_fn. We will then be able to use the function
for ld{2,3,4}, where individual vector elements are not adjacent.
Replace each call with the simplest possible loop over active
elements.
Reviewed-by: Peter Maydell <address@hidden>
Signed-off-by: Richard Henderson <address@hidden>
---
target/arm/sve_helper.c | 124 ++++++++++++++++++++--------------------
1 file changed, 63 insertions(+), 61 deletions(-)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 2f053a9152..d007137735 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -3972,20 +3972,10 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
*/
/*
- * Load elements into @vd, controlled by @vg, from @host + @mem_ofs.
- * Memory is valid through @host + @mem_max. The register element
- * indices are inferred from @mem_ofs, as modified by the types for
- * which the helper is built. Return the @mem_ofs of the first element
- * not loaded (which is @mem_max if they are all loaded).
- *
- * For softmmu, we have fully validated the guest page. For user-only,
- * we cannot fully validate without taking the mmap lock, but since we
- * know the access is within one host page, if any access is valid they
- * all must be valid. However, when @vg is all false, it may be that
- * no access is valid.
+ * Load one element into @vd + @reg_off from @host.
+ * The controlling predicate is known to be true.
*/
-typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host,
- intptr_t mem_ofs, intptr_t mem_max);
+typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host);
/*
* Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
@@ -3999,20 +3989,10 @@ typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
*/
#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \
-static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \
- intptr_t mem_off, const intptr_t mem_max) \
-{ \
- intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \
- uint64_t *pg = vg; \
- while (mem_off + sizeof(TYPEM) <= mem_max) { \
- TYPEM val = 0; \
- if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \
- val = HOST(host + mem_off); \
- } \
- *(TYPEE *)(vd + H(reg_off)) = val; \
- mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \
- } \
- return mem_off; \
+static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
+{ \
+ TYPEM val = HOST(host); \
+ *(TYPEE *)(vd + H(reg_off)) = val; \
}
#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \
@@ -4411,7 +4391,7 @@ static inline bool test_host_page(void *host)
static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
uint32_t desc, const uintptr_t retaddr,
const int esz, const int msz,
- sve_ld1_host_fn *host_fn,
+ sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
@@ -4445,8 +4425,12 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
if (likely(split == mem_max)) {
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
if (test_host_page(host)) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
- tcg_debug_assert(mem_off == mem_max);
+ intptr_t i = reg_off;
+ host -= mem_off;
+ do {
+ host_fn(vd, i, host + (i >> diffsz));
+ i = find_next_active(vg, i + (1 << esz), reg_max, esz);
+ } while (i < reg_max);
/* After having taken any fault, zero leading inactive elements. */
swap_memzero(vd, reg_off);
return;
@@ -4459,7 +4443,12 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
*/
#ifdef CONFIG_USER_ONLY
swap_memzero(&scratch, reg_off);
- host_fn(&scratch, vg, g2h(addr), mem_off, mem_max);
+ host = g2h(addr);
+ do {
+ host_fn(&scratch, reg_off, host + (reg_off >> diffsz));
+ reg_off += 1 << esz;
+ reg_off = find_next_active(vg, reg_off, reg_max, esz);
+ } while (reg_off < reg_max);
#else
memset(&scratch, 0, reg_max);
goto start;
@@ -4477,9 +4466,13 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
host = tlb_vaddr_to_host(env, addr + mem_off,
MMU_DATA_LOAD, mmu_idx);
if (host) {
- mem_off = host_fn(&scratch, vg, host - mem_off,
- mem_off, split);
- reg_off = mem_off << diffsz;
+ host -= mem_off;
+ do {
+ host_fn(&scratch, reg_off, host + mem_off);
+ reg_off += 1 << esz;
+ reg_off = find_next_active(vg, reg_off, reg_max, esz);
+ mem_off = reg_off >> diffsz;
+ } while (split - mem_off >= (1 << msz));
continue;
}
}
@@ -4706,7 +4699,7 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz)
static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
uint32_t desc, const uintptr_t retaddr,
const int esz, const int msz,
- sve_ld1_host_fn *host_fn,
+ sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
@@ -4716,7 +4709,7 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
const int diffsz = esz - msz;
const intptr_t reg_max = simd_oprsz(desc);
const intptr_t mem_max = reg_max >> diffsz;
- intptr_t split, reg_off, mem_off;
+ intptr_t split, reg_off, mem_off, i;
void *host;
/* Skip to the first active element. */
@@ -4739,28 +4732,18 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
if (likely(split == mem_max)) {
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
if (test_host_page(host)) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
- tcg_debug_assert(mem_off == mem_max);
+ i = reg_off;
+ host -= mem_off;
+ do {
+ host_fn(vd, i, host + (i >> diffsz));
+ i = find_next_active(vg, i + (1 << esz), reg_max, esz);
+ } while (i < reg_max);
/* After any fault, zero any leading inactive elements. */
swap_memzero(vd, reg_off);
return;
}
}
-#ifdef CONFIG_USER_ONLY
- /*
- * The page(s) containing this first element at ADDR+MEM_OFF must
- * be valid. Considering that this first element may be misaligned
- * and cross a page boundary itself, take the rest of the page from
- * the last byte of the element.
- */
- split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
- mem_off = host_fn(vd, vg, g2h(addr), mem_off, split);
-
- /* After any fault, zero any leading inactive elements. */
- swap_memzero(vd, reg_off);
- reg_off = mem_off << diffsz;
-#else
/*
* Perform one normal read, which will fault or not.
* But it is likely to bring the page into the tlb.
@@ -4777,11 +4760,15 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
if (split >= (1 << msz)) {
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
if (host) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
- reg_off = mem_off << diffsz;
+ host -= mem_off;
+ do {
+ host_fn(vd, reg_off, host + mem_off);
+ reg_off += 1 << esz;
+ reg_off = find_next_active(vg, reg_off, reg_max, esz);
+ mem_off = reg_off >> diffsz;
+ } while (split - mem_off >= (1 << msz));
}
}
-#endif
record_fault(env, reg_off, reg_max);
}
@@ -4791,7 +4778,7 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
*/
static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
uint32_t desc, const int esz, const int msz,
- sve_ld1_host_fn *host_fn)
+ sve_ldst1_host_fn *host_fn)
{
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
void *vd = &env->vfp.zregs[rd];
@@ -4806,7 +4793,13 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);
if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) {
/* The entire operation is valid and will not fault. */
- host_fn(vd, vg, host, 0, mem_max);
+ reg_off = 0;
+ do {
+ mem_off = reg_off >> diffsz;
+ host_fn(vd, reg_off, host + mem_off);
+ reg_off += 1 << esz;
+ reg_off = find_next_active(vg, reg_off, reg_max, esz);
+ } while (reg_off < reg_max);
return;
}
#endif
@@ -4826,8 +4819,12 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) {
/* At least one load is valid; take the rest of the page. */
split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
- mem_off = host_fn(vd, vg, host, mem_off, split);
- reg_off = mem_off << diffsz;
+ do {
+ host_fn(vd, reg_off, host + mem_off);
+ reg_off += 1 << esz;
+ reg_off = find_next_active(vg, reg_off, reg_max, esz);
+ mem_off = reg_off >> diffsz;
+ } while (split - mem_off >= (1 << msz));
}
#else
/*
@@ -4848,8 +4845,13 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
split = max_for_page(addr, mem_off, mem_max);
if (host && split >= (1 << msz)) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
- reg_off = mem_off << diffsz;
+ host -= mem_off;
+ do {
+ host_fn(vd, reg_off, host + mem_off);
+ reg_off += 1 << esz;
+ reg_off = find_next_active(vg, reg_off, reg_max, esz);
+ mem_off = reg_off >> diffsz;
+ } while (split - mem_off >= (1 << msz));
}
#endif
--
2.20.1
- [PATCH v3 03/18] accel/tcg: Add block comment for probe_access, (continued)
- [PATCH v3 03/18] accel/tcg: Add block comment for probe_access, Richard Henderson, 2020/04/22
- [PATCH v3 04/18] accel/tcg: Add probe_access_flags, Richard Henderson, 2020/04/22
- [PATCH v3 02/18] exec: Fix cpu_watchpoint_address_matches address length, Richard Henderson, 2020/04/22
- [PATCH v3 05/18] accel/tcg: Add endian-specific cpu_{ld, st}* operations, Richard Henderson, 2020/04/22
- [PATCH v3 06/18] target/arm: Use cpu_*_data_ra for sve_ldst_tlb_fn, Richard Henderson, 2020/04/22
- [PATCH v3 09/18] target/arm: Adjust interface of sve_ld1_host_fn, Richard Henderson <=
- [PATCH v3 08/18] target/arm: Add sve infrastructure for page lookup, Richard Henderson, 2020/04/22
- [PATCH v3 10/18] target/arm: Use SVEContLdSt in sve_ld1_r, Richard Henderson, 2020/04/22
- [PATCH v3 12/18] target/arm: Use SVEContLdSt for multi-register contiguous loads, Richard Henderson, 2020/04/22
- [PATCH v3 07/18] target/arm: Drop manual handling of set/clear_helper_retaddr, Richard Henderson, 2020/04/22
- [PATCH v3 14/18] target/arm: Use SVEContLdSt for contiguous stores, Richard Henderson, 2020/04/22
- [PATCH v3 13/18] target/arm: Update contiguous first-fault and no-fault loads, Richard Henderson, 2020/04/22