[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 16/18] target/arm: Reuse sve_probe_page for scatter stores
From: Richard Henderson
Subject: [PATCH v4 16/18] target/arm: Reuse sve_probe_page for scatter stores
Date: Thu, 30 Apr 2020 09:28:11 -0700
Reviewed-by: Peter Maydell <address@hidden>
Signed-off-by: Richard Henderson <address@hidden>
---
target/arm/sve_helper.c | 182 ++++++++++++++++++++++++----------------
1 file changed, 111 insertions(+), 71 deletions(-)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 1560129b08..ad7e10f1e7 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -5413,94 +5413,134 @@ DO_LDFF1_ZPZ_D(dd_be, zd, MO_64)
/* Stores with a vector index. */
-static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
+static inline QEMU_ALWAYS_INLINE
+void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ int esize, int msize, zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
{
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc);
+ const int mmu_idx = cpu_mmu_index(env, false);
+ const intptr_t reg_max = simd_oprsz(desc);
+ void *host[ARM_MAX_VQ * 4];
+ intptr_t reg_off, i;
+ SVEHostPage info, info2;
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ /*
+ * Probe all of the elements for host addresses and flags.
+ */
+ i = reg_off = 0;
+ do {
+ uint64_t pg = vg[reg_off >> 6];
do {
- if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i);
- tlb_fn(env, vd, i, base + (off << scale), ra);
+ target_ulong addr = base + (off_fn(vm, reg_off) << scale);
+ target_ulong in_page = -(addr | TARGET_PAGE_MASK);
+
+ host[i] = NULL;
+ if (likely((pg >> (reg_off & 63)) & 1)) {
+ if (likely(in_page >= msize)) {
+ sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ host[i] = info.host;
+ } else {
+ /*
+ * Element crosses the page boundary.
+ * Probe both pages, but do not record the host address,
+ * so that we use the slow path.
+ */
+ sve_probe_page(&info, false, env, addr, 0,
+ MMU_DATA_STORE, mmu_idx, retaddr);
+ sve_probe_page(&info2, false, env, addr + in_page, 0,
+ MMU_DATA_STORE, mmu_idx, retaddr);
+ info.flags |= info2.flags;
+ }
+
+ if (unlikely(info.flags & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), addr, msize,
+ info.attrs, BP_MEM_WRITE, retaddr);
+ }
+ /* TODO: MTE check. */
}
- i += 4, pg >>= 4;
- } while (i & 15);
- }
-}
+ i += 1;
+ reg_off += esize;
+ } while (reg_off & 63);
+ } while (reg_off < reg_max);
-static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
-{
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc) / 8;
-
- for (i = 0; i < oprsz; i++) {
- uint8_t pg = *(uint8_t *)(vg + H1(i));
- if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i * 8);
- tlb_fn(env, vd, i * 8, base + (off << scale), ra);
+ /*
+ * Now that we have recognized all exceptions except SyncExternal
+ * (from TLB_MMIO), which we cannot avoid, perform all of the stores.
+ *
+ * Note for the common case of an element in RAM, not crossing a page
+ * boundary, we have stored the host address in host[]. This doubles
+ * as a first-level check against the predicate, since only enabled
+ * elements have non-null host addresses.
+ */
+ i = reg_off = 0;
+ do {
+ void *h = host[i];
+ if (likely(h != NULL)) {
+ host_fn(vd, reg_off, h);
+ } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
+ target_ulong addr = base + (off_fn(vm, reg_off) << scale);
+ tlb_fn(env, vd, reg_off, addr, retaddr);
}
- }
+ i += 1;
+ reg_off += esize;
+ } while (reg_off < reg_max);
}
-#define DO_ST1_ZPZ_S(MEM, OFS) \
-void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_s, sve_st1##MEM##_tlb); \
+#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
+ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
}
-#define DO_ST1_ZPZ_D(MEM, OFS) \
-void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_d, sve_st1##MEM##_tlb); \
+#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
+ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
}
-DO_ST1_ZPZ_S(bs, zsu)
-DO_ST1_ZPZ_S(hs_le, zsu)
-DO_ST1_ZPZ_S(hs_be, zsu)
-DO_ST1_ZPZ_S(ss_le, zsu)
-DO_ST1_ZPZ_S(ss_be, zsu)
+DO_ST1_ZPZ_S(bs, zsu, MO_8)
+DO_ST1_ZPZ_S(hs_le, zsu, MO_16)
+DO_ST1_ZPZ_S(hs_be, zsu, MO_16)
+DO_ST1_ZPZ_S(ss_le, zsu, MO_32)
+DO_ST1_ZPZ_S(ss_be, zsu, MO_32)
-DO_ST1_ZPZ_S(bs, zss)
-DO_ST1_ZPZ_S(hs_le, zss)
-DO_ST1_ZPZ_S(hs_be, zss)
-DO_ST1_ZPZ_S(ss_le, zss)
-DO_ST1_ZPZ_S(ss_be, zss)
+DO_ST1_ZPZ_S(bs, zss, MO_8)
+DO_ST1_ZPZ_S(hs_le, zss, MO_16)
+DO_ST1_ZPZ_S(hs_be, zss, MO_16)
+DO_ST1_ZPZ_S(ss_le, zss, MO_32)
+DO_ST1_ZPZ_S(ss_be, zss, MO_32)
-DO_ST1_ZPZ_D(bd, zsu)
-DO_ST1_ZPZ_D(hd_le, zsu)
-DO_ST1_ZPZ_D(hd_be, zsu)
-DO_ST1_ZPZ_D(sd_le, zsu)
-DO_ST1_ZPZ_D(sd_be, zsu)
-DO_ST1_ZPZ_D(dd_le, zsu)
-DO_ST1_ZPZ_D(dd_be, zsu)
+DO_ST1_ZPZ_D(bd, zsu, MO_8)
+DO_ST1_ZPZ_D(hd_le, zsu, MO_16)
+DO_ST1_ZPZ_D(hd_be, zsu, MO_16)
+DO_ST1_ZPZ_D(sd_le, zsu, MO_32)
+DO_ST1_ZPZ_D(sd_be, zsu, MO_32)
+DO_ST1_ZPZ_D(dd_le, zsu, MO_64)
+DO_ST1_ZPZ_D(dd_be, zsu, MO_64)
-DO_ST1_ZPZ_D(bd, zss)
-DO_ST1_ZPZ_D(hd_le, zss)
-DO_ST1_ZPZ_D(hd_be, zss)
-DO_ST1_ZPZ_D(sd_le, zss)
-DO_ST1_ZPZ_D(sd_be, zss)
-DO_ST1_ZPZ_D(dd_le, zss)
-DO_ST1_ZPZ_D(dd_be, zss)
+DO_ST1_ZPZ_D(bd, zss, MO_8)
+DO_ST1_ZPZ_D(hd_le, zss, MO_16)
+DO_ST1_ZPZ_D(hd_be, zss, MO_16)
+DO_ST1_ZPZ_D(sd_le, zss, MO_32)
+DO_ST1_ZPZ_D(sd_be, zss, MO_32)
+DO_ST1_ZPZ_D(dd_le, zss, MO_64)
+DO_ST1_ZPZ_D(dd_be, zss, MO_64)
-DO_ST1_ZPZ_D(bd, zd)
-DO_ST1_ZPZ_D(hd_le, zd)
-DO_ST1_ZPZ_D(hd_be, zd)
-DO_ST1_ZPZ_D(sd_le, zd)
-DO_ST1_ZPZ_D(sd_be, zd)
-DO_ST1_ZPZ_D(dd_le, zd)
-DO_ST1_ZPZ_D(dd_be, zd)
+DO_ST1_ZPZ_D(bd, zd, MO_8)
+DO_ST1_ZPZ_D(hd_le, zd, MO_16)
+DO_ST1_ZPZ_D(hd_be, zd, MO_16)
+DO_ST1_ZPZ_D(sd_le, zd, MO_32)
+DO_ST1_ZPZ_D(sd_be, zd, MO_32)
+DO_ST1_ZPZ_D(dd_le, zd, MO_64)
+DO_ST1_ZPZ_D(dd_be, zd, MO_64)
#undef DO_ST1_ZPZ_S
#undef DO_ST1_ZPZ_D
--
2.20.1
- [PATCH v4 01/18] exec: Add block comments for watchpoint routines, (continued)
- [PATCH v4 01/18] exec: Add block comments for watchpoint routines, Richard Henderson, 2020/04/30
- [PATCH v4 03/18] accel/tcg: Add block comment for probe_access, Richard Henderson, 2020/04/30
- [PATCH v4 04/18] accel/tcg: Add probe_access_flags, Richard Henderson, 2020/04/30
- [PATCH v4 06/18] target/arm: Use cpu_*_data_ra for sve_ldst_tlb_fn, Richard Henderson, 2020/04/30
- [PATCH v4 05/18] accel/tcg: Add endian-specific cpu_{ld, st}* operations, Richard Henderson, 2020/04/30
- [PATCH v4 08/18] target/arm: Add sve infrastructure for page lookup, Richard Henderson, 2020/04/30
- [PATCH v4 07/18] target/arm: Drop manual handling of set/clear_helper_retaddr, Richard Henderson, 2020/04/30
- [PATCH v4 09/18] target/arm: Adjust interface of sve_ld1_host_fn, Richard Henderson, 2020/04/30
- [PATCH v4 10/18] target/arm: Use SVEContLdSt in sve_ld1_r, Richard Henderson, 2020/04/30
- [PATCH v4 11/18] target/arm: Handle watchpoints in sve_ld1_r, Richard Henderson, 2020/04/30
- [PATCH v4 16/18] target/arm: Reuse sve_probe_page for scatter stores, Richard Henderson <=
- [PATCH v4 14/18] target/arm: Use SVEContLdSt for contiguous stores, Richard Henderson, 2020/04/30
- [PATCH v4 13/18] target/arm: Update contiguous first-fault and no-fault loads, Richard Henderson, 2020/04/30
- [PATCH v4 18/18] target/arm: Remove sve_memopidx, Richard Henderson, 2020/04/30
- [PATCH v4 12/18] target/arm: Use SVEContLdSt for multi-register contiguous loads, Richard Henderson, 2020/04/30
- [PATCH v4 15/18] target/arm: Reuse sve_probe_page for gather first-fault loads, Richard Henderson, 2020/04/30
- [PATCH v4 17/18] target/arm: Reuse sve_probe_page for gather loads, Richard Henderson, 2020/04/30