[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 32/44] target/arm: Implement MVE VCTP
|
From: |
Peter Maydell |
|
Subject: |
[PULL 32/44] target/arm: Implement MVE VCTP |
|
Date: |
Wed, 25 Aug 2021 11:35:22 +0100 |
Implement the MVE VCTP insn, which sets the VPR.P0 predicate bits so
as to predicate any element at index Rn or greater. As
with VPNOT, this insn itself is predicable and subject to beatwise
execution.
The calculation of the mask is the same as that used to determine
ltpmask in mve_element_mask(), but we precalculate masklen in
generated code to avoid needing 4 helpers specialized by size.
We put the decode line in with the low-overhead-loop insns in
t32.decode because it's logically part of that collection of insn
patterns, even though it is an MVE only insn.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper-mve.h | 2 ++
target/arm/translate-a32.h | 1 +
target/arm/t32.decode | 1 +
target/arm/mve_helper.c | 20 ++++++++++++++++++++
target/arm/translate-mve.c | 2 +-
target/arm/translate.c | 33 +++++++++++++++++++++++++++++++++
6 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 8cb941912fc..b6cf3f0c94d 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -121,6 +121,8 @@ DEF_HELPER_FLAGS_4(mve_veor, TCG_CALL_NO_WG, void, env,
ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vpsel, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_1(mve_vpnot, TCG_CALL_NO_WG, void, env)
+DEF_HELPER_FLAGS_2(mve_vctp, TCG_CALL_NO_WG, void, env, i32)
+
DEF_HELPER_FLAGS_4(mve_vaddb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vaddh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vaddw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
index 6f4d65ddb00..88f15df60e8 100644
--- a/target/arm/translate-a32.h
+++ b/target/arm/translate-a32.h
@@ -48,6 +48,7 @@ long neon_element_offset(int reg, int element, MemOp memop);
void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
void clear_eci_state(DisasContext *s);
bool mve_eci_check(DisasContext *s);
+void mve_update_eci(DisasContext *s);
void mve_update_and_store_eci(DisasContext *s);
bool mve_skip_vmov(DisasContext *s, int vn, int index, int size);
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index 2d47f31f143..78fadef9d62 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -748,5 +748,6 @@ BL 1111 0. .......... 11.1 ............
@branch24
# This is DLSTP
DLS 1111 0 0000 0 size:2 rn:4 1110 0000 0000 0001
}
+ VCTP 1111 0 0000 0 size:2 rn:4 1110 1000 0000 0001
]
}
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index c22a00c5ed6..1752555a218 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -2218,6 +2218,26 @@ void HELPER(mve_vpnot)(CPUARMState *env)
mve_advance_vpt(env);
}
+/*
+ * VCTP: P0 unexecuted bits unchanged, predicated bits zeroed,
+ * otherwise set according to value of Rn. The calculation of
+ * newmask here works in the same way as the calculation of the
+ * ltpmask in mve_element_mask(), but we have pre-calculated
+ * the masklen in the generated code.
+ */
+void HELPER(mve_vctp)(CPUARMState *env, uint32_t masklen)
+{
+ uint16_t mask = mve_element_mask(env);
+ uint16_t eci_mask = mve_eci_mask(env);
+ uint16_t newmask;
+
+ assert(masklen <= 16);
+ newmask = masklen ? MAKE_64BIT_MASK(0, masklen) : 0;
+ newmask &= mask;
+ env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (newmask & eci_mask);
+ mve_advance_vpt(env);
+}
+
#define DO_1OP_SAT(OP, ESIZE, TYPE, FN) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
{ \
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index cc2e58cfe2f..865d5acbe76 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -93,7 +93,7 @@ bool mve_eci_check(DisasContext *s)
}
}
-static void mve_update_eci(DisasContext *s)
+void mve_update_eci(DisasContext *s)
{
/*
* The helper function will always update the CPUState field,
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 80c282669f0..804a53279bd 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -8669,6 +8669,39 @@ static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
return true;
}
+static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
+{
+ /*
+ * M-profile Create Vector Tail Predicate. This insn is itself
+ * predicated and is subject to beatwise execution.
+ */
+ TCGv_i32 rn_shifted, masklen;
+
+ if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * We pre-calculate the mask length here to avoid having
+ * to have multiple helpers specialized for size.
+ * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
+ */
+ rn_shifted = tcg_temp_new_i32();
+ masklen = load_reg(s, a->rn);
+ tcg_gen_shli_i32(rn_shifted, masklen, a->size);
+ tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
+ masklen, tcg_constant_i32(1 << (4 - a->size)),
+ rn_shifted, tcg_constant_i32(16));
+ gen_helper_mve_vctp(cpu_env, masklen);
+ tcg_temp_free_i32(masklen);
+ tcg_temp_free_i32(rn_shifted);
+ mve_update_eci(s);
+ return true;
+}
static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
{
--
2.20.1
- [PULL 25/44] target/arm: Implement MVE VMLADAV and VMLSLDAV, (continued)
- [PULL 25/44] target/arm: Implement MVE VMLADAV and VMLSLDAV, Peter Maydell, 2021/08/25
- [PULL 27/44] target/arm: Implement MVE saturating doubling multiply accumulates, Peter Maydell, 2021/08/25
- [PULL 31/44] target/arm: Implement MVE VPNOT, Peter Maydell, 2021/08/25
- [PULL 29/44] target/arm: Implement MVE VMAXA, VMINA, Peter Maydell, 2021/08/25
- [PULL 30/44] target/arm: Implement MVE VMOV to/from 2 general-purpose registers, Peter Maydell, 2021/08/25
- [PULL 28/44] target/arm: Implement MVE VQABS, VQNEG, Peter Maydell, 2021/08/25
- [PULL 33/44] target/arm: Implement MVE scatter-gather insns, Peter Maydell, 2021/08/25
- [PULL 36/44] target/arm: Re-indent sdiv and udiv helpers, Peter Maydell, 2021/08/25
- [PULL 34/44] target/arm: Implement MVE scatter-gather immediate forms, Peter Maydell, 2021/08/25
- [PULL 24/44] target/arm: Rename MVEGenDualAccOpFn to MVEGenLongDualAccOpFn, Peter Maydell, 2021/08/25
- [PULL 32/44] target/arm: Implement MVE VCTP,
Peter Maydell <=
- [PULL 35/44] target/arm: Implement MVE interleaving loads/stores, Peter Maydell, 2021/08/25
- [PULL 38/44] target/arm: kvm: use RCU_READ_LOCK_GUARD() in kvm_arch_fixup_msi_route(), Peter Maydell, 2021/08/25
- [PULL 37/44] target/arm: Implement M-profile trapping on division by zero, Peter Maydell, 2021/08/25
- [PULL 40/44] fsl-imx6ul: Instantiate SAI1/2/3 and ASRC as unimplemented devices, Peter Maydell, 2021/08/25
- [PULL 43/44] fsl-imx7: Instantiate SAI1/2/3 as unimplemented devices, Peter Maydell, 2021/08/25
- [PULL 39/44] hw/char/pl011: add support for sending break, Peter Maydell, 2021/08/25
- [PULL 44/44] docs: Document how to use gdb with unix sockets, Peter Maydell, 2021/08/25
- [PULL 41/44] hw/dma/pl330: Add memory region to replace default, Peter Maydell, 2021/08/25
- [PULL 42/44] sbsa-ref: Rename SBSA_GWDT enum value, Peter Maydell, 2021/08/25
- Re: [PULL 00/44] target-arm queue, Peter Maydell, 2021/08/25