[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 08/10] target/arm: Convert Neon VSHLL, VMOVL to decodetree
From: |
Peter Maydell |
Subject: |
[PATCH 08/10] target/arm: Convert Neon VSHLL, VMOVL to decodetree |
Date: |
Fri, 15 May 2020 15:20:54 +0100 |
Convert the VSHLL and VMOVL insns from the 2-reg-shift group
to decodetree. Since the loop always has two passes, we unroll
it to avoid the awkward reassignment of one TCGv to another.
Signed-off-by: Peter Maydell <address@hidden>
---
target/arm/neon-dp.decode | 14 ++++++
target/arm/translate-neon.inc.c | 81 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 46 +------------------
3 files changed, 97 insertions(+), 44 deletions(-)
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index bf4ef8c555f..4438c1c8728 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -404,3 +404,17 @@ VQRSHRN_U32_2sh 1111 001 1 1 . 01 .... .... 1001 0 .
. 1 .... \
@2reg_shift_q1 size=2 shift=%neon_rshift_i4
VQRSHRN_U16_2sh 1111 001 1 1 . 001 ... .... 1001 0 . . 1 .... \
@2reg_shift_q1 size=1 shift=%neon_rshift_i3
+
+VSHLL_S_2sh 1111 001 0 1 . 1 shift:5 .... 1010 0 . . 1 .... \
+ @2reg_shift_q0 size=2
+VSHLL_S_2sh 1111 001 0 1 . 01 shift:4 .... 1010 0 . . 1 .... \
+ @2reg_shift_q0 size=1
+VSHLL_S_2sh 1111 001 0 1 . 001 shift:3 .... 1010 0 . . 1 .... \
+ @2reg_shift_q0 size=0
+
+VSHLL_U_2sh 1111 001 1 1 . 1 shift:5 .... 1010 0 . . 1 .... \
+ @2reg_shift_q0 size=2
+VSHLL_U_2sh 1111 001 1 1 . 01 shift:4 .... 1010 0 . . 1 .... \
+ @2reg_shift_q0 size=1
+VSHLL_U_2sh 1111 001 1 1 . 001 shift:3 .... 1010 0 . . 1 .... \
+ @2reg_shift_q0 size=0
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 9a75a69a4f5..5678bfd0d4d 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1687,3 +1687,84 @@ DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16,
gen_helper_neon_narrow_sat_u8)
DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64,
gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32,
gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
+
+static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
+ NeonGenWidenFn *widenfn, bool u)
+{
+ TCGv_i64 tmp;
+ TCGv_i32 rm0, rm1;
+ uint64_t widen_mask = 0;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * This is a widen-and-shift operation. The shift is always less
+ * than the width of the source type, so after widening the input
+ * vector we can simply shift the whole 64-bit widened register,
+ * and then clear the potential overflow bits resulting from left
+ * bits of the narrow input appearing as right bits of the left
+ * neighbour narrow input. Calculate a mask of bits to clear.
+ */
+ if ((a->shift != 0) && (a->size < 2 || u)) {
+ int esize = 8 << a->size;
+ widen_mask = MAKE_64BIT_MASK(0, esize);
+ widen_mask >>= esize - a->shift;
+ widen_mask = dup_const(a->size + 1, widen_mask);
+ }
+
+ rm0 = neon_load_reg(a->vm, 0);
+ rm1 = neon_load_reg(a->vm, 1);
+ tmp = tcg_temp_new_i64();
+
+ widenfn(tmp, rm0);
+ if (a->shift != 0) {
+ tcg_gen_shli_i64(tmp, tmp, a->shift);
+ tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+ }
+ neon_store_reg64(tmp, a->vd);
+
+ widenfn(tmp, rm1);
+ if (a->shift != 0) {
+ tcg_gen_shli_i64(tmp, tmp, a->shift);
+ tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+ }
+ neon_store_reg64(tmp, a->vd + 1);
+ tcg_temp_free_i64(tmp);
+ return true;
+}
+
+static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ NeonGenWidenFn *widenfn[] = {
+ gen_helper_neon_widen_s8,
+ gen_helper_neon_widen_s16,
+ tcg_gen_ext_i32_i64,
+ };
+ return do_vshll_2sh(s, a, widenfn[a->size], false);
+}
+
+static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ NeonGenWidenFn *widenfn[] = {
+ gen_helper_neon_widen_u8,
+ gen_helper_neon_widen_u16,
+ tcg_gen_extu_i32_i64,
+ };
+ return do_vshll_2sh(s, a, widenfn[a->size], true);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index f728231b198..ef39c89f10a 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5248,6 +5248,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t
insn)
case 7: /* VQSHL */
case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */
case 9: /* VQSHRN, VQRSHRN */
+ case 10: /* VSHLL, including VMOVL */
return 1; /* handled by decodetree */
default:
break;
@@ -5265,50 +5266,7 @@ static int disas_neon_data_insn(DisasContext *s,
uint32_t insn)
size--;
}
shift = (insn >> 16) & ((1 << (3 + size)) - 1);
- if (op == 10) {
- /* VSHLL, VMOVL */
- if (q || (rd & 1)) {
- return 1;
- }
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 1)
- tmp = tmp2;
-
- gen_neon_widen(cpu_V0, tmp, size, u);
-
- if (shift != 0) {
- /* The shift is less than the width of the source
- type, so we can just shift the whole register. */
- tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
- /* Widen the result of shift: we need to clear
- * the potential overflow bits resulting from
- * left bits of the narrow input appearing as
- * right bits of left the neighbour narrow
- * input. */
- if (size < 2 || !u) {
- uint64_t imm64;
- if (size == 0) {
- imm = (0xffu >> (8 - shift));
- imm |= imm << 16;
- } else if (size == 1) {
- imm = 0xffff >> (16 - shift);
- } else {
- /* size == 2 */
- imm = 0xffffffff >> (32 - shift);
- }
- if (size < 2) {
- imm64 = imm | (((uint64_t)imm) << 32);
- } else {
- imm64 = imm;
- }
- tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
- }
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- } else if (op >= 14) {
+ if (op >= 14) {
/* VCVT fixed-point. */
TCGv_ptr fpst;
TCGv_i32 shiftv;
--
2.20.1
- Re: [PATCH 03/10] target/arm: Convert Neon VSHR 2-reg-shift insns to decodetree, (continued)
- [PATCH 04/10] target/arm: Convert Neon VSRA, VSRI, VRSHR, VRSRA 2-reg-shift insns to decodetree, Peter Maydell, 2020/05/15
- [PATCH 05/10] target/arm: Convert VQSHLU, VQSHL 2-reg-shift insns to decodetree, Peter Maydell, 2020/05/15
- [PATCH 06/10] target/arm: Convert Neon narrowing shifts with op==8 to decodetree, Peter Maydell, 2020/05/15
- [PATCH 07/10] target/arm: Convert Neon narrowing shifts with op==9 to decodetree, Peter Maydell, 2020/05/15
- [PATCH 08/10] target/arm: Convert Neon VSHLL, VMOVL to decodetree,
Peter Maydell <=
- [PATCH 09/10] target/arm: Convert VCVT fixed-point ops to decodetree, Peter Maydell, 2020/05/15
- [PATCH 10/10] target/arm: Convert Neon one-register-and-immediate insns to decodetree, Peter Maydell, 2020/05/15
- Re: [PATCH 00/10] target/arm: Convert 2-reg-shift and 1-reg-imm Neon insns to decodetree, no-reply, 2020/05/15