[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 22/24] target/arm: Convert SADDW, SSUBW, UADDW, USUBW to decodetree
From: Peter Maydell
Subject: [PULL 22/24] target/arm: Convert SADDW, SSUBW, UADDW, USUBW to decodetree
Date: Thu, 11 Jul 2024 14:18:20 +0100
From: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240709000610.382391-5-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/a64.decode | 5 ++
target/arm/tcg/translate-a64.c | 86 +++++++++++++++++-----------------
2 files changed, 48 insertions(+), 43 deletions(-)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index cf69e7e1beb..32e2f3a0d55 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -993,6 +993,11 @@ SQDMLAL_v 0.00 1110 101 ..... 10010 0 ..... ..... @qrrr_s
SQDMLSL_v 0.00 1110 011 ..... 10110 0 ..... ..... @qrrr_h
SQDMLSL_v 0.00 1110 101 ..... 10110 0 ..... ..... @qrrr_s
+SADDW 0.00 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+UADDW 0.10 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+SSUBW 0.00 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+USUBW 0.10 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 07b9cdd78f4..264d2eeb27d 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5908,6 +5908,47 @@ TRANS(SQDMLSL_vi, do_3op_widening,
a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
+static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
+ MemOp sign, bool sub)
+{
+ TCGv_i64 tcg_op0, tcg_op1;
+ MemOp esz = a->esz;
+ int half = 8 >> esz;
+ bool top = a->q;
+ int top_swap = top ? 0 : half - 1;
+ int top_half = top ? half : 0;
+
+ /* There are no 64x64->128 bit operations. */
+ if (esz >= MO_64) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+ tcg_op0 = tcg_temp_new_i64();
+ tcg_op1 = tcg_temp_new_i64();
+
+ for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+ int elt = elt_fwd ^ top_swap;
+
+ read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
+ read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
+ if (sub) {
+ tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
+ } else {
+ tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
+ }
+ write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
+ }
+ clear_vec_high(s, 1, a->rd);
+ return true;
+}
+
+TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
+TRANS(UADDW, do_addsub_wide, a, 0, false)
+TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
+TRANS(USUBW, do_addsub_wide, a, 0, true)
+
/*
* Advanced SIMD scalar/vector x indexed element
*/
@@ -10790,37 +10831,6 @@ static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
genfn(tcg_res, tcg_op1, tcg_op2);
}
-static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
-{
- TCGv_i64 tcg_res[2];
- int part = is_q ? 2 : 0;
- int pass;
-
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
- static NeonGenWidenFn * const widenfns[3][2] = {
- { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
- { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
- { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
- };
- NeonGenWidenFn *widenfn = widenfns[size][is_u];
-
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
- widenfn(tcg_op2_wide, tcg_op2);
- tcg_res[pass] = tcg_temp_new_i64();
- gen_neon_addl(size, (opcode == 3),
- tcg_res[pass], tcg_op1, tcg_op2_wide);
- }
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
-}
-
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
tcg_gen_addi_i64(in, in, 1U << 31);
@@ -10889,18 +10899,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
int rd = extract32(insn, 0, 5);
switch (opcode) {
- case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
- case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
- /* 64 x 128 -> 128 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
- break;
case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
/* 128 x 128 -> 64 */
@@ -10948,7 +10946,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
return;
default:
case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
--
2.34.1
- [PULL 01/24] target/arm: Correct comments about M-profile FPSCR, (continued)
- [PULL 01/24] target/arm: Correct comments about M-profile FPSCR, Peter Maydell, 2024/07/11
- [PULL 13/24] target/arm: Set arm_v7m_tcg_ops cpu_exec_halt to arm_cpu_exec_halt(), Peter Maydell, 2024/07/11
- [PULL 06/24] target/arm: Store FPSR and FPCR in separate CPU state fields, Peter Maydell, 2024/07/11
- [PULL 07/24] target/arm: Rename FPCR_ QC, NZCV macros to FPSR_, Peter Maydell, 2024/07/11
- [PULL 16/24] hw/misc: In STM32L4x5 EXTI, consolidate 2 constants, Peter Maydell, 2024/07/11
- [PULL 09/24] target/arm: Allow FPCR bits that aren't in FPSCR, Peter Maydell, 2024/07/11
- [PULL 15/24] accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory, Peter Maydell, 2024/07/11
- [PULL 23/24] target/arm: Convert ADDHN, SUBHN, RADDHN, RSUBHN to decodetree, Peter Maydell, 2024/07/11
- [PULL 12/24] target/arm: Use cpu_env in cpu_untagged_addr, Peter Maydell, 2024/07/11
- [PULL 18/24] hw/arm: In STM32L4x5 SOC, connect USART devices to EXTI, Peter Maydell, 2024/07/11
- [PULL 22/24] target/arm: Convert SADDW, SSUBW, UADDW, USUBW to decodetree,
Peter Maydell <=
- [PULL 11/24] hw/misc/bcm2835_thermal: Fix access size handling in bcm2835_thermal_ops, Peter Maydell, 2024/07/11
- [PULL 20/24] target/arm: Convert SADDL, SSUBL, SABDL, SABAL, and unsigned to decodetree, Peter Maydell, 2024/07/11
- [PULL 08/24] target/arm: Rename FPSR_MASK and FPCR_MASK and define them symbolically, Peter Maydell, 2024/07/11
- [PULL 14/24] target: Set TCGCPUOps::cpu_exec_halt to target's has_work implementation, Peter Maydell, 2024/07/11
- [PULL 21/24] target/arm: Convert SQDMULL, SQDMLAL, SQDMLSL to decodetree, Peter Maydell, 2024/07/11
- Re: [PULL 00/24] target-arm queue, Richard Henderson, 2024/07/11