[PULL 23/24] target/arm: Convert ADDHN, SUBHN, RADDHN, RSUBHN to decodet

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PULL 23/24] target/arm: Convert ADDHN, SUBHN, RADDHN, RSUBHN to decodet

From:	Peter Maydell
Subject:	[PULL 23/24] target/arm: Convert ADDHN, SUBHN, RADDHN, RSUBHN to decodetree
Date:	Thu, 11 Jul 2024 14:18:21 +0100

From: Richard Henderson <richard.henderson@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20240709000610.382391-6-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/a64.decode      |   5 ++
 target/arm/tcg/translate-a64.c | 127 +++++++++++++++------------------
 2 files changed, 61 insertions(+), 71 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 32e2f3a0d55..afb34a8fd43 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -998,6 +998,11 @@ UADDW           0.10 1110 ..1 ..... 00010 0 ..... ..... 
@qrrr_e
 SSUBW           0.00 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
 USUBW           0.10 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
 
+ADDHN           0.00 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+RADDHN          0.10 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+SUBHN           0.00 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+RSUBHN          0.10 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+
 ### Advanced SIMD scalar x indexed element
 
 FMUL_si         0101 1111 00 .. .... 1001 . 0 ..... .....   @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 264d2eeb27d..3e0dacdd63c 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5949,6 +5949,60 @@ TRANS(UADDW, do_addsub_wide, a, 0, false)
 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
 TRANS(USUBW, do_addsub_wide, a, 0, true)
 
+static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
+                                 bool sub, bool round)
+{
+    TCGv_i64 tcg_op0, tcg_op1;
+    MemOp esz = a->esz;
+    int half = 8 >> esz;
+    bool top = a->q;
+    int ebits = 8 << esz;
+    uint64_t rbit = 1ull << (ebits - 1);
+    int top_swap, top_half;
+
+    /* There are no 128x128->64 bit operations. */
+    if (esz >= MO_64) {
+        return false;
+    }
+    if (!fp_access_check(s)) {
+        return true;
+    }
+    tcg_op0 = tcg_temp_new_i64();
+    tcg_op1 = tcg_temp_new_i64();
+
+    /*
+     * For top half inputs, iterate backward; forward for bottom half.
+     * This means the store to the destination will not occur until
+     * overlapping input inputs are consumed.
+     */
+    top_swap = top ? half - 1 : 0;
+    top_half = top ? half : 0;
+
+    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+        int elt = elt_fwd ^ top_swap;
+
+        read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
+        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
+        if (sub) {
+            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
+        } else {
+            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
+        }
+        if (round) {
+            tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
+        }
+        tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
+        write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
+    }
+    clear_vec_high(s, top, a->rd);
+    return true;
+}
+
+TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
+TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
+TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
+TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -10813,65 +10867,6 @@ static void disas_simd_shift_imm(DisasContext *s, 
uint32_t insn)
     }
 }
 
-/* Generate code to do a "long" addition or subtraction, ie one done in
- * TCGv_i64 on vector lanes twice the width specified by size.
- */
-static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
-                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
-{
-    static NeonGenTwo64OpFn * const fns[3][2] = {
-        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
-        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
-        { tcg_gen_add_i64, tcg_gen_sub_i64 },
-    };
-    NeonGenTwo64OpFn *genfn;
-    assert(size < 3);
-
-    genfn = fns[size][is_sub];
-    genfn(tcg_res, tcg_op1, tcg_op2);
-}
-
-static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
-{
-    tcg_gen_addi_i64(in, in, 1U << 31);
-    tcg_gen_extrh_i64_i32(res, in);
-}
-
-static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
-                                 int opcode, int rd, int rn, int rm)
-{
-    TCGv_i32 tcg_res[2];
-    int part = is_q ? 2 : 0;
-    int pass;
-
-    for (pass = 0; pass < 2; pass++) {
-        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
-        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
-        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
-        static NeonGenNarrowFn * const narrowfns[3][2] = {
-            { gen_helper_neon_narrow_high_u8,
-              gen_helper_neon_narrow_round_high_u8 },
-            { gen_helper_neon_narrow_high_u16,
-              gen_helper_neon_narrow_round_high_u16 },
-            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
-        };
-        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
-
-        read_vec_element(s, tcg_op1, rn, pass, MO_64);
-        read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
-        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
-
-        tcg_res[pass] = tcg_temp_new_i32();
-        gennarrow(tcg_res[pass], tcg_wideres);
-    }
-
-    for (pass = 0; pass < 2; pass++) {
-        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
-    }
-    clear_vec_high(s, is_q, rd);
-}
-
 /* AdvSIMD three different
  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -10899,18 +10894,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, 
uint32_t insn)
     int rd = extract32(insn, 0, 5);
 
     switch (opcode) {
-    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
-    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
-        /* 128 x 128 -> 64 */
-        if (size == 3) {
-            unallocated_encoding(s);
-            return;
-        }
-        if (!fp_access_check(s)) {
-            return;
-        }
-        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
-        break;
     case 14: /* PMULL, PMULL2 */
         if (is_u) {
             unallocated_encoding(s);
@@ -10949,7 +10932,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, 
uint32_t insn)
     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
+    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
+    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
     case 9: /* SQDMLAL, SQDMLAL2 */
-- 
2.34.1

[Prev in Thread]

Current Thread

[Next in Thread]

[PULL 04/24] target/arm: Support migration when FPSR/FPCR won't fit in the FPSCR, (continued)
- [PULL 04/24] target/arm: Support migration when FPSR/FPCR won't fit in the FPSCR, Peter Maydell, 2024/07/11
- [PULL 05/24] target/arm: Implement store_cpu_field_low32() macro, Peter Maydell, 2024/07/11
- [PULL 24/24] target/arm: Convert PMULL to decodetree, Peter Maydell, 2024/07/11
- [PULL 01/24] target/arm: Correct comments about M-profile FPSCR, Peter Maydell, 2024/07/11
- [PULL 13/24] target/arm: Set arm_v7m_tcg_ops cpu_exec_halt to arm_cpu_exec_halt(), Peter Maydell, 2024/07/11
- [PULL 06/24] target/arm: Store FPSR and FPCR in separate CPU state fields, Peter Maydell, 2024/07/11
- [PULL 07/24] target/arm: Rename FPCR_ QC, NZCV macros to FPSR_, Peter Maydell, 2024/07/11
- [PULL 16/24] hw/misc: In STM32L4x5 EXTI, consolidate 2 constants, Peter Maydell, 2024/07/11
- [PULL 09/24] target/arm: Allow FPCR bits that aren't in FPSCR, Peter Maydell, 2024/07/11
- [PULL 15/24] accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory, Peter Maydell, 2024/07/11
- [PULL 23/24] target/arm: Convert ADDHN, SUBHN, RADDHN, RSUBHN to decodetree, Peter Maydell <=
- [PULL 12/24] target/arm: Use cpu_env in cpu_untagged_addr, Peter Maydell, 2024/07/11
- [PULL 18/24] hw/arm: In STM32L4x5 SOC, connect USART devices to EXTI, Peter Maydell, 2024/07/11
- [PULL 22/24] target/arm: Convert SADDW, SSUBW, UADDW, USUBW to decodetree, Peter Maydell, 2024/07/11
- [PULL 11/24] hw/misc/bcm2835_thermal: Fix access size handling in bcm2835_thermal_ops, Peter Maydell, 2024/07/11
- [PULL 20/24] target/arm: Convert SADDL, SSUBL, SABDL, SABAL, and unsigned to decodetree, Peter Maydell, 2024/07/11
- [PULL 08/24] target/arm: Rename FPSR_MASK and FPCR_MASK and define them symbolically, Peter Maydell, 2024/07/11
- [PULL 14/24] target: Set TCGCPUOps::cpu_exec_halt to target's has_work implementation, Peter Maydell, 2024/07/11
- [PULL 21/24] target/arm: Convert SQDMULL, SQDMLAL, SQDMLSL to decodetree, Peter Maydell, 2024/07/11
- Re: [PULL 00/24] target-arm queue, Richard Henderson, 2024/07/11

Prev by Date: [PULL 15/24] accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory
Next by Date: [PULL 12/24] target/arm: Use cpu_env in cpu_untagged_addr
Previous by thread: [PULL 15/24] accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory
Next by thread: [PULL 12/24] target/arm: Use cpu_env in cpu_untagged_addr
Index(es):
- Date
- Thread