[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 46/69] target/arm: Convert handle_2misc_pairwise to decodetree
From: Richard Henderson
Subject: [PATCH v3 46/69] target/arm: Convert handle_2misc_pairwise to decodetree
Date: Wed, 11 Dec 2024 10:30:13 -0600
This includes SADDLP, UADDLP, SADALP, UADALP.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-a64.h | 2 -
target/arm/tcg/helper-a64.c | 18 --------
target/arm/tcg/translate-a64.c | 84 +++-------------------------------
target/arm/tcg/a64.decode | 5 ++
4 files changed, 11 insertions(+), 98 deletions(-)
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index f811bb85dc..ac7ca190fa 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -41,8 +41,6 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 9b3c407be3..3de564e0fe 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -306,24 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_three, float_muladd_halve_result,
fpst);
}
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x00ff00ff00ff00ffULL;
- tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
- return tmp;
-}
-
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x0000ffff0000ffffULL;
- tmp += (a >> 16) & 0x0000ffff0000ffffULL;
- return tmp;
-}
-
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index f57b5e2855..717d30dd5b 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -8956,6 +8956,10 @@ static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
+TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
+TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
+TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
+TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
@@ -9885,73 +9889,6 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
}
}
-static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
- bool is_q, int size, int rn, int rd)
-{
- /* Implement the pairwise operations from 2-misc:
- * SADDLP, UADDLP, SADALP, UADALP.
- * These all add pairs of elements in the input to produce a
- * double-width result element in the output (possibly accumulating).
- */
- bool accum = (opcode == 0x6);
- int maxpass = is_q ? 2 : 1;
- int pass;
- TCGv_i64 tcg_res[2];
-
- if (size == 2) {
- /* 32 + 32 -> 64 op */
- MemOp memop = size + (u ? 0 : MO_SIGN);
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, pass * 2, memop);
- read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
- tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
- if (accum) {
- read_vec_element(s, tcg_op1, rd, pass, MO_64);
- tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- }
- }
- } else {
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- NeonGenOne64OpFn *genfn;
- static NeonGenOne64OpFn * const fns[2][2] = {
- { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
- { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
- };
-
- genfn = fns[size][u];
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- genfn(tcg_res[pass], tcg_op);
-
- if (accum) {
- read_vec_element(s, tcg_op, rd, pass, MO_64);
- if (size == 0) {
- gen_helper_neon_addl_u16(tcg_res[pass],
- tcg_res[pass], tcg_op);
- } else {
- gen_helper_neon_addl_u32(tcg_res[pass],
- tcg_res[pass], tcg_op);
- }
- }
- }
- }
- if (!is_q) {
- tcg_res[1] = tcg_constant_i64(0);
- }
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
-}
-
static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
{
/* Implement SHLL and SHLL2 */
@@ -10011,17 +9948,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
return;
- case 0x2: /* SADDLP, UADDLP */
- case 0x6: /* SADALP, UADALP */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
- return;
case 0x13: /* SHLL, SHLL2 */
if (u == 0 || size == 3) {
unallocated_encoding(s);
@@ -10203,9 +10129,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
default:
case 0x0: /* REV64, REV32 */
case 0x1: /* REV16 */
+ case 0x2: /* SADDLP, UADDLP */
case 0x3: /* SUQADD, USQADD */
case 0x4: /* CLS, CLZ */
case 0x5: /* CNT, NOT, RBIT */
+ case 0x6: /* SADALP, UADALP */
case 0x7: /* SQABS, SQNEG */
case 0x8: /* CMGT, CMGE */
case 0x9: /* CMEQ, CMLE */
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 05f1bc99b5..f3488766b2 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1662,3 +1662,8 @@ CMLT0_v 0.00 1110 ..1 00000 10101 0 ..... ..... @qrr_e
REV16_v 0.00 1110 001 00000 00011 0 ..... ..... @qrr_b
REV32_v 0.10 1110 0.1 00000 00001 0 ..... ..... @qrr_bh
REV64_v 0.00 1110 ..1 00000 00001 0 ..... ..... @qrr_e
+
+SADDLP_v 0.00 1110 ..1 00000 00101 0 ..... ..... @qrr_e
+UADDLP_v 0.10 1110 ..1 00000 00101 0 ..... ..... @qrr_e
+SADALP_v 0.00 1110 ..1 00000 01101 0 ..... ..... @qrr_e
+UADALP_v 0.10 1110 ..1 00000 01101 0 ..... ..... @qrr_e
--
2.43.0
- [PATCH v3 35/69] target/arm: Convert SQABS, SQNEG to decodetree, (continued)
- [PATCH v3 35/69] target/arm: Convert SQABS, SQNEG to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 34/69] target/arm: Convert handle_fmov to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 36/69] target/arm: Convert ABS, NEG to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 41/69] target/arm: Convert CMGT, CMGE, CMLT, CMLE, CMEQ (zero) to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 43/69] target/arm: Convert handle_rev to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 42/69] target/arm: Introduce gen_gvec_rev{16,32,64}, Richard Henderson, 2024/12/11
- [PATCH v3 44/69] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c, Richard Henderson, 2024/12/11
- [PATCH v3 40/69] target/arm: Convert CNT, NOT, RBIT (vector) to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 46/69] target/arm: Convert handle_2misc_pairwise to decodetree, Richard Henderson <=
- [PATCH v3 45/69] target/arm: Introduce gen_gvec_{s,u}{add,ada}lp, Richard Henderson, 2024/12/11
- [PATCH v3 47/69] target/arm: Remove helper_neon_{add,sub}l_u{16,32}, Richard Henderson, 2024/12/11
- [PATCH v3 48/69] target/arm: Introduce clear_vec, Richard Henderson, 2024/12/11
- [PATCH v3 49/69] target/arm: Convert XTN, SQXTUN, SQXTN, UQXTN to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 50/69] target/arm: Convert FCVTN, BFCVTN to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 51/69] target/arm: Convert FCVTXN to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 52/69] target/arm: Convert SHLL to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 53/69] target/arm: Implement gen_gvec_fabs, gen_gvec_fneg, Richard Henderson, 2024/12/11
- [PATCH v3 56/69] target/arm: Convert FRINT* (vector) to decodetree, Richard Henderson, 2024/12/11
- [PATCH v3 55/69] target/arm: Convert FSQRT (vector) to decodetree, Richard Henderson, 2024/12/11