[PULL 44/85] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PULL 44/85] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c

From:	Peter Maydell
Subject:	[PULL 44/85] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c
Date:	Fri, 13 Dec 2024 17:31:48 +0000

From: Richard Henderson <richard.henderson@linaro.org>

Move from helper-a64.c to neon_helper.c so that these
functions are available for arm32 code as well.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20241211163036.2297116-45-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h          |  2 ++
 target/arm/tcg/helper-a64.h  |  2 --
 target/arm/tcg/helper-a64.c  | 43 ------------------------------------
 target/arm/tcg/neon_helper.c | 43 ++++++++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 167e331a83e..57e0ce387bb 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -399,6 +399,8 @@ DEF_HELPER_2(neon_addl_u16, i64, i64, i64)
 DEF_HELPER_2(neon_addl_u32, i64, i64, i64)
 DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
 DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
 DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
 DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index 203b7b7ac82..f811bb85dcb 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -41,9 +41,7 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
 DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
 DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
 DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 3f4d7b9aba6..9b3c407be3c 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -306,39 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
     return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
 }
 
-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
-    uint64_t nsignmask = 0x0080008000800080ULL;
-    uint64_t wsignmask = 0x8000800080008000ULL;
-    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
-    uint64_t tmp1, tmp2;
-    uint64_t res, signres;
-
-    /* Extract odd elements, sign extend each to a 16 bit field */
-    tmp1 = a & elementmask;
-    tmp1 ^= nsignmask;
-    tmp1 |= wsignmask;
-    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
-    /* Ditto for the even elements */
-    tmp2 = (a >> 8) & elementmask;
-    tmp2 ^= nsignmask;
-    tmp2 |= wsignmask;
-    tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-
-    /* calculate the result by summing bits 0..14, 16..22, etc,
-     * and then adjusting the sign bits 15, 23, etc manually.
-     * This ensures the addition can't overflow the 16 bit field.
-     */
-    signres = (tmp1 ^ tmp2) & wsignmask;
-    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
-    res ^= signres;
-
-    return res;
-}
-
 uint64_t HELPER(neon_addlp_u8)(uint64_t a)
 {
     uint64_t tmp;
@@ -348,16 +315,6 @@ uint64_t HELPER(neon_addlp_u8)(uint64_t a)
     return tmp;
 }
 
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
-    int32_t reslo, reshi;
-
-    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
-    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-
-    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-
 uint64_t HELPER(neon_addlp_u16)(uint64_t a)
 {
     uint64_t tmp;
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
index 4e501925dec..b92ddd4914d 100644
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -866,6 +866,49 @@ uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
     return low + ((uint64_t)high << 32);
 }
 
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+    uint64_t nsignmask = 0x0080008000800080ULL;
+    uint64_t wsignmask = 0x8000800080008000ULL;
+    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+    uint64_t tmp1, tmp2;
+    uint64_t res, signres;
+
+    /* Extract odd elements, sign extend each to a 16 bit field */
+    tmp1 = a & elementmask;
+    tmp1 ^= nsignmask;
+    tmp1 |= wsignmask;
+    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+    /* Ditto for the even elements */
+    tmp2 = (a >> 8) & elementmask;
+    tmp2 ^= nsignmask;
+    tmp2 |= wsignmask;
+    tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+    /* calculate the result by summing bits 0..14, 16..30, etc,
+     * and then adjusting the sign bits 15, 31, etc manually.
+     * This ensures the addition can't overflow the 16 bit field.
+     */
+    signres = (tmp1 ^ tmp2) & wsignmask;
+    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+    res ^= signres;
+
+    return res;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+    int32_t reslo, reshi;
+
+    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
+
 uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
 {
     uint64_t mask;
-- 
2.34.1

[Prev in Thread]

Current Thread

[Next in Thread]

[PULL 22/85] target/arm: Convert FCMP, FCMPE, FCCMP, FCCMPE to decodetree, (continued)
- [PULL 22/85] target/arm: Convert FCMP, FCMPE, FCCMP, FCCMPE to decodetree, Peter Maydell, 2024/12/13
- [PULL 24/85] target/arm: Convert FMOV, FABS, FNEG (scalar) to decodetree, Peter Maydell, 2024/12/13
- [PULL 29/85] target/arm: Convert BFCVT to decodetree, Peter Maydell, 2024/12/13
- [PULL 33/85] target/arm: Convert FJCVTZS to decodetree, Peter Maydell, 2024/12/13
- [PULL 34/85] target/arm: Convert handle_fmov to decodetree, Peter Maydell, 2024/12/13
- [PULL 36/85] target/arm: Convert ABS, NEG to decodetree, Peter Maydell, 2024/12/13
- [PULL 38/85] target/arm: Convert CLS, CLZ (vector) to decodetree, Peter Maydell, 2024/12/13
- [PULL 09/85] target/arm: Convert PAC[ID]*, AUT[ID]* to decodetree, Peter Maydell, 2024/12/13
- [PULL 25/85] target/arm: Pass fpstatus to vfp_sqrt*, Peter Maydell, 2024/12/13
- [PULL 32/85] target/arm: Convert handle_fpfpcvt to decodetree, Peter Maydell, 2024/12/13
- [PULL 44/85] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c, Peter Maydell <=
- [PULL 49/85] target/arm: Convert XTN, SQXTUN, SQXTN, UQXTN to decodetree, Peter Maydell, 2024/12/13
- [PULL 48/85] target/arm: Introduce clear_vec, Peter Maydell, 2024/12/13
- [PULL 51/85] target/arm: Convert FCVTXN to decodetree, Peter Maydell, 2024/12/13
- [PULL 57/85] target/arm: Convert FCVT* (vector, integer) scalar to decodetree, Peter Maydell, 2024/12/13
- [PULL 55/85] target/arm: Convert FSQRT (vector) to decodetree, Peter Maydell, 2024/12/13
- [PULL 11/85] target/arm: Convert disas_logic_reg to decodetree, Peter Maydell, 2024/12/13
- [PULL 68/85] target/arm: Convert URECPE and URSQRTE to decodetree, Peter Maydell, 2024/12/13
- [PULL 59/85] target/arm: Convert [US]CVTF (vector, integer) scalar to decodetree, Peter Maydell, 2024/12/13
- [PULL 62/85] target/arm: Convert [US]CVTF (vector) to decodetree, Peter Maydell, 2024/12/13
- [PULL 66/85] target/arm: Convert FRECPE, FRECPX, FRSQRTE to decodetree, Peter Maydell, 2024/12/13

Prev by Date: [PULL 32/85] target/arm: Convert handle_fpfpcvt to decodetree
Next by Date: [PULL 49/85] target/arm: Convert XTN, SQXTUN, SQXTN, UQXTN to decodetree
Previous by thread: [PULL 32/85] target/arm: Convert handle_fpfpcvt to decodetree
Next by thread: [PULL 49/85] target/arm: Convert XTN, SQXTUN, SQXTN, UQXTN to decodetree
Index(es):
- Date
- Thread