[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 43/67] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helpe
From: |
Richard Henderson |
Subject: |
[PATCH 43/67] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c |
Date: |
Sun, 1 Dec 2024 09:05:42 -0600 |
Move from helper-a64.c to neon_helper.c so that these
functions are available for arm32 code as well.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 2 ++
target/arm/tcg/helper-a64.h | 2 --
target/arm/tcg/helper-a64.c | 43 ------------------------------------
target/arm/tcg/neon_helper.c | 43 ++++++++++++++++++++++++++++++++++++
4 files changed, 45 insertions(+), 45 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 167e331a83..57e0ce387b 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -399,6 +399,8 @@ DEF_HELPER_2(neon_addl_u16, i64, i64, i64)
DEF_HELPER_2(neon_addl_u32, i64, i64, i64)
DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index 203b7b7ac8..f811bb85dc 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -41,9 +41,7 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64,
f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 3f4d7b9aba..9b3c407be3 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -306,39 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void
*fpstp)
return float64_muladd(a, b, float64_three, float_muladd_halve_result,
fpst);
}
-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
- uint64_t nsignmask = 0x0080008000800080ULL;
- uint64_t wsignmask = 0x8000800080008000ULL;
- uint64_t elementmask = 0x00ff00ff00ff00ffULL;
- uint64_t tmp1, tmp2;
- uint64_t res, signres;
-
- /* Extract odd elements, sign extend each to a 16 bit field */
- tmp1 = a & elementmask;
- tmp1 ^= nsignmask;
- tmp1 |= wsignmask;
- tmp1 = (tmp1 - nsignmask) ^ wsignmask;
- /* Ditto for the even elements */
- tmp2 = (a >> 8) & elementmask;
- tmp2 ^= nsignmask;
- tmp2 |= wsignmask;
- tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-
- /* calculate the result by summing bits 0..14, 16..22, etc,
- * and then adjusting the sign bits 15, 23, etc manually.
- * This ensures the addition can't overflow the 16 bit field.
- */
- signres = (tmp1 ^ tmp2) & wsignmask;
- res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
- res ^= signres;
-
- return res;
-}
-
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
uint64_t tmp;
@@ -348,16 +315,6 @@ uint64_t HELPER(neon_addlp_u8)(uint64_t a)
return tmp;
}
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
- int32_t reslo, reshi;
-
- reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
- reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-
- return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
uint64_t tmp;
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
index 4e501925de..b92ddd4914 100644
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -866,6 +866,49 @@ uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
return low + ((uint64_t)high << 32);
}
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+ uint64_t nsignmask = 0x0080008000800080ULL;
+ uint64_t wsignmask = 0x8000800080008000ULL;
+ uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+ uint64_t tmp1, tmp2;
+ uint64_t res, signres;
+
+ /* Extract odd elements, sign extend each to a 16 bit field */
+ tmp1 = a & elementmask;
+ tmp1 ^= nsignmask;
+ tmp1 |= wsignmask;
+ tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+ /* Ditto for the even elements */
+ tmp2 = (a >> 8) & elementmask;
+ tmp2 ^= nsignmask;
+ tmp2 |= wsignmask;
+ tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+ /* calculate the result by summing bits 0..14, 16..22, etc,
+ * and then adjusting the sign bits 15, 23, etc manually.
+ * This ensures the addition can't overflow the 16 bit field.
+ */
+ signres = (tmp1 ^ tmp2) & wsignmask;
+ res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+ res ^= signres;
+
+ return res;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+ int32_t reslo, reshi;
+
+ reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+ reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+ return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
+
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
{
uint64_t mask;
--
2.43.0
- [PATCH 39/67] target/arm: Convert CNT, NOT, RBIT (vector) to decodetree, (continued)
- [PATCH 39/67] target/arm: Convert CNT, NOT, RBIT (vector) to decodetree, Richard Henderson, 2024/12/01
- [PATCH 37/67] target/arm: Convert CLS, CLZ (vector) to decodetree, Richard Henderson, 2024/12/01
- [PATCH 36/67] target/arm: Introduce gen_gvec_cls, gen_gvec_clz, Richard Henderson, 2024/12/01
- [PATCH 41/67] target/arm: Introduce gen_gvec_rev{16,32,64}, Richard Henderson, 2024/12/01
- [PATCH 43/67] target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c,
Richard Henderson <=
- [PATCH 40/67] target/arm: Convert CMGT, CMGE, GMLT, GMLE, CMEQ (zero) to decodetree, Richard Henderson, 2024/12/01
- [PATCH 57/67] target/arm: Convert [US]CVTF (vector, integer) scalar to decodetree, Richard Henderson, 2024/12/01
- [PATCH 44/67] target/arm: Introduce gen_gvec_{s,u}{add,ada}lp, Richard Henderson, 2024/12/01
- [PATCH 50/67] target/arm: Convert FCVTXN to decodetree, Richard Henderson, 2024/12/01
- [PATCH 46/67] target/arm: Remove helper_neon_{add,sub}l_u{16,32}, Richard Henderson, 2024/12/01