[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 43/45] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations
From: Peter Maydell
Subject: [PATCH v2 43/45] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations
Date: Fri, 28 Aug 2020 19:33:52 +0100
Add gvec helpers for doing Neon-style indexed non-fused fp
multiply-and-accumulate operations.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 10 ++++++++++
target/arm/vec_helper.c | 27 ++++++++++++++++++++++-----
2 files changed, 32 insertions(+), 5 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index cbdbf824d8d..8defd7c8019 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -740,6 +740,16 @@ DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index b27b90e1dd8..a973454e4f4 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1085,7 +1085,7 @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, )
#undef DO_MLA_IDX
-#define DO_FMUL_IDX(NAME, TYPE, H) \
+#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc); \
@@ -1095,16 +1095,33 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
TYPE mm = m[H(i + idx)]; \
for (j = 0; j < segment; j++) { \
- d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
+ d[i + j] = TYPE##_##ADD(d[i + j], \
+ TYPE##_mul(n[i + j], mm, stat), stat); \
} \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-DO_FMUL_IDX(gvec_fmul_idx_h, float16, H2)
-DO_FMUL_IDX(gvec_fmul_idx_s, float32, H4)
-DO_FMUL_IDX(gvec_fmul_idx_d, float64, )
+#define float16_nop(N, M, S) (M)
+#define float32_nop(N, M, S) (M)
+#define float64_nop(N, M, S) (M)
+DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2)
+DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4)
+DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, )
+
+/*
+ * Non-fused multiply-accumulate operations, for Neon. Note that, unlike
+ * the fused ops below, they accumulate both from and into Vd.
+ */
+DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2)
+DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4)
+DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2)
+DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4)
+
+#undef float16_nop
+#undef float32_nop
+#undef float64_nop
#undef DO_FMUL_IDX
#define DO_FMLA_IDX(NAME, TYPE, H) \
--
2.20.1
- [PATCH v2 35/45] target/arm: Implement fp16 for Neon pairwise fp ops, (continued)
- [PATCH v2 35/45] target/arm: Implement fp16 for Neon pairwise fp ops, Peter Maydell, 2020/08/28
- [PATCH v2 36/45] target/arm: Implement fp16 for Neon float-integer VCVT, Peter Maydell, 2020/08/28
- [PATCH v2 37/45] target/arm: Convert Neon VCVT fixed-point to gvec, Peter Maydell, 2020/08/28
- [PATCH v2 38/45] target/arm: Implement fp16 for Neon VCVT fixed-point, Peter Maydell, 2020/08/28
- [PATCH v2 39/45] target/arm: Implement fp16 for Neon VCVT with rounding modes, Peter Maydell, 2020/08/28
- [PATCH v2 43/45] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations,
Peter Maydell <=
- [PATCH v2 40/45] target/arm: Implement fp16 for Neon VRINT-with-specified-rounding-mode, Peter Maydell, 2020/08/28
- [PATCH v2 45/45] target/arm: Enable FP16 in '-cpu max', Peter Maydell, 2020/08/28
- [PATCH v2 42/45] target/arm/vec_helper: Handle oprsz less than 16 bytes in indexed operations, Peter Maydell, 2020/08/28
- [PATCH v2 41/45] target/arm: Implement fp16 for Neon VRINTX, Peter Maydell, 2020/08/28
- [PATCH v2 44/45] target/arm: Implement fp16 for Neon VMUL, VMLA, VMLS, Peter Maydell, 2020/08/28