[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 41/65] target/riscv: Add widening floating-point fused mul-add instructions for XTheadVector
From: |
Huang Tao |
Subject: |
[PATCH 41/65] target/riscv: Add widening floating-point fused mul-add instructions for XTheadVector |
Date: |
Fri, 12 Apr 2024 15:37:11 +0800 |
These instructions behave the same as their RVV1.0 counterparts. The only
differences are the general ones between XTheadVector and RVV1.0.
Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
---
target/riscv/helper.h | 17 +++++++++
.../riscv/insn_trans/trans_xtheadvector.c.inc | 18 +++++----
target/riscv/vector_helper.c | 16 ++++----
target/riscv/vector_internals.h | 9 +++++
target/riscv/xtheadvector_helper.c | 38 +++++++++++++++++++
5 files changed, 82 insertions(+), 16 deletions(-)
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 88e3a18e17..12b5e4573a 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -2099,3 +2099,20 @@ DEF_HELPER_6(th_vfmsub_vf_d, void, ptr, ptr, i64, ptr,
env, i32)
DEF_HELPER_6(th_vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(th_vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(th_vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
+
+DEF_HELPER_6(th_vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
diff --git a/target/riscv/insn_trans/trans_xtheadvector.c.inc
b/target/riscv/insn_trans/trans_xtheadvector.c.inc
index af512c489b..7220b7d607 100644
--- a/target/riscv/insn_trans/trans_xtheadvector.c.inc
+++ b/target/riscv/insn_trans/trans_xtheadvector.c.inc
@@ -2037,20 +2037,22 @@ GEN_OPFVF_TRANS_TH(th_vfnmadd_vf, opfvf_check_th)
GEN_OPFVF_TRANS_TH(th_vfmsub_vf, opfvf_check_th)
GEN_OPFVF_TRANS_TH(th_vfnmsub_vf, opfvf_check_th)
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmacc_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmacc_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmsac_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmsac_vv, opfvv_widen_check_th)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmacc_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmacc_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmsac_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmsac_vf)
+
#define TH_TRANS_STUB(NAME) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ \
return require_xtheadvector(s); \
}
-TH_TRANS_STUB(th_vfwmacc_vv)
-TH_TRANS_STUB(th_vfwmacc_vf)
-TH_TRANS_STUB(th_vfwnmacc_vv)
-TH_TRANS_STUB(th_vfwnmacc_vf)
-TH_TRANS_STUB(th_vfwmsac_vv)
-TH_TRANS_STUB(th_vfwmsac_vf)
-TH_TRANS_STUB(th_vfwnmsac_vv)
-TH_TRANS_STUB(th_vfwnmsac_vf)
TH_TRANS_STUB(th_vfsqrt_v)
TH_TRANS_STUB(th_vfmin_vv)
TH_TRANS_STUB(th_vfmin_vf)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 165221e08b..ef89794bdd 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -3332,13 +3332,13 @@ GEN_VEXT_VF(vfnmsub_vf_w, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8)
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
-static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d, 0, s);
}
-static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s),
float32_to_float64(b, s), d, 0, s);
@@ -3364,7 +3364,7 @@ GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)
-static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d,
@@ -3372,7 +3372,7 @@ static uint32_t fwnmacc16(uint16_t a, uint16_t b,
uint32_t d, float_status *s)
s);
}
-static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
d, float_muladd_negate_c |
@@ -3388,14 +3388,14 @@ RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4,
fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)
-static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d,
float_muladd_negate_c, s);
}
-static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s),
float32_to_float64(b, s), d,
@@ -3411,14 +3411,14 @@ RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4,
fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)
-static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d,
float_muladd_negate_product, s);
}
-static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s),
float32_to_float64(b, s), d,
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
index 5733640e0d..535d31007d 100644
--- a/target/riscv/vector_internals.h
+++ b/target/riscv/vector_internals.h
@@ -385,4 +385,13 @@ uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d,
float_status *s);
uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s);
uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s);
+uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+
#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
diff --git a/target/riscv/xtheadvector_helper.c
b/target/riscv/xtheadvector_helper.c
index 1d2da6ffb7..ac8e576c49 100644
--- a/target/riscv/xtheadvector_helper.c
+++ b/target/riscv/xtheadvector_helper.c
@@ -2904,3 +2904,41 @@ THCALL(TH_OPFVF3, th_vfnmsub_vf_d, OP_UUU_D, H8, H8,
fnmsub64)
GEN_TH_VF(th_vfnmsub_vf_h, 2, 2, clearh_th)
GEN_TH_VF(th_vfnmsub_vf_w, 4, 4, clearl_th)
GEN_TH_VF(th_vfnmsub_vf_d, 8, 8, clearq_th)
+
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
+
+THCALL(TH_OPFVV3, th_vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
+THCALL(TH_OPFVV3, th_vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
+GEN_TH_VV_ENV(th_vfwmacc_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwmacc_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
+THCALL(TH_OPFVF3, th_vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
+GEN_TH_VF(th_vfwmacc_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwmacc_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
+THCALL(TH_OPFVV3, th_vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
+GEN_TH_VV_ENV(th_vfwnmacc_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwnmacc_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
+THCALL(TH_OPFVF3, th_vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
+GEN_TH_VF(th_vfwnmacc_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwnmacc_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
+THCALL(TH_OPFVV3, th_vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
+GEN_TH_VV_ENV(th_vfwmsac_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwmsac_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
+THCALL(TH_OPFVF3, th_vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
+GEN_TH_VF(th_vfwmsac_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwmsac_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
+THCALL(TH_OPFVV3, th_vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
+GEN_TH_VV_ENV(th_vfwnmsac_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwnmsac_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
+THCALL(TH_OPFVF3, th_vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
+GEN_TH_VF(th_vfwnmsac_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwnmsac_vf_w, 4, 8, clearq_th)
--
2.44.0
- [PATCH 31/65] target/riscv: Add single-width average add and sub instructions for XTheadVector, (continued)
- [PATCH 31/65] target/riscv: Add single-width average add and sub instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 32/65] target/riscv: Add single-width fractional mul with rounding and saturation for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 33/65] target/riscv: Add widening saturating scaled multiply-add instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 34/65] target/riscv: Add single-width scaling shift instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 35/65] target/riscv: Add narrowing fixed-point clip instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 36/65] target/riscv: Add single-width floating-point add/sub instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 37/65] target/riscv: Add widening floating-point add/sub instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 38/65] target/riscv: Add single-width floating-point multiply/divide instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 39/65] target/riscv: Add widening floating-point multiply instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 40/65] target/riscv: Add single-width floating-point fused multiply-add instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 41/65] target/riscv: Add widening floating-point fused mul-add instructions for XTheadVector,
Huang Tao <=
- [PATCH 42/65] target/riscv: Add floating-point square-root instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 43/65] target/riscv: Add floating-point MIN/MAX instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 44/65] target/riscv: Add floating-point sign-injection instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 45/65] target/riscv: Add floating-point compare instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 46/65] target/riscv: Add floating-point classify and merge instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 47/65] target/riscv: Add single-width floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 48/65] target/riscv: Add widening floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 49/65] target/riscv: Add narrowing floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 51/65] target/riscv: Add widening integer reduction instructions for XTheadVector, Huang Tao, 2024/04/12