[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC PATCH v2 7/7] target/ppc: Implemented [pm]xvbf16ger2*
From: |
Lucas Mateus Castro(alqotel) |
Subject: |
[RFC PATCH v2 7/7] target/ppc: Implemented [pm]xvbf16ger2* |
Date: |
Fri, 6 May 2022 09:18:44 -0300 |
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
xvbf16ger2: VSX Vector bfloat16 GER (rank-2 update)
xvbf16ger2nn: VSX Vector bfloat16 GER (rank-2 update) Negative multiply,
Negative accumulate
xvbf16ger2np: VSX Vector bfloat16 GER (rank-2 update) Negative multiply,
Positive accumulate
xvbf16ger2pn: VSX Vector bfloat16 GER (rank-2 update) Positive multiply,
Negative accumulate
xvbf16ger2pp: VSX Vector bfloat16 GER (rank-2 update) Positive multiply,
Positive accumulate
pmxvbf16ger2: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update)
pmxvbf16ger2nn: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update)
Negative multiply, Negative accumulate
pmxvbf16ger2np: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update)
Negative multiply, Positive accumulate
pmxvbf16ger2pn: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update)
Positive multiply, Negative accumulate
pmxvbf16ger2pp: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update)
Positive multiply, Positive accumulate
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
There's a discrepancy between this implementation and mambo/the
hardware where implementing it with float64_mul then float64r32_muladd
sometimes results in an incorrect result after an underflow, but
implementing with float32_mul then float32_muladd results in incorrect
signal in some 0 or infinite results. I've not been able to solve this
---
target/ppc/fpu_helper.c | 40 +++++++++++++++++++++++++++++
target/ppc/helper.h | 5 ++++
target/ppc/insn32.decode | 6 +++++
target/ppc/insn64.decode | 11 ++++++++
target/ppc/translate/vsx-impl.c.inc | 12 +++++++++
5 files changed, 74 insertions(+)
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 6857be6ccc..0882702301 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -3491,6 +3491,11 @@ static float64 extract_hf16(float16 in, float_status
*fp_status)
return float16_to_float64(in, true, fp_status);
}
+static float64 extract_bf16(bfloat16 in, float_status *fp_status)
+{
+ return bfloat16_to_float64(in, fp_status);
+}
+
static void vsxger16(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
ppc_acc_t *at, uint32_t mask, bool acc,
bool neg_mul, bool neg_acc, extract_f16 extract)
@@ -3611,6 +3616,41 @@ static void vsxger(CPUPPCState *env, ppc_vsr_t *a,
ppc_vsr_t *b, ppc_acc_t *at,
do_float_check_status(env, GETPC());
}
+QEMU_FLATTEN
+void helper_XVBF16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ vsxger16(env, a, b, at, mask, false, false, false, extract_bf16);
+}
+
+QEMU_FLATTEN
+void helper_XVBF16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ vsxger16(env, a, b, at, mask, true, false, false, extract_bf16);
+}
+
+QEMU_FLATTEN
+void helper_XVBF16GER2PN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ vsxger16(env, a, b, at, mask, true, false, true, extract_bf16);
+}
+
+QEMU_FLATTEN
+void helper_XVBF16GER2NP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ vsxger16(env, a, b, at, mask, true, true, false, extract_bf16);
+}
+
+QEMU_FLATTEN
+void helper_XVBF16GER2NN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ vsxger16(env, a, b, at, mask, true, true, true, extract_bf16);
+}
+
QEMU_FLATTEN
void helper_XVF16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
ppc_acc_t *at, uint32_t mask)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 5f2f574d30..59e6b74f94 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -551,6 +551,11 @@ DEF_HELPER_5(XVF16GER2PP, void, env, vsr, vsr, vsr, i32)
DEF_HELPER_5(XVF16GER2PN, void, env, vsr, vsr, vsr, i32)
DEF_HELPER_5(XVF16GER2NP, void, env, vsr, vsr, vsr, i32)
DEF_HELPER_5(XVF16GER2NN, void, env, vsr, vsr, vsr, i32)
+DEF_HELPER_5(XVBF16GER2, void, env, vsr, vsr, vsr, i32)
+DEF_HELPER_5(XVBF16GER2PP, void, env, vsr, vsr, vsr, i32)
+DEF_HELPER_5(XVBF16GER2PN, void, env, vsr, vsr, vsr, i32)
+DEF_HELPER_5(XVBF16GER2NP, void, env, vsr, vsr, vsr, i32)
+DEF_HELPER_5(XVBF16GER2NN, void, env, vsr, vsr, vsr, i32)
DEF_HELPER_5(XVF32GER, void, env, vsr, vsr, vsr, i32)
DEF_HELPER_5(XVF32GERPP, void, env, vsr, vsr, vsr, i32)
DEF_HELPER_5(XVF32GERPN, void, env, vsr, vsr, vsr, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index bbd4bc80f8..2090c17268 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -736,6 +736,12 @@ XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..-
@XX3_at xa=%xx_xa
XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa
XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa
+XVBF16GER2 111011 ... -- ..... ..... 00110011 ..- @XX3_at xa=%xx_xa
+XVBF16GER2PP 111011 ... -- ..... ..... 00110010 ..- @XX3_at xa=%xx_xa
+XVBF16GER2PN 111011 ... -- ..... ..... 10110010 ..- @XX3_at xa=%xx_xa
+XVBF16GER2NP 111011 ... -- ..... ..... 01110010 ..- @XX3_at xa=%xx_xa
+XVBF16GER2NN 111011 ... -- ..... ..... 11110010 ..- @XX3_at xa=%xx_xa
+
XVF16GER2 111011 ... -- ..... ..... 00010011 ..- @XX3_at xa=%xx_xa
XVF16GER2PP 111011 ... -- ..... ..... 00010010 ..- @XX3_at xa=%xx_xa
XVF16GER2PN 111011 ... -- ..... ..... 10010010 ..- @XX3_at xa=%xx_xa
diff --git a/target/ppc/insn64.decode b/target/ppc/insn64.decode
index a12f11044c..78738924c6 100644
--- a/target/ppc/insn64.decode
+++ b/target/ppc/insn64.decode
@@ -150,6 +150,17 @@ PMXVI16GER2S 000001 11 1001 -- - - pmsk:2 ------
........ \
PMXVI16GER2SPP 000001 11 1001 -- - - pmsk:2 ------ ........ \
111011 ... -- ..... ..... 00101010 ..- @MMIRR_XX3
+PMXVBF16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \
+ 111011 ... -- ..... ..... 00110011 ..- @MMIRR_XX3
+PMXVBF16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \
+ 111011 ... -- ..... ..... 00110010 ..- @MMIRR_XX3
+PMXVBF16GER2PN 000001 11 1001 -- - - pmsk:2 ------ ........ \
+ 111011 ... -- ..... ..... 10110010 ..- @MMIRR_XX3
+PMXVBF16GER2NP 000001 11 1001 -- - - pmsk:2 ------ ........ \
+ 111011 ... -- ..... ..... 01110010 ..- @MMIRR_XX3
+PMXVBF16GER2NN 000001 11 1001 -- - - pmsk:2 ------ ........ \
+ 111011 ... -- ..... ..... 11110010 ..- @MMIRR_XX3
+
PMXVF16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \
111011 ... -- ..... ..... 00010011 ..- @MMIRR_XX3
PMXVF16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \
diff --git a/target/ppc/translate/vsx-impl.c.inc
b/target/ppc/translate/vsx-impl.c.inc
index 00eed2b1b9..bb5b68ee06 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2884,6 +2884,12 @@ TRANS64(PMXVI16GER2PP, do_ger_MMIRR_XX3,
gen_helper_XVI16GER2PP)
TRANS64(PMXVI16GER2S, do_ger_MMIRR_XX3, gen_helper_XVI16GER2S)
TRANS64(PMXVI16GER2SPP, do_ger_MMIRR_XX3, gen_helper_XVI16GER2SPP)
+TRANS(XVBF16GER2, do_ger_XX3, gen_helper_XVBF16GER2)
+TRANS(XVBF16GER2PP, do_ger_XX3, gen_helper_XVBF16GER2PP)
+TRANS(XVBF16GER2PN, do_ger_XX3, gen_helper_XVBF16GER2PN)
+TRANS(XVBF16GER2NP, do_ger_XX3, gen_helper_XVBF16GER2NP)
+TRANS(XVBF16GER2NN, do_ger_XX3, gen_helper_XVBF16GER2NN)
+
TRANS(XVF16GER2, do_ger_XX3, gen_helper_XVF16GER2)
TRANS(XVF16GER2PP, do_ger_XX3, gen_helper_XVF16GER2PP)
TRANS(XVF16GER2PN, do_ger_XX3, gen_helper_XVF16GER2PN)
@@ -2902,6 +2908,12 @@ TRANS(XVF64GERPN, do_ger_XX3, gen_helper_XVF64GERPN)
TRANS(XVF64GERNP, do_ger_XX3, gen_helper_XVF64GERNP)
TRANS(XVF64GERNN, do_ger_XX3, gen_helper_XVF64GERNN)
+TRANS64(PMXVBF16GER2, do_ger_MMIRR_XX3, gen_helper_XVBF16GER2)
+TRANS64(PMXVBF16GER2PP, do_ger_MMIRR_XX3, gen_helper_XVBF16GER2PP)
+TRANS64(PMXVBF16GER2PN, do_ger_MMIRR_XX3, gen_helper_XVBF16GER2PN)
+TRANS64(PMXVBF16GER2NP, do_ger_MMIRR_XX3, gen_helper_XVBF16GER2NP)
+TRANS64(PMXVBF16GER2NN, do_ger_MMIRR_XX3, gen_helper_XVBF16GER2NN)
+
TRANS64(PMXVF16GER2, do_ger_MMIRR_XX3, gen_helper_XVF16GER2)
TRANS64(PMXVF16GER2PP, do_ger_MMIRR_XX3, gen_helper_XVF16GER2PP)
TRANS64(PMXVF16GER2PN, do_ger_MMIRR_XX3, gen_helper_XVF16GER2PN)
--
2.31.1
- Re: [RFC PATCH v2 3/7] target/ppc: Implemented pmxvi*ger* instructions, (continued)