[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 40/87] target/ppc: Implement xxeval
From: |
Cédric Le Goater |
Subject: |
[PULL 40/87] target/ppc: Implement xxeval |
Date: |
Wed, 2 Mar 2022 12:07:16 +0100 |
From: Matheus Ferst <matheus.ferst@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-35-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
target/ppc/helper.h | 1 +
target/ppc/insn64.decode | 8 +
target/ppc/int_helper.c | 42 ++++++
target/ppc/translate/vsx-impl.c.inc | 220 ++++++++++++++++++++++++++++
4 files changed, 271 insertions(+)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 7eda74211e66..13034bca58c7 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -498,6 +498,7 @@ DEF_HELPER_4(xxextractuw, void, env, vsr, vsr, i32)
DEF_HELPER_FLAGS_5(XXPERMX, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, tl)
DEF_HELPER_4(xxinsertw, void, env, vsr, vsr, i32)
DEF_HELPER_3(xvxsigsp, void, env, vsr, vsr)
+DEF_HELPER_FLAGS_5(XXEVAL, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32)
DEF_HELPER_5(XXBLENDVB, void, vsr, vsr, vsr, vsr, i32)
DEF_HELPER_5(XXBLENDVH, void, vsr, vsr, vsr, vsr, i32)
DEF_HELPER_5(XXBLENDVW, void, vsr, vsr, vsr, vsr, i32)
diff --git a/target/ppc/insn64.decode b/target/ppc/insn64.decode
index 0963e064b189..fdb859f62dd8 100644
--- a/target/ppc/insn64.decode
+++ b/target/ppc/insn64.decode
@@ -54,6 +54,11 @@
...... ..... ..... ..... ..... .. .... \
&8RR_XX4 xt=%8rr_xx_xt xa=%8rr_xx_xa xb=%8rr_xx_xb
xc=%8rr_xx_xc
+&8RR_XX4_imm xt xa xb xc imm
+@8RR_XX4_imm ........ ........ ........ imm:8 \
+ ...... ..... ..... ..... ..... .. .... \
+ &8RR_XX4_imm xt=%8rr_xx_xt xa=%8rr_xx_xa xb=%8rr_xx_xb
xc=%8rr_xx_xc
+
&8RR_XX4_uim3 xt xa xb xc uim3
@8RR_XX4_uim3 ...... .. .... .. ............... uim3:3 \
...... ..... ..... ..... ..... .. .... \
@@ -184,6 +189,9 @@ PLXVP 000001 00 0--.-- .................. \
PSTXVP 000001 00 0--.-- .................. \
111110 ..... ..... ................ @8LS_D_TSXP
+XXEVAL 000001 01 0000 -- ---------- ........ \
+ 100010 ..... ..... ..... ..... 01 .... @8RR_XX4_imm
+
XXSPLTIDP 000001 01 0000 -- -- ................ \
100000 ..... 0010 . ................ @8RR_D
XXSPLTIW 000001 01 0000 -- -- ................ \
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index d9bfdc290fbf..ca59cd3d79ec 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -28,6 +28,7 @@
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
+#include "tcg/tcg-gvec-desc.h"
#include "helper_regs.h"
/*****************************************************************************/
@@ -1572,6 +1573,47 @@ void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
*xt = t;
}
+void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
+ uint32_t desc)
+{
+ /*
+ * Instead of processing imm bit-by-bit, we'll skip the computation of
+ * conjunctions whose corresponding bit is unset.
+ */
+ int bit, imm = simd_data(desc);
+ Int128 conj, disj = int128_zero();
+
+ /* Iterate over set bits from the least to the most significant bit */
+ while (imm) {
+ /*
+ * Get the next bit to be processed with ctz64. Invert the result of
+ * ctz64 to match the indexing used by PowerISA.
+ */
+ bit = 7 - ctzl(imm);
+ if (bit & 0x4) {
+ conj = a->s128;
+ } else {
+ conj = int128_not(a->s128);
+ }
+ if (bit & 0x2) {
+ conj = int128_and(conj, b->s128);
+ } else {
+ conj = int128_and(conj, int128_not(b->s128));
+ }
+ if (bit & 0x1) {
+ conj = int128_and(conj, c->s128);
+ } else {
+ conj = int128_and(conj, int128_not(c->s128));
+ }
+ disj = int128_or(disj, conj);
+
+ /* Unset the least significant bit that is set */
+ imm &= imm - 1;
+ }
+
+ t->s128 = disj;
+}
+
#define XXBLEND(name, sz) \
void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
ppc_avr_t *c, uint32_t desc) \
diff --git a/target/ppc/translate/vsx-impl.c.inc
b/target/ppc/translate/vsx-impl.c.inc
index 92851b8926d8..b5e07cf3df32 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2167,6 +2167,226 @@ TRANS64_FLAGS2(ISA310, PLXV, do_lstxv_PLS_D, false,
false)
TRANS64_FLAGS2(ISA310, PSTXVP, do_lstxv_PLS_D, true, true)
TRANS64_FLAGS2(ISA310, PLXVP, do_lstxv_PLS_D, false, true)
+static void gen_xxeval_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c,
+ int64_t imm)
+{
+ /*
+ * Instead of processing imm bit-by-bit, we'll skip the computation of
+ * conjunctions whose corresponding bit is unset.
+ */
+ int bit;
+ TCGv_i64 conj, disj;
+
+ conj = tcg_temp_new_i64();
+ disj = tcg_const_i64(0);
+
+ /* Iterate over set bits from the least to the most significant bit */
+ while (imm) {
+ /*
+ * Get the next bit to be processed with ctz64. Invert the result of
+ * ctz64 to match the indexing used by PowerISA.
+ */
+ bit = 7 - ctz64(imm);
+ if (bit & 0x4) {
+ tcg_gen_mov_i64(conj, a);
+ } else {
+ tcg_gen_not_i64(conj, a);
+ }
+ if (bit & 0x2) {
+ tcg_gen_and_i64(conj, conj, b);
+ } else {
+ tcg_gen_andc_i64(conj, conj, b);
+ }
+ if (bit & 0x1) {
+ tcg_gen_and_i64(conj, conj, c);
+ } else {
+ tcg_gen_andc_i64(conj, conj, c);
+ }
+ tcg_gen_or_i64(disj, disj, conj);
+
+ /* Unset the least significant bit that is set */
+ imm &= imm - 1;
+ }
+
+ tcg_gen_mov_i64(t, disj);
+
+ tcg_temp_free_i64(conj);
+ tcg_temp_free_i64(disj);
+}
+
+static void gen_xxeval_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
+ TCGv_vec c, int64_t imm)
+{
+ /*
+ * Instead of processing imm bit-by-bit, we'll skip the computation of
+ * conjunctions whose corresponding bit is unset.
+ */
+ int bit;
+ TCGv_vec disj, conj;
+
+ disj = tcg_const_zeros_vec_matching(t);
+ conj = tcg_temp_new_vec_matching(t);
+
+ /* Iterate over set bits from the least to the most significant bit */
+ while (imm) {
+ /*
+ * Get the next bit to be processed with ctz64. Invert the result of
+ * ctz64 to match the indexing used by PowerISA.
+ */
+ bit = 7 - ctz64(imm);
+ if (bit & 0x4) {
+ tcg_gen_mov_vec(conj, a);
+ } else {
+ tcg_gen_not_vec(vece, conj, a);
+ }
+ if (bit & 0x2) {
+ tcg_gen_and_vec(vece, conj, conj, b);
+ } else {
+ tcg_gen_andc_vec(vece, conj, conj, b);
+ }
+ if (bit & 0x1) {
+ tcg_gen_and_vec(vece, conj, conj, c);
+ } else {
+ tcg_gen_andc_vec(vece, conj, conj, c);
+ }
+ tcg_gen_or_vec(vece, disj, disj, conj);
+
+ /* Unset the least significant bit that is set */
+ imm &= imm - 1;
+ }
+
+ tcg_gen_mov_vec(t, disj);
+
+ tcg_temp_free_vec(disj);
+ tcg_temp_free_vec(conj);
+}
+
+static bool trans_XXEVAL(DisasContext *ctx, arg_8RR_XX4_imm *a)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_andc_vec, 0
+ };
+ static const GVecGen4i op = {
+ .fniv = gen_xxeval_vec,
+ .fno = gen_helper_XXEVAL,
+ .fni8 = gen_xxeval_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ };
+ int xt = vsr_full_offset(a->xt), xa = vsr_full_offset(a->xa),
+ xb = vsr_full_offset(a->xb), xc = vsr_full_offset(a->xc);
+
+ REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+ REQUIRE_VSX(ctx);
+
+ /* Equivalent functions that can be implemented with a single gen_gvec */
+ switch (a->imm) {
+ case 0b00000000: /* true */
+ set_cpu_vsr(a->xt, tcg_constant_i64(0), true);
+ set_cpu_vsr(a->xt, tcg_constant_i64(0), false);
+ break;
+ case 0b00000011: /* and(B,A) */
+ tcg_gen_gvec_and(MO_64, xt, xb, xa, 16, 16);
+ break;
+ case 0b00000101: /* and(C,A) */
+ tcg_gen_gvec_and(MO_64, xt, xc, xa, 16, 16);
+ break;
+ case 0b00001111: /* A */
+ tcg_gen_gvec_mov(MO_64, xt, xa, 16, 16);
+ break;
+ case 0b00010001: /* and(C,B) */
+ tcg_gen_gvec_and(MO_64, xt, xc, xb, 16, 16);
+ break;
+ case 0b00011011: /* C?B:A */
+ tcg_gen_gvec_bitsel(MO_64, xt, xc, xb, xa, 16, 16);
+ break;
+ case 0b00011101: /* B?C:A */
+ tcg_gen_gvec_bitsel(MO_64, xt, xb, xc, xa, 16, 16);
+ break;
+ case 0b00100111: /* C?A:B */
+ tcg_gen_gvec_bitsel(MO_64, xt, xc, xa, xb, 16, 16);
+ break;
+ case 0b00110011: /* B */
+ tcg_gen_gvec_mov(MO_64, xt, xb, 16, 16);
+ break;
+ case 0b00110101: /* A?C:B */
+ tcg_gen_gvec_bitsel(MO_64, xt, xa, xc, xb, 16, 16);
+ break;
+ case 0b00111100: /* xor(B,A) */
+ tcg_gen_gvec_xor(MO_64, xt, xb, xa, 16, 16);
+ break;
+ case 0b00111111: /* or(B,A) */
+ tcg_gen_gvec_or(MO_64, xt, xb, xa, 16, 16);
+ break;
+ case 0b01000111: /* B?A:C */
+ tcg_gen_gvec_bitsel(MO_64, xt, xb, xa, xc, 16, 16);
+ break;
+ case 0b01010011: /* A?B:C */
+ tcg_gen_gvec_bitsel(MO_64, xt, xa, xb, xc, 16, 16);
+ break;
+ case 0b01010101: /* C */
+ tcg_gen_gvec_mov(MO_64, xt, xc, 16, 16);
+ break;
+ case 0b01011010: /* xor(C,A) */
+ tcg_gen_gvec_xor(MO_64, xt, xc, xa, 16, 16);
+ break;
+ case 0b01011111: /* or(C,A) */
+ tcg_gen_gvec_or(MO_64, xt, xc, xa, 16, 16);
+ break;
+ case 0b01100110: /* xor(C,B) */
+ tcg_gen_gvec_xor(MO_64, xt, xc, xb, 16, 16);
+ break;
+ case 0b01110111: /* or(C,B) */
+ tcg_gen_gvec_or(MO_64, xt, xc, xb, 16, 16);
+ break;
+ case 0b10001000: /* nor(C,B) */
+ tcg_gen_gvec_nor(MO_64, xt, xc, xb, 16, 16);
+ break;
+ case 0b10011001: /* eqv(C,B) */
+ tcg_gen_gvec_eqv(MO_64, xt, xc, xb, 16, 16);
+ break;
+ case 0b10100000: /* nor(C,A) */
+ tcg_gen_gvec_nor(MO_64, xt, xc, xa, 16, 16);
+ break;
+ case 0b10100101: /* eqv(C,A) */
+ tcg_gen_gvec_eqv(MO_64, xt, xc, xa, 16, 16);
+ break;
+ case 0b10101010: /* not(C) */
+ tcg_gen_gvec_not(MO_64, xt, xc, 16, 16);
+ break;
+ case 0b11000000: /* nor(B,A) */
+ tcg_gen_gvec_nor(MO_64, xt, xb, xa, 16, 16);
+ break;
+ case 0b11000011: /* eqv(B,A) */
+ tcg_gen_gvec_eqv(MO_64, xt, xb, xa, 16, 16);
+ break;
+ case 0b11001100: /* not(B) */
+ tcg_gen_gvec_not(MO_64, xt, xb, 16, 16);
+ break;
+ case 0b11101110: /* nand(C,B) */
+ tcg_gen_gvec_nand(MO_64, xt, xc, xb, 16, 16);
+ break;
+ case 0b11110000: /* not(A) */
+ tcg_gen_gvec_not(MO_64, xt, xa, 16, 16);
+ break;
+ case 0b11111010: /* nand(C,A) */
+ tcg_gen_gvec_nand(MO_64, xt, xc, xa, 16, 16);
+ break;
+ case 0b11111100: /* nand(B,A) */
+ tcg_gen_gvec_nand(MO_64, xt, xb, xa, 16, 16);
+ break;
+ case 0b11111111: /* true */
+ set_cpu_vsr(a->xt, tcg_constant_i64(-1), true);
+ set_cpu_vsr(a->xt, tcg_constant_i64(-1), false);
+ break;
+ default:
+ /* Fallback to compute all conjunctions/disjunctions */
+ tcg_gen_gvec_4i(xt, xa, xb, xc, 16, 16, a->imm, &op);
+ }
+
+ return true;
+}
+
static void gen_xxblendv_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
TCGv_vec c)
{
--
2.34.1
- [PULL 25/87] target/ppc: move vs[lr][a][bhwd] to decodetree, (continued)
- [PULL 25/87] target/ppc: move vs[lr][a][bhwd] to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 16/87] target/ppc: Move Vector Compare Not Equal or Zero to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 27/87] target/ppc: implement vsrq, Cédric Le Goater, 2022/03/02
- [PULL 31/87] target/ppc: implement vrlq, Cédric Le Goater, 2022/03/02
- [PULL 30/87] target/ppc: move vrl[bhwd]nm/vrl[bhwd]mi to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 38/87] target/ppc: Implement xxpermx instruction, Cédric Le Goater, 2022/03/02
- [PULL 35/87] target/ppc: Move xxsel to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 29/87] target/ppc: move vrl[bhwd] to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 09/87] target/ppc: Moved vector multiply high and low to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 39/87] tcg/tcg-op-gvec.c: Introduce tcg_gen_gvec_4i, Cédric Le Goater, 2022/03/02
- [PULL 40/87] target/ppc: Implement xxeval,
Cédric Le Goater <=
- [PULL 42/87] target/ppc: move xs[n]madd[am][ds]p/xs[n]msub[am][ds]p to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 46/87] target/ppc: Refactor VSX_SCALAR_CMP_DP, Cédric Le Goater, 2022/03/02
- [PULL 56/87] ppc/xive2: Introduce a XIVE2 core framework, Cédric Le Goater, 2022/03/02
- [PULL 55/87] target/ppc: implement lxvr[bhwd]/stxvr[bhwd]x, Cédric Le Goater, 2022/03/02
- [PULL 50/87] target/ppc: Refactor VSX_MAX_MINC helper, Cédric Le Goater, 2022/03/02
- [PULL 51/87] target/ppc: Implement xs{max,min}cqp, Cédric Le Goater, 2022/03/02
- [PULL 57/87] ppc/xive2: Introduce a presenter matching routine, Cédric Le Goater, 2022/03/02
- [PULL 65/87] ppc/xive: Add support for PQ state bits offload, Cédric Le Goater, 2022/03/02
- [PULL 36/87] target/ppc: move xxperm/xxpermr to decodetree, Cédric Le Goater, 2022/03/02
- [PULL 37/87] target/ppc: Move xxpermdi to decodetree, Cédric Le Goater, 2022/03/02