[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 18/62] target/ppc: Use gvec to decode XVTSTDC[DS]P
From: |
Daniel Henrique Barboza |
Subject: |
[PULL 18/62] target/ppc: Use gvec to decode XVTSTDC[DS]P |
Date: |
Fri, 28 Oct 2022 13:39:07 -0300 |
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Used gvec to translate XVTSTDCSP and XVTSTDCDP.
xvtstdcsp:
rept loop imm master version prev version current version
25 4000 0 0,206200 0,040730 (-80.2%) 0,040740 (-80.2%)
25 4000 1 0,205120 0,053650 (-73.8%) 0,053510 (-73.9%)
25 4000 3 0,206160 0,058630 (-71.6%) 0,058570 (-71.6%)
25 4000 51 0,217110 0,191490 (-11.8%) 0,192320 (-11.4%)
25 4000 127 0,206160 0,191490 (-7.1%) 0,192640 (-6.6%)
8000 12 0 1,234719 0,418833 (-66.1%) 0,386365 (-68.7%)
8000 12 1 1,232417 1,435979 (+16.5%) 1,462792 (+18.7%)
8000 12 3 1,232760 1,766073 (+43.3%) 1,743990 (+41.5%)
8000 12 51 1,239281 1,319562 (+6.5%) 1,423479 (+14.9%)
8000 12 127 1,231708 1,315760 (+6.8%) 1,426667 (+15.8%)
xvtstdcdp:
rept loop imm master version prev version current version
25 4000 0 0,159930 0,040830 (-74.5%) 0,040610 (-74.6%)
25 4000 1 0,160640 0,053670 (-66.6%) 0,053480 (-66.7%)
25 4000 3 0,160020 0,063030 (-60.6%) 0,062960 (-60.7%)
25 4000 51 0,160410 0,128620 (-19.8%) 0,127470 (-20.5%)
25 4000 127 0,160330 0,127670 (-20.4%) 0,128690 (-19.7%)
8000 12 0 1,190365 0,422146 (-64.5%) 0,388417 (-67.4%)
8000 12 1 1,191292 1,445312 (+21.3%) 1,428698 (+19.9%)
8000 12 3 1,188687 1,980656 (+66.6%) 1,975354 (+66.2%)
8000 12 51 1,191250 1,264500 (+6.1%) 1,355083 (+13.8%)
8000 12 127 1,197313 1,266729 (+5.8%) 1,349156 (+12.7%)
Overall, these instructions are the hardest ones to measure performance
as the gvec implementation is affected by the immediate. Above there are
5 different scenarios when it comes to immediate and 2 when it comes to
rept/loop combination. The immediates scenarios are: all bits are 0
therefore the target register should just be changed to 0, with 1 bit
set, with 2 bits set in a combination the new implementation can deal
with using gvec, 4 bits set and the new implementation can't deal with
it using gvec and all bits set. The rept/loop scenarios are high loop
and low rept (so it should spend more time executing it than translating
it) and high rept low loop (so it should spend more time translating it
than executing this code).
These comparisons are between the upstream version, a previous similar
implementation and a one with a cleaner code(this one).
For a comparison with o previous different implementation:
<20221010191356.83659-13-lucas.araujo@eldorado.org.br>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-13-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
target/ppc/translate/vsx-impl.c.inc | 167 ++++++++++++++++++++++++++--
1 file changed, 157 insertions(+), 10 deletions(-)
diff --git a/target/ppc/translate/vsx-impl.c.inc
b/target/ppc/translate/vsx-impl.c.inc
index af410cbf1b..4deb29ee42 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -632,6 +632,8 @@ static void gen_mtvsrws(DisasContext *ctx)
#define SGN_MASK_SP 0x8000000080000000ull
#define EXP_MASK_DP 0x7FF0000000000000ull
#define EXP_MASK_SP 0x7F8000007F800000ull
+#define FRC_MASK_DP (~(SGN_MASK_DP | EXP_MASK_DP))
+#define FRC_MASK_SP (~(SGN_MASK_SP | EXP_MASK_SP))
#define VSX_SCALAR_MOVE(name, op, sgn_mask) \
static void glue(gen_, name)(DisasContext *ctx) \
@@ -1112,23 +1114,168 @@ GEN_VSX_HELPER_X2(xscvhpdp, 0x16, 0x15, 0x10,
PPC2_ISA300)
GEN_VSX_HELPER_R2(xscvsdqp, 0x04, 0x1A, 0x0A, PPC2_ISA300)
GEN_VSX_HELPER_X2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX)
+/* test if +Inf */
+static void gen_is_pos_inf(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v)
+{
+ uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP;
+ tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b,
+ tcg_constant_vec_matching(t, vece, exp_msk));
+}
+
+/* test if -Inf */
+static void gen_is_neg_inf(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v)
+{
+ uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP;
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b,
+ tcg_constant_vec_matching(t, vece, sgn_msk | exp_msk));
+}
+
+/* test if +Inf or -Inf */
+static void gen_is_any_inf(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v)
+{
+ uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP;
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ tcg_gen_andc_vec(vece, b, b, tcg_constant_vec_matching(t, vece, sgn_msk));
+ tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b,
+ tcg_constant_vec_matching(t, vece, exp_msk));
+}
+
+/* test if +0 */
+static void gen_is_pos_zero(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v)
+{
+ tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b,
+ tcg_constant_vec_matching(t, vece, 0));
+}
+
+/* test if -0 */
+static void gen_is_neg_zero(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v)
+{
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b,
+ tcg_constant_vec_matching(t, vece, sgn_msk));
+}
+
+/* test if +0 or -0 */
+static void gen_is_any_zero(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v)
+{
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ tcg_gen_andc_vec(vece, b, b, tcg_constant_vec_matching(t, vece, sgn_msk));
+ tcg_gen_cmp_vec(TCG_COND_EQ, vece, t, b,
+ tcg_constant_vec_matching(t, vece, 0));
+}
+
+/* test if +Denormal */
+static void gen_is_pos_denormal(unsigned vece, TCGv_vec t,
+ TCGv_vec b, int64_t v)
+{
+ uint64_t frc_msk = (vece == MO_32) ? (uint32_t)FRC_MASK_SP : FRC_MASK_DP;
+ tcg_gen_cmp_vec(TCG_COND_LEU, vece, t, b,
+ tcg_constant_vec_matching(t, vece, frc_msk));
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, b, b,
+ tcg_constant_vec_matching(t, vece, 0));
+ tcg_gen_and_vec(vece, t, t, b);
+}
+
+/* test if -Denormal */
+static void gen_is_neg_denormal(unsigned vece, TCGv_vec t,
+ TCGv_vec b, int64_t v)
+{
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ uint64_t frc_msk = (vece == MO_32) ? (uint32_t)FRC_MASK_SP : FRC_MASK_DP;
+ tcg_gen_cmp_vec(TCG_COND_LEU, vece, t, b,
+ tcg_constant_vec_matching(t, vece, sgn_msk | frc_msk));
+ tcg_gen_cmp_vec(TCG_COND_GTU, vece, b, b,
+ tcg_constant_vec_matching(t, vece, sgn_msk));
+ tcg_gen_and_vec(vece, t, t, b);
+}
+
+/* test if +Denormal or -Denormal */
+static void gen_is_any_denormal(unsigned vece, TCGv_vec t,
+ TCGv_vec b, int64_t v)
+{
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ uint64_t frc_msk = (vece == MO_32) ? (uint32_t)FRC_MASK_SP : FRC_MASK_DP;
+ tcg_gen_andc_vec(vece, b, b, tcg_constant_vec_matching(t, vece, sgn_msk));
+ tcg_gen_cmp_vec(TCG_COND_LE, vece, t, b,
+ tcg_constant_vec_matching(t, vece, frc_msk));
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, b, b,
+ tcg_constant_vec_matching(t, vece, 0));
+ tcg_gen_and_vec(vece, t, t, b);
+}
+
+/* test if NaN */
+static void gen_is_nan(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t v)
+{
+ uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP;
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ tcg_gen_and_vec(vece, b, b, tcg_constant_vec_matching(t, vece, ~sgn_msk));
+ tcg_gen_cmp_vec(TCG_COND_GT, vece, t, b,
+ tcg_constant_vec_matching(t, vece, exp_msk));
+}
+
static bool do_xvtstdc(DisasContext *ctx, arg_XX2_uim *a, unsigned vece)
{
- static const GVecGen2i op[] = {
- {
- .fnoi = gen_helper_XVTSTDCSP,
- .vece = MO_32
- },
- {
- .fnoi = gen_helper_XVTSTDCDP,
- .vece = MO_64
- },
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_cmp_vec, 0
+ };
+
+ GVecGen2i op = {
+ .fnoi = (vece == MO_32) ? gen_helper_XVTSTDCSP : gen_helper_XVTSTDCDP,
+ .vece = vece,
+ .opt_opc = vecop_list
};
REQUIRE_VSX(ctx);
+ switch (a->uim) {
+ case 0:
+ set_cpu_vsr(a->xt, tcg_constant_i64(0), true);
+ set_cpu_vsr(a->xt, tcg_constant_i64(0), false);
+ return true;
+ case ((1 << 0) | (1 << 1)):
+ /* test if +Denormal or -Denormal */
+ op.fniv = gen_is_any_denormal;
+ break;
+ case (1 << 0):
+ /* test if -Denormal */
+ op.fniv = gen_is_neg_denormal;
+ break;
+ case (1 << 1):
+ /* test if +Denormal */
+ op.fniv = gen_is_pos_denormal;
+ break;
+ case ((1 << 2) | (1 << 3)):
+ /* test if +0 or -0 */
+ op.fniv = gen_is_any_zero;
+ break;
+ case (1 << 2):
+ /* test if -0 */
+ op.fniv = gen_is_neg_zero;
+ break;
+ case (1 << 3):
+ /* test if +0 */
+ op.fniv = gen_is_pos_zero;
+ break;
+ case ((1 << 4) | (1 << 5)):
+ /* test if +Inf or -Inf */
+ op.fniv = gen_is_any_inf;
+ break;
+ case (1 << 4):
+ /* test if -Inf */
+ op.fniv = gen_is_neg_inf;
+ break;
+ case (1 << 5):
+ /* test if +Inf */
+ op.fniv = gen_is_pos_inf;
+ break;
+ case (1 << 6):
+ /* test if NaN */
+ op.fniv = gen_is_nan;
+ break;
+ }
tcg_gen_gvec_2i(vsr_full_offset(a->xt), vsr_full_offset(a->xb),
- 16, 16, (int32_t)(a->uim), &op[vece - MO_32]);
+ 16, 16, a->uim, &op);
return true;
}
--
2.37.3
- [PULL 01/62] target/ppc: fix msgclr/msgsnd insns flags, (continued)
- [PULL 01/62] target/ppc: fix msgclr/msgsnd insns flags, Daniel Henrique Barboza, 2022/10/28
- [PULL 03/62] target/ppc: fix REQUIRE_HV macro definition, Daniel Henrique Barboza, 2022/10/28
- [PULL 04/62] target/ppc: move msgclr/msgsnd to decodetree, Daniel Henrique Barboza, 2022/10/28
- [PULL 06/62] target/ppc: move msgsync to decodetree, Daniel Henrique Barboza, 2022/10/28
- [PULL 02/62] target/ppc: fix msgsync insns flags, Daniel Henrique Barboza, 2022/10/28
- [PULL 05/62] target/ppc: move msgclrp/msgsndp to decodetree, Daniel Henrique Barboza, 2022/10/28
- [PULL 07/62] target/ppc: Moved VMLADDUHM to decodetree and use gvec, Daniel Henrique Barboza, 2022/10/28
- [PULL 08/62] target/ppc: Move VMH[R]ADDSHS instruction to decodetree, Daniel Henrique Barboza, 2022/10/28
- [PULL 09/62] target/ppc: Move V(ADD|SUB)CUW to decodetree and use gvec, Daniel Henrique Barboza, 2022/10/28
- [PULL 15/62] target/ppc: Use gvec to decode XVCPSGN[SD]P, Daniel Henrique Barboza, 2022/10/28
- [PULL 18/62] target/ppc: Use gvec to decode XVTSTDC[DS]P,
Daniel Henrique Barboza <=
- [PULL 14/62] target/ppc: Use gvec to decode XV[N]ABS[DS]P/XVNEG[DS]P, Daniel Henrique Barboza, 2022/10/28
- [PULL 11/62] target/ppc: Move VPRTYB[WDQ] to decodetree and use gvec, Daniel Henrique Barboza, 2022/10/28
- [PULL 17/62] target/ppc: Moved XSTSTDC[QDS]P to decodetree, Daniel Henrique Barboza, 2022/10/28
- [PULL 12/62] target/ppc: Move VAVG[SU][BHW] to decodetree and use gvec, Daniel Henrique Barboza, 2022/10/28
- [PULL 10/62] target/ppc: Move VNEG[WD] to decodtree and use gvec, Daniel Henrique Barboza, 2022/10/28
- [PULL 22/62] target/ppc: prepare to split interrupt masking and delivery by excp_model, Daniel Henrique Barboza, 2022/10/28
- [PULL 13/62] target/ppc: Move VABSDU[BHW] to decodetree and use gvec, Daniel Henrique Barboza, 2022/10/28
- [PULL 20/62] target/ppc: always use ppc_set_irq to set env->pending_interrupts, Daniel Henrique Barboza, 2022/10/28
- [PULL 16/62] target/ppc: Moved XVTSTDC[DS]P to decodetree, Daniel Henrique Barboza, 2022/10/28
- [PULL 19/62] target/ppc: define PPC_INTERRUPT_* values directly, Daniel Henrique Barboza, 2022/10/28