[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 01/29] target/arm: Replace tcg_gen_dupi_vec with constants in
From: |
Richard Henderson |
Subject: |
[PATCH v3 01/29] target/arm: Replace tcg_gen_dupi_vec with constants in gengvec.c |
Date: |
Wed, 11 Sep 2024 19:40:46 -0700 |
Instead of copying a constant into a temporary with dupi,
use a vector constant directly.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/gengvec.c | 43 ++++++++++++++++++----------------------
1 file changed, 19 insertions(+), 24 deletions(-)
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
index 56a1dc1f75..726a1383ae 100644
--- a/target/arm/tcg/gengvec.c
+++ b/target/arm/tcg/gengvec.c
@@ -297,10 +297,9 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, sh - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_sari_vec(vece, d, a, sh);
tcg_gen_add_vec(vece, d, d, t);
@@ -492,10 +491,9 @@ void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, shift - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_shri_vec(vece, d, a, shift);
tcg_gen_add_vec(vece, d, d, t);
@@ -685,9 +683,9 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a,
int64_t shift)
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
tcg_gen_shri_vec(vece, t, a, sh);
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
@@ -773,10 +771,9 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a,
int64_t shift)
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));
tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
}
@@ -1044,14 +1041,13 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec msk, max;
+ TCGv_vec max;
tcg_gen_neg_vec(vece, rsh, shift);
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- msk = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, msk, 0xff);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
tcg_gen_and_vec(vece, lsh, shift, msk);
tcg_gen_and_vec(vece, rsh, rsh, msk);
}
@@ -1064,9 +1060,6 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_shrv_vec(vece, rval, src, rsh);
- max = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, max, 8 << vece);
-
/*
* The choice of LT (signed) and GEU (unsigned) are biased toward
* the instructions of the x86_64 host. For MO_8, the whole byte
@@ -1074,6 +1067,7 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
* have already masked to a byte and so a signed compare works.
* Other tcg hosts have a full set of comparisons and do not care.
*/
+ max = tcg_constant_vec_matching(dst, vece, 8 << vece);
if (vece == MO_8) {
tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
@@ -1170,6 +1164,7 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+ TCGv_vec max, zero;
/*
* Rely on the TCG guarantee that out of range shifts produce
@@ -1180,15 +1175,15 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, tmp);
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
}
/* Bound rsh so out of bound right shift gets -1. */
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+ max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, max);
+ tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, max);
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_sarv_vec(vece, rval, src, rsh);
@@ -1197,12 +1192,12 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_andc_vec(vece, lval, lval, tmp);
/* Select between left and right shift. */
+ zero = tcg_constant_vec_matching(dst, vece, 0);
if (vece == MO_8) {
- tcg_gen_dupi_vec(vece, tmp, 0);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0x80);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
}
}
--
2.43.0
- [PATCH v3 00/29] target/arm: AdvSIMD decodetree conversion, part 4, Richard Henderson, 2024/09/11
- [PATCH v3 01/29] target/arm: Replace tcg_gen_dupi_vec with constants in gengvec.c,
Richard Henderson <=
- [PATCH v3 02/29] target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c, Richard Henderson, 2024/09/11
- [PATCH v3 04/29] target/arm: Use cmpsel in gen_sshl_vec, Richard Henderson, 2024/09/11
- [PATCH v3 03/29] target/arm: Use cmpsel in gen_ushl_vec, Richard Henderson, 2024/09/11
- [PATCH v3 06/29] target/arm: Convert EXT to decodetree, Richard Henderson, 2024/09/11
- [PATCH v3 05/29] target/arm: Use tcg_gen_extract2_i64 for EXT, Richard Henderson, 2024/09/11
- [PATCH v3 07/29] target/arm: Convert TBL, TBX to decodetree, Richard Henderson, 2024/09/11
- [PATCH v3 09/29] target/arm: Simplify do_reduction_op, Richard Henderson, 2024/09/11
- [PATCH v3 11/29] target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree, Richard Henderson, 2024/09/11
- [PATCH v3 08/29] target/arm: Convert UZP, TRN, ZIP to decodetree, Richard Henderson, 2024/09/11
- [PATCH v3 10/29] target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree, Richard Henderson, 2024/09/11