[PULL 08/72] tcg/optimize: Change representation of s_mask
From: Richard Henderson
Subject: [PULL 08/72] tcg/optimize: Change representation of s_mask
Date: Tue, 24 Dec 2024 12:04:17 -0800
Change the representation from sign bit repetitions to all bits equal
to the sign bit, including the sign bit itself.
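For concreteness, a small standalone sketch (not part of the patch)
contrasting the two encodings for a sign-extended 8-bit value;
__builtin_clrsbll stands in here for QEMU's clrsb64():

    /* Hypothetical illustration, compiled standalone. */
    #include <stdint.h>
    #include <stdio.h>
    #include <inttypes.h>

    int main(void)
    {
        int64_t value = -128;                /* bits 63..7 all equal bit 63 */
        int rep = __builtin_clrsbll(value);  /* 56 redundant sign bits */

        uint64_t old_s_mask = ~(~0ull >> rep);              /* 0xffffffffffffff00 */
        uint64_t new_s_mask = (uint64_t)(INT64_MIN >> rep); /* 0xffffffffffffff80 */

        printf("old %016" PRIx64 "  new %016" PRIx64 "\n",
               old_s_mask, new_s_mask);
        return 0;
    }

The new form carries one extra bit of information: the sign bit itself
is always part of the mask, so a canonical s_mask is never zero.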
The previous format has a problem in that it is difficult to recreate
a valid sign mask after a shift operation: the "repetitions" part of
the previous format means that applying the same shift as for the value
leads to an off-by-one result.
The new format, which includes the sign bit itself, means that the sign
mask can be manipulated in exactly the same way as the value, and
canonicalization is easier.
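A rough worked case, assuming the mask is transformed with exactly the
same arithmetic shift as the value (names hypothetical):

    #include <stdint.h>

    int main(void)
    {
        /* A value with no redundant sign bits (clrsb == 0), sar by 1. */
        int64_t old_mask = 0;          /* old form: empty mask */
        int64_t new_mask = INT64_MIN;  /* new form: sign bit matches itself */

        int64_t old_after = old_mask >> 1;  /* 0; the correct old-form answer
                                               is the top bit alone: off by one */
        int64_t new_after = new_mask >> 1;  /* 0xc000000000000000: top two
                                               bits known, exactly right */
        (void)old_after;
        (void)new_after;
        return 0;
    }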
Canonicalize the s_mask in fold_masks_zs, rather than requiring callers
to do so. Treat 0 as a non-canonical but typeless input for no sign
information, which will be reset as appropriate for the data type.
We can easily fold in the data from z_mask while canonicalizing.
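As a standalone sketch of that canonicalization (mirroring the
fold_masks_zs hunk in the diff below, with QEMU's clz64() and MAX
approximated by builtins; those stand-ins are assumptions, not QEMU
code):

    #include <stdint.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    /* QEMU's clz64() returns 64 for an input of 0; emulate that here. */
    static int clz64(uint64_t v) { return v ? __builtin_clzll(v) : 64; }

    static uint64_t canonicalize_s_mask(uint64_t s_mask, uint64_t z_mask)
    {
        int rep = clz64(~s_mask);       /* leading bits known to equal the msb */
        rep = MAX(rep, clz64(z_mask));  /* leading known zeros match the msb too */
        rep = MAX(rep - 1, 0);          /* rep counts copies beyond the sign bit */
        return (uint64_t)(INT64_MIN >> rep);  /* at least bit 63 always set */
    }

A zero s_mask contributes nothing here, since clz64(~0) == 0, so the
result degenerates to whatever z_mask implies, or the bare sign bit.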
Temporarily disable optimizations using s_mask while each operation is
converted to use fold_masks_zs and to the new form.
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 64 ++++++++++++--------------------------------------
1 file changed, 15 insertions(+), 49 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index d8f6542c4f..fbc0dc5588 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -52,7 +52,7 @@ typedef struct TempOptInfo {
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
uint64_t val;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
- uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
} TempOptInfo;
typedef struct OptContext {
@@ -65,49 +65,10 @@ typedef struct OptContext {
/* In flight values from optimization. */
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
- uint64_t s_mask; /* mask of clrsb(value) bits */
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
TCGType type;
} OptContext;
-/* Calculate the smask for a specific value. */
-static uint64_t smask_from_value(uint64_t value)
-{
- int rep = clrsb64(value);
- return ~(~0ull >> rep);
-}
-
-/*
- * Calculate the smask for a given set of known-zeros.
- * If there are lots of zeros on the left, we can consider the remainder
- * an unsigned field, and thus the corresponding signed field is one bit
- * larger.
- */
-static uint64_t smask_from_zmask(uint64_t zmask)
-{
- /*
- * Only the 0 bits are significant for zmask, thus the msb itself
- * must be zero, else we have no sign information.
- */
- int rep = clz64(zmask);
- if (rep == 0) {
- return 0;
- }
- rep -= 1;
- return ~(~0ull >> rep);
-}
-
-/*
- * Recreate a properly left-aligned smask after manipulation.
- * Some bit-shuffling, particularly shifts and rotates, may
- * retain sign bits on the left, but may scatter disconnected
- * sign bits on the right. Retain only what remains to the left.
- */
-static uint64_t smask_from_smask(int64_t smask)
-{
- /* Only the 1 bits are significant for smask */
- return smask_from_zmask(~smask);
-}
-
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
return ts->state_ptr;
@@ -173,7 +134,7 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
ti->is_const = true;
ti->val = ts->val;
ti->z_mask = ts->val;
- ti->s_mask = smask_from_value(ts->val);
+ ti->s_mask = INT64_MIN >> clrsb64(ts->val);
} else {
ti->is_const = false;
ti->z_mask = -1;
@@ -992,7 +953,6 @@ static void finish_folding(OptContext *ctx, TCGOp *op)
*/
if (i == 0) {
ts_info(ts)->z_mask = ctx->z_mask;
- ts_info(ts)->s_mask = ctx->s_mask;
}
}
}
@@ -1051,11 +1011,12 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
* The passed s_mask may be augmented by z_mask.
*/
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
- uint64_t z_mask, uint64_t s_mask)
+ uint64_t z_mask, int64_t s_mask)
{
const TCGOpDef *def = &tcg_op_defs[op->opc];
TCGTemp *ts;
TempOptInfo *ti;
+ int rep;
/* Only single-output opcodes are supported here. */
tcg_debug_assert(def->nb_oargs == 1);
@@ -1069,7 +1030,7 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
*/
if (ctx->type == TCG_TYPE_I32) {
z_mask = (int32_t)z_mask;
- s_mask |= MAKE_64BIT_MASK(32, 32);
+ s_mask |= INT32_MIN;
}
if (z_mask == 0) {
@@ -1081,7 +1042,13 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
ti = ts_info(ts);
ti->z_mask = z_mask;
- ti->s_mask = s_mask | smask_from_zmask(z_mask);
+
+ /* Canonicalize s_mask and incorporate data from z_mask. */
+ rep = clz64(~s_mask);
+ rep = MAX(rep, clz64(z_mask));
+ rep = MAX(rep - 1, 0);
+ ti->s_mask = INT64_MIN >> rep;
+
return true;
}
@@ -1807,7 +1774,7 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
ctx->z_mask = z_mask;
ctx->s_mask = s_mask;
- if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+ if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
return true;
}
@@ -2509,7 +2476,7 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
ctx->s_mask = s_mask;
- if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+ if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
return true;
}
@@ -2535,7 +2502,6 @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
- ctx->s_mask = smask_from_smask(s_mask);
return fold_masks(ctx, op);
}
--
2.43.0