
Re: [PATCH v2 29/51] tcg/optimize: Use fold_masks_zs in fold_qemu_ld


From: Pierrick Bouvier
Subject: Re: [PATCH v2 29/51] tcg/optimize: Use fold_masks_zs in fold_qemu_ld
Date: Fri, 20 Dec 2024 12:18:20 -0800
User-agent: Mozilla Thunderbird

On 12/19/24 20:10, Richard Henderson wrote:
Avoid the use of the OptContext slots.

Be careful not to call fold_masks_zs when the memory operation
is wide enough to require multiple outputs, so split into two
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
  tcg/optimize.c | 28 ++++++++++++++++++++++------
  1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index da9c8c4669..b01929fccf 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2127,24 +2127,33 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
      return fold_masks_s(ctx, op, s_mask);
  }
 
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
  {
      const TCGOpDef *def = &tcg_op_defs[op->opc];
      MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
      MemOp mop = get_memop(oi);
      int width = 8 * memop_size(mop);
+    uint64_t z_mask = -1, s_mask = 0;

      if (width < 64) {
-        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+        s_mask = MAKE_64BIT_MASK(width, 64 - width);
          if (!(mop & MO_SIGN)) {
-            ctx->z_mask = MAKE_64BIT_MASK(0, width);
-            ctx->s_mask <<= 1;
+            z_mask = MAKE_64BIT_MASK(0, width);
+            s_mask <<= 1;
          }
      }

      /* Opcodes that touch guest memory stop the mb optimization. */
      ctx->prev_mb = NULL;
-    return false;
+
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
+}
+
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
+{
+    /* Opcodes that touch guest memory stop the mb optimization.  */
+    ctx->prev_mb = NULL;
+    return finish_folding(ctx, op);
  }

  static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
@@ -3033,11 +3042,18 @@ void tcg_optimize(TCGContext *s)
              break;
          case INDEX_op_qemu_ld_a32_i32:
          case INDEX_op_qemu_ld_a64_i32:
+            done = fold_qemu_ld_1reg(&ctx, op);
+            break;
          case INDEX_op_qemu_ld_a32_i64:
          case INDEX_op_qemu_ld_a64_i64:
+            if (TCG_TARGET_REG_BITS == 64) {
+                done = fold_qemu_ld_1reg(&ctx, op);
+                break;
+            }
+            QEMU_FALLTHROUGH;
          case INDEX_op_qemu_ld_a32_i128:
          case INDEX_op_qemu_ld_a64_i128:
-            done = fold_qemu_ld(&ctx, op);
+            done = fold_qemu_ld_2reg(&ctx, op);
              break;
          case INDEX_op_qemu_st8_a32_i32:
          case INDEX_op_qemu_st8_a64_i32:
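
Not part of the patch, but for anyone following along, here is a small standalone sketch that mirrors the z_mask/s_mask computation in fold_qemu_ld_1reg for narrow loads. MAKE_64BIT_MASK and MO_SIGN are re-declared locally as stand-ins for the QEMU definitions so the snippet compiles on its own; it is an illustration under those assumptions, not QEMU code.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for QEMU's MAKE_64BIT_MASK(shift, length). */
#define MAKE_64BIT_MASK(shift, length) \
    ((~0ULL >> (64 - (length))) << (shift))

/* Local stand-in for the MemOp sign-extension flag. */
#define MO_SIGN 0x08

/* Same mask selection as in fold_qemu_ld_1reg above. */
static void ld_masks(int width, unsigned mop,
                     uint64_t *z_mask, uint64_t *s_mask)
{
    *z_mask = -1;
    *s_mask = 0;
    if (width < 64) {
        *s_mask = MAKE_64BIT_MASK(width, 64 - width);
        if (!(mop & MO_SIGN)) {
            *z_mask = MAKE_64BIT_MASK(0, width);
            *s_mask <<= 1;
        }
    }
}

int main(void)
{
    uint64_t z, s;

    ld_masks(16, 0, &z, &s);        /* 16-bit zero-extending load */
    printf("UW: z=%016" PRIx64 " s=%016" PRIx64 "\n", z, s);

    ld_masks(16, MO_SIGN, &z, &s);  /* 16-bit sign-extending load */
    printf("SW: z=%016" PRIx64 " s=%016" PRIx64 "\n", z, s);
    return 0;
}

The zero-extending case reports the upper 48 bits as known zero, while the sign-extending case leaves z_mask at all ones and carries the wider s_mask. The i128 cases stay on fold_qemu_ld_2reg because a single 64-bit z/s mask pair cannot describe a load producing two output registers, which is exactly the split described in the commit message.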

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>



