qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 12/13] target/i386: pull computation of string update value o


From: Richard Henderson
Subject: Re: [PATCH 12/13] target/i386: pull computation of string update value out of loop
Date: Sun, 15 Dec 2024 09:15:30 -0600
User-agent: Mozilla Thunderbird

On 12/15/24 03:06, Paolo Bonzini wrote:
This is a common operation that is executed many times in rep
movs or rep stos loops.  It can improve performance by several
percentage points.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
  target/i386/tcg/translate.c | 54 ++++++++++++++++++-------------------
  1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e0f9f7748bc..4b652cc23e1 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -831,16 +831,13 @@ static bool gen_check_io(DisasContext *s, MemOp ot, 
TCGv_i32 port,
  #endif
  }
-static void gen_movs(DisasContext *s, MemOp ot)
+static void gen_movs(DisasContext *s, MemOp ot, TCGv dshift)
  {
-    TCGv dshift;
-
      gen_string_movl_A0_ESI(s);
      gen_op_ld_v(s, ot, s->T0, s->A0);
      gen_string_movl_A0_EDI(s);
      gen_op_st_v(s, ot, s->T0, s->A0);
- dshift = gen_compute_Dshift(s, ot);
      gen_op_add_reg(s, s->aflag, R_ESI, dshift);
      gen_op_add_reg(s, s->aflag, R_EDI, dshift);
  }
@@ -1244,22 +1241,22 @@ static inline void gen_jcc(DisasContext *s, int b, 
TCGLabel *l1)
      }
  }
-static void gen_stos(DisasContext *s, MemOp ot)
+static void gen_stos(DisasContext *s, MemOp ot, TCGv dshift)
  {
      gen_string_movl_A0_EDI(s);
      gen_op_st_v(s, ot, s->T0, s->A0);
-    gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+    gen_op_add_reg(s, s->aflag, R_EDI, dshift);
  }
-static void gen_lods(DisasContext *s, MemOp ot)
+static void gen_lods(DisasContext *s, MemOp ot, TCGv dshift)
  {
      gen_string_movl_A0_ESI(s);
      gen_op_ld_v(s, ot, s->T0, s->A0);
      gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
-    gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+    gen_op_add_reg(s, s->aflag, R_ESI, dshift);
  }
-static void gen_scas(DisasContext *s, MemOp ot)
+static void gen_scas(DisasContext *s, MemOp ot, TCGv dshift)
  {
      gen_string_movl_A0_EDI(s);
      gen_op_ld_v(s, ot, s->T1, s->A0);
@@ -1268,13 +1265,11 @@ static void gen_scas(DisasContext *s, MemOp ot)
      tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
      set_cc_op(s, CC_OP_SUBB + ot);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+    gen_op_add_reg(s, s->aflag, R_EDI, dshift);
  }
-static void gen_cmps(DisasContext *s, MemOp ot)
+static void gen_cmps(DisasContext *s, MemOp ot, TCGv dshift)
  {
-    TCGv dshift;
-
      gen_string_movl_A0_EDI(s);
      gen_op_ld_v(s, ot, s->T1, s->A0);
      gen_string_movl_A0_ESI(s);
@@ -1284,7 +1279,6 @@ static void gen_cmps(DisasContext *s, MemOp ot)
      tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
      set_cc_op(s, CC_OP_SUBB + ot);
- dshift = gen_compute_Dshift(s, ot);
      gen_op_add_reg(s, s->aflag, R_ESI, dshift);
      gen_op_add_reg(s, s->aflag, R_EDI, dshift);
  }
@@ -1303,7 +1297,7 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, 
int ot)
      }
  }
-static void gen_ins(DisasContext *s, MemOp ot)
+static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift)
  {
      gen_string_movl_A0_EDI(s);
      /* Note: we must do this dummy write first to be restartable in
@@ -1314,11 +1308,11 @@ static void gen_ins(DisasContext *s, MemOp ot)
      tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
      gen_helper_in_func(ot, s->T0, s->tmp2_i32);
      gen_op_st_v(s, ot, s->T0, s->A0);
-    gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+    gen_op_add_reg(s, s->aflag, R_EDI, dshift);
      gen_bpt_io(s, s->tmp2_i32, ot);
  }
-static void gen_outs(DisasContext *s, MemOp ot)
+static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift)
  {
      gen_string_movl_A0_ESI(s);
      gen_op_ld_v(s, ot, s->T0, s->A0);
@@ -1327,14 +1321,14 @@ static void gen_outs(DisasContext *s, MemOp ot)
      tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
      tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
      gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
-    gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+    gen_op_add_reg(s, s->aflag, R_ESI, dshift);
      gen_bpt_io(s, s->tmp2_i32, ot);
  }
#define REP_MAX 65535 -static void do_gen_rep(DisasContext *s, MemOp ot,
-                       void (*fn)(DisasContext *s, MemOp ot),
+static void do_gen_rep(DisasContext *s, MemOp ot, TCGv dshift,
+                       void (*fn)(DisasContext *s, MemOp ot, TCGv dshift),
                         bool is_repz_nz)
  {
      TCGLabel *last = gen_new_label();
@@ -1399,7 +1393,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
      }
gen_set_label(loop);
-    fn(s, ot);
+    fn(s, ot, dshift);
      tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
      gen_update_cc_op(s);
@@ -1434,7 +1428,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
           */
          gen_set_label(last);
          set_cc_op(s, CC_OP_DYNAMIC);
-        fn(s, ot);
+        fn(s, ot, dshift);
          tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
          gen_update_cc_op(s);
      }
@@ -1449,23 +1443,27 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
  }
static void gen_repz(DisasContext *s, MemOp ot,
-                     void (*fn)(DisasContext *s, MemOp ot))
+                     void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
{
+    TCGv dshift = gen_compute_Dshift(s, ot);
+
      if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-        do_gen_rep(s, ot, fn, false);
+        do_gen_rep(s, ot, dshift, fn, false);
      } else {
-        fn(s, ot);
+        fn(s, ot, dshift);
      }
  }
static void gen_repz_nz(DisasContext *s, MemOp ot,
-                        void (*fn)(DisasContext *s, MemOp ot))
+                        void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
  {
+    TCGv dshift = gen_compute_Dshift(s, ot);
+
      if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-        do_gen_rep(s, ot, fn, true);
+        do_gen_rep(s, ot, dshift, fn, true);
      } else {
-        fn(s, ot);
+        fn(s, ot, dshift);
      }
  }

Still not a fan of the repetition.  I think this could easily be

static void do_gen_rep(DisasContext *s, MemOp ot,
                       void (*fn)(DisasContext *s, MemOp ot, TCGv dshift),
                       bool is_repz_nz)
{
    TCGv dshift = gen_compute_Dshift(s, ot);
    TCGLabel *last, *loop, *done;
    target_ulong cx_mask;
    TCGv cx_next;
    bool can_loop, had_rf;

    if (!(s->prefix & (REPZ | REPNZ))) {
        fn(s, ot, dshift);
        return;
    }

    done = gen_new_label();
    can_loop = ...
    everything else.
}

Or, if you prefer, use two functions gen_repz_nz{0,1}.


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]