[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 11/13] target/i386: execute multiple REP/REPZ iterations without
From: |
Paolo Bonzini |
Subject: |
[PATCH 11/13] target/i386: execute multiple REP/REPZ iterations without leaving TB |
Date: |
Sun, 15 Dec 2024 10:06:10 +0100 |
Use a TCG loop so that it is not necessary to go through the setup steps
of REP and through the I/O check on every iteration. Interestingly, this
is not a particularly effective optimization on its own, though it avoids
the cost of correct RF emulation that was added in the previous patch.
The main benefit lies in allowing the hoisting of loop invariants outside
the loop, which will happen separately.
The loop exits when the low 16 bits of CX/ECX/RCX are zero (so generally
speaking the string operation runs in 65536 iteration batches) to give
the main loop an opportunity to pick up interrupts.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 55 +++++++++++++++++++++++++++++++++----
1 file changed, 49 insertions(+), 6 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 2ea8a418612..e0f9f7748bc 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1331,13 +1331,28 @@ static void gen_outs(DisasContext *s, MemOp ot)
gen_bpt_io(s, s->tmp2_i32, ot);
}
+#define REP_MAX 65535
+
static void do_gen_rep(DisasContext *s, MemOp ot,
void (*fn)(DisasContext *s, MemOp ot),
bool is_repz_nz)
{
+ TCGLabel *last = gen_new_label();
+ TCGLabel *loop = gen_new_label();
TCGLabel *done = gen_new_label();
+
target_ulong cx_mask = MAKE_64BIT_MASK(0, 8 << s->aflag);
TCGv cx_next = tcg_temp_new();
+
+ /*
+ * Check if we must translate a single iteration only. Normally,
HF_RF_MASK
+ * would also limit translation blocks to one instruction, so that gen_eob
+ * can reset the flag; here however RF is set throughout the repetition, so
+ * we can plow through until CX/ECX/RCX is zero.
+ */
+ bool can_loop =
+ (!(tb_cflags(s->base.tb) & (CF_USE_ICOUNT | CF_SINGLE_STEP))
+ && !(s->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
bool had_rf = s->flags & HF_RF_MASK;
/*
@@ -1362,19 +1377,29 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
/* Any iteration at all? */
tcg_gen_brcondi_tl(TCG_COND_TSTEQ, cpu_regs[R_ECX], cx_mask, done);
- fn(s, ot);
-
- tcg_gen_subi_tl(cx_next, cpu_regs[R_ECX], 1);
-
/*
- * Write back cx_next to CX/ECX/RCX. There can be no carry, so zero
- * extend if needed but do not do expensive deposit operations.
+ * From now on we operate on the value of CX/ECX/RCX that will be written
+ * back, which is stored in cx_next. There can be no carry, so we can zero
+ * extend here if needed and not do any expensive deposit operations later.
*/
+ tcg_gen_subi_tl(cx_next, cpu_regs[R_ECX], 1);
#ifdef TARGET_X86_64
if (s->aflag == MO_32) {
tcg_gen_ext32u_tl(cx_next, cx_next);
+ cx_mask = ~0;
}
#endif
+
+ /*
+ * The last iteration is handled outside the loop, so that cx_next
+ * can never underflow.
+ */
+ if (can_loop) {
+ tcg_gen_brcondi_tl(TCG_COND_TSTEQ, cx_next, cx_mask, last);
+ }
+
+ gen_set_label(loop);
+ fn(s, ot);
tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
@@ -1384,6 +1409,12 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
gen_jcc_noeob(s, (JCC_Z << 1) | (nz ^ 1), done);
}
+ if (can_loop) {
+ tcg_gen_subi_tl(cx_next, cpu_regs[R_ECX], 1);
+ tcg_gen_brcondi_tl(TCG_COND_TSTNE, cx_next, REP_MAX, loop);
+ tcg_gen_brcondi_tl(TCG_COND_TSTEQ, cx_next, cx_mask, last);
+ }
+
/*
* Traps or interrupts set RF_MASK if they happen after any iteration
* but the last. Set it here before giving the main loop a chance to
@@ -1396,6 +1427,18 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
/* Go to the main loop but reenter the same instruction. */
gen_jmp_rel_csize(s, -cur_insn_len(s), 0);
+ if (can_loop) {
+ /*
+ * The last iteration needs no conditional jump, even if is_repz_nz,
+ * because the repeats are ending anyway.
+ */
+ gen_set_label(last);
+ set_cc_op(s, CC_OP_DYNAMIC);
+ fn(s, ot);
+ tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
+ gen_update_cc_op(s);
+ }
+
/* CX/ECX/RCX is zero, or REPZ/REPNZ broke the repetition. */
gen_set_label(done);
set_cc_op(s, CC_OP_DYNAMIC);
--
2.47.1
[PATCH 12/13] target/i386: pull computation of string update value out of loop, Paolo Bonzini, 2024/12/15
[PATCH 13/13] target/i386: avoid using s->tmp0 for add to implicit registers, Paolo Bonzini, 2024/12/15