[PATCH v3 02/13] tcg/s390x: Remove TCG_REG_TB
From: Richard Henderson
Subject: [PATCH v3 02/13] tcg/s390x: Remove TCG_REG_TB
Date: Thu, 1 Dec 2022 22:51:49 -0800
This reverts 829e1376d940 ("tcg/s390: Introduce TCG_REG_TB"), and
several follow-up patches. The primary motivation is to reduce the
less-tested code paths, pre-z10. Secondarily, this allows the
unconditional use of TCG_TARGET_HAS_direct_jump, which might be more
important for performance than any slight increase in code size.
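As an aside, the direct-jump pattern that this change makes unconditional
(see the INDEX_op_goto_tb hunk below) relies on the BRCL's 32-bit
displacement field being 4-byte aligned so it can later be retargeted with
a single atomic store. Here is a minimal standalone sketch of that emission
logic; this is not QEMU code: the buffer, helpers, and function name are
invented for illustration, and 0xc0f4 is BRCL with condition mask 15
(branch always). A companion sketch of the retargeting step follows the
diff.

#include <stddef.h>
#include <stdint.h>

static uint8_t buf[64];    /* stand-in for the TCG code buffer */
static size_t pos;         /* current emission offset in bytes */

static void out16(uint16_t v)
{
    buf[pos++] = v >> 8;   /* s390x instructions are big-endian */
    buf[pos++] = v & 0xff;
}

static void out32(uint32_t v)
{
    out16(v >> 16);
    out16(v & 0xffff);
}

/*
 * Emit an always-taken BRCL with a placeholder displacement.  The
 * displacement field starts 2 bytes into the 6-byte instruction, so
 * pad with a 2-byte NOP first whenever that field would not land on
 * a 4-byte boundary.  Returns the byte offset of the field.
 */
static size_t emit_patchable_branch(void)
{
    if (((uintptr_t)(buf + pos) + 2) % 4 != 0) {
        out16(0x0700);     /* BCR 0,0: a 2-byte no-op */
    }
    out16(0xc0f4);         /* BRCL 15,...: branch always */
    size_t disp_off = pos;
    out32(0);              /* placeholder, rewritten when the TB is linked */
    return disp_off;
}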
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 2 +-
tcg/s390x/tcg-target.c.inc | 176 +++++--------------------------------
2 files changed, 23 insertions(+), 155 deletions(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 22d70d431b..645f522058 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -103,7 +103,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_extrl_i64_i32 0
#define TCG_TARGET_HAS_extrh_i64_i32 0
-#define TCG_TARGET_HAS_direct_jump HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_direct_jump 1
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_div2_i64 1
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index cb00bb6999..8a4bec0a28 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -65,12 +65,6 @@
/* A scratch register that may be used throughout the backend. */
#define TCG_TMP0 TCG_REG_R1
-/* A scratch register that holds a pointer to the beginning of the TB.
- We don't need this when we have pc-relative loads with the general
- instructions extension facility. */
-#define TCG_REG_TB TCG_REG_R12
-#define USE_REG_TB (!HAVE_FACILITY(GEN_INST_EXT))
-
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif
@@ -813,8 +807,8 @@ static bool maybe_out_small_movi(TCGContext *s, TCGType type,
}
/* load a register with an immediate value */
-static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
- tcg_target_long sval, bool in_prologue)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long sval)
{
tcg_target_ulong uval;
@@ -853,14 +847,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
tcg_out_insn(s, RIL, LARL, ret, off);
return;
}
- } else if (USE_REG_TB && !in_prologue) {
- ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
- if (off == sextract64(off, 0, 20)) {
- /* This is certain to be an address within TB, and therefore
- OFF will be negative; don't try RX_LA. */
- tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
- return;
- }
}
/* A 32-bit unsigned value can be loaded in 2 insns. And given
@@ -876,10 +862,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
if (HAVE_FACILITY(GEN_INST_EXT)) {
tcg_out_insn(s, RIL, LGRL, ret, 0);
new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
- } else if (USE_REG_TB && !in_prologue) {
- tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
} else {
TCGReg base = ret ? ret : TCG_TMP0;
tcg_out_insn(s, RIL, LARL, base, 0);
@@ -888,12 +870,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
}
-static void tcg_out_movi(TCGContext *s, TCGType type,
- TCGReg ret, tcg_target_long sval)
-{
- tcg_out_movi_int(s, type, ret, sval, false);
-}
-
/* Emit a load/store type instruction. Inputs are:
DATA: The register to be loaded or stored.
BASE+OFS: The effective address.
@@ -1020,35 +996,6 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
return false;
}
-/* load data from an absolute host address */
-static void tcg_out_ld_abs(TCGContext *s, TCGType type,
- TCGReg dest, const void *abs)
-{
- intptr_t addr = (intptr_t)abs;
-
- if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
- ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
- if (disp == (int32_t)disp) {
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RIL, LRL, dest, disp);
- } else {
- tcg_out_insn(s, RIL, LGRL, dest, disp);
- }
- return;
- }
- }
- if (USE_REG_TB) {
- ptrdiff_t disp = tcg_tbrel_diff(s, abs);
- if (disp == sextract64(disp, 0, 20)) {
- tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
- return;
- }
- }
-
- tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
- tcg_out_ld(s, type, dest, dest, addr & 0xffff);
-}
-
static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
int msb, int lsb, int ofs, int z)
{
@@ -1243,17 +1190,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
return;
}
- /* Use the constant pool if USE_REG_TB, but not for small constants. */
- if (USE_REG_TB) {
- if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
- tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
- return;
- }
- } else {
- tcg_out_movi(s, type, TCG_TMP0, val);
- }
+ tcg_out_movi(s, type, TCG_TMP0, val);
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
} else {
@@ -1297,24 +1234,11 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
}
- /* Use the constant pool if USE_REG_TB, but not for small constants. */
- if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
- } else {
- tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
- }
- } else if (USE_REG_TB) {
- tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, val, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
+ tcg_out_movi(s, type, TCG_TMP0, val);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
} else {
- /* Perform the OR via sequential modifications to the high and
- low parts. Do this via recursion to handle 16-bit vs 32-bit
- masks in each half. */
- tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
- tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
- tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
+ tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
}
}
@@ -1332,26 +1256,11 @@ static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
}
- /* Use the constant pool if USE_REG_TB, but not for small constants. */
- if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
- } else {
- tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
- }
- } else if (USE_REG_TB) {
- tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, val, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
+ tcg_out_movi(s, type, TCG_TMP0, val);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
} else {
- /* Perform the xor by parts. */
- tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
- if (val & 0xffffffff) {
- tcg_out_insn(s, RIL, XILF, dest, val);
- }
- if (val > 0xffffffff) {
- tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
- }
+ tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
}
}
@@ -1395,19 +1304,6 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
c2 = TCG_TMP0;
/* fall through to reg-reg */
- } else if (USE_REG_TB) {
- if (type == TCG_TYPE_I32) {
- op = (is_unsigned ? RXY_CLY : RXY_CY);
- tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
- 4 - tcg_tbrel_diff(s, NULL));
- } else {
- op = (is_unsigned ? RXY_CLG : RXY_CG);
- tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
- }
- goto exit;
} else {
if (type == TCG_TYPE_I32) {
op = (is_unsigned ? RIL_CLRL : RIL_CRL);
@@ -2101,43 +1997,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_goto_tb:
a0 = args[0];
- if (s->tb_jmp_insn_offset) {
- /*
- * branch displacement must be aligned for atomic patching;
- * see if we need to add extra nop before branch
- */
- if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
- tcg_out16(s, NOP);
- }
- tcg_debug_assert(!USE_REG_TB);
- tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
- s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
- s->code_ptr += 2;
- } else {
- /* load address stored at s->tb_jmp_target_addr + a0 */
- tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
- tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
- /* and go there */
- tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
+ tcg_debug_assert(s->tb_jmp_insn_offset);
+ /*
+ * branch displacement must be aligned for atomic patching;
+ * see if we need to add extra nop before branch
+ */
+ if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
+ tcg_out16(s, NOP);
}
+ tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ tcg_out32(s, 0);
set_jmp_reset_offset(s, a0);
-
- /* For the unlinked path of goto_tb, we need to reset
- TCG_REG_TB to the beginning of this TB. */
- if (USE_REG_TB) {
- int ofs = -tcg_current_code_size(s);
- /* All TB are restricted to 64KiB by unwind info. */
- tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
- tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
- TCG_REG_TB, TCG_REG_NONE, ofs);
- }
break;
case INDEX_op_goto_ptr:
a0 = args[0];
- if (USE_REG_TB) {
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
- }
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
break;
@@ -3405,9 +3280,6 @@ static void tcg_target_init(TCGContext *s)
/* XXX many insns can't be used with R0, so we better avoid it for now */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
- if (USE_REG_TB) {
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
- }
}
#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \
@@ -3428,16 +3300,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
#ifndef CONFIG_SOFTMMU
if (guest_base >= 0x80000) {
- tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
}
#endif
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
- if (USE_REG_TB) {
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
- tcg_target_call_iarg_regs[1]);
- }
/* br %r3 (go to TB) */
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
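
For reference, the retargeting step that the alignment handling above
enables could look roughly like the sketch below. It is loosely modeled on
what an s390x tb_target_set_jmp_target implementation has to do, but the
function name, signature, and variable names here are illustrative rather
than QEMU's actual API, and __atomic_store_n is the GCC/Clang builtin. RIL
displacements count 2-byte halfwords from the start of the instruction,
i.e. from 2 bytes before the displacement field.

#include <stdint.h>

static void retarget_branch(uint32_t *jmp_rw, uintptr_t jmp_rx, uintptr_t addr)
{
    /* jmp_rw: writable alias of the BRCL displacement field;
     * jmp_rx: the same field's address in the executable mapping. */
    intptr_t disp = (intptr_t)(addr - (jmp_rx - 2)) / 2;
    /*
     * The 4-byte alignment guaranteed at emission time makes this one
     * store atomic with respect to CPUs concurrently executing the TB.
     */
    __atomic_store_n(jmp_rw, (uint32_t)disp, __ATOMIC_RELAXED);
}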
--
2.34.1