Drop the ld/st_trampolines, loading the return address into a
parameter register directly.
Signed-off-by: Richard Henderson <address@hidden>
---
include/exec/exec-all.h | 4 +-
tcg/ppc/tcg-target.c | 220 +++++++++++++++++++-----------------------------
2 files changed, 86 insertions(+), 138 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index beb4149..a81e805 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr;
In some implementations, we pass the "logical" return address manually;
in others, we must infer the logical return from the true return. */
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
-# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4))
-# elif defined(__arm__)
+# if defined(__arm__)
/* We define two insns between the return address and the branch back to
straight-line. Find and decode that branch insn. */
# define GETRA_LDST(RA) tcg_getra_ldst(RA)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index b0fbc54..a890319 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -551,27 +551,26 @@ static void add_qemu_ldst_label (TCGContext *s,
label->label_ptr[0] = label_ptr;
}
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx) */
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_ld_helpers[4] = {
- helper_ldb_mmu,
- helper_ldw_mmu,
- helper_ldl_mmu,
- helper_ldq_mmu,
+ helper_ret_ldub_mmu,
+ helper_ret_lduw_mmu,
+ helper_ret_ldul_mmu,
+ helper_ret_ldq_mmu,
};
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx) */
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_st_helpers[4] = {
- helper_stb_mmu,
- helper_stw_mmu,
- helper_stl_mmu,
- helper_stq_mmu,
+ helper_ret_stb_mmu,
+ helper_ret_stw_mmu,
+ helper_ret_stl_mmu,
+ helper_ret_stq_mmu,
};
-static void *ld_trampolines[4];
-static void *st_trampolines[4];
-
static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
int addr_reg, int addr_reg2, int s_bits,
int offset1, int offset2, uint8_t **label_ptr)
@@ -608,9 +607,14 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int
r1, int r2,
tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
#endif
+
+ /* Use a conditional branch-and-link so that we load a pointer to
+ somewhere within the current opcode, for passing on to the helper.
+ This address cannot be used for a tail call, but it's shorter
+ than forming an address from scratch. */
*label_ptr = s->code_ptr;
retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
- tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE);
+ tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK);
/* r0 now contains &env->tlb_table[mem_index][index].addr_x */
tcg_out32 (s, (LWZ
@@ -833,132 +837,99 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg
*args, int opc)
}
#if defined(CONFIG_SOFTMMU)
-static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- int s_bits;
- int ir;
- int opc = label->opc;
- int mem_index = label->mem_index;
- int data_reg = label->datalo_reg;
- int data_reg2 = label->datahi_reg;
- int addr_reg = label->addrlo_reg;
- uint8_t *raddr = label->raddr;
- uint8_t **label_ptr = &label->label_ptr[0];
+ TCGReg ir, datalo, datahi;
+ int opc = lb->opc;
- s_bits = opc & 3;
+ reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
- /* resolve label address */
- reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
+ ir = TCG_REG_R3;
+ tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0);
- /* slow path */
- ir = 4;
-#if TARGET_LONG_BITS == 32
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
-#else
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
+ } else {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
- ir |= 1;
-#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
+ ir |= 1;
#endif
- tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1);
- tcg_out32 (s, (tcg_target_long) raddr);
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg);
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
+ }
+
+ tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index);
+ tcg_out32(s, MFSPR | RT(ir++) | LR);
+
+ tcg_out_call(s, (uintptr_t)qemu_ld_helpers[opc & 3], 1);
+
+ datalo = lb->datalo_reg;
switch (opc) {
case 0|4:
- tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
+ tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3));
break;
case 1|4:
- tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
+ tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3));
break;
- case 0:
- case 1:
- case 2:
- if (data_reg != 3)
- tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
+
+ default:
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3);
break;
+
case 3:
- if (data_reg == 3) {
- if (data_reg2 == 4) {
- tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
- tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
- tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
- }
- else {
- tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
- tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
- }
- }
- else {
- if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
- if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
+ datahi = lb->datahi_reg;
+ if (datalo != TCG_REG_R3) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4);
+ tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
+ } else if (datahi != TCG_REG_R4) {
+ tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4);
+ tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0);
}
break;
}
+
/* Jump to the code corresponding to next IR of qemu_st */
- tcg_out_b (s, 0, (tcg_target_long) raddr);
+ tcg_out_b(s, 0, (uintptr_t)lb->raddr);
}
-static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- int ir;
- int opc = label->opc;
- int mem_index = label->mem_index;
- int data_reg = label->datalo_reg;
- int data_reg2 = label->datahi_reg;
- int addr_reg = label->addrlo_reg;
- uint8_t *raddr = label->raddr;
- uint8_t **label_ptr = &label->label_ptr[0];
-
- /* resolve label address */
- reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
-
- /* slow path */
- ir = 4;
-#if TARGET_LONG_BITS == 32
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
-#else
+ TCGReg ir;
+
+ reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
+
+ ir = TCG_REG_R3;
+ tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0);
+
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
+ } else {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
- ir |= 1;
-#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
+ ir |= 1;
#endif
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg);
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
+ }
- switch (opc) {
- case 0:
- tcg_out32 (s, (RLWINM
- | RA (ir)
- | RS (data_reg)
- | SH (0)
- | MB (24)
- | ME (31)));
- break;
- case 1:
- tcg_out32 (s, (RLWINM
- | RA (ir)
- | RS (data_reg)
- | SH (0)
- | MB (16)
- | ME (31)));
- break;
- case 2:
- tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
- break;
- case 3:
+ if (lb->opc != 3) {
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg);
+ } else {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
ir |= 1;
#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2);
- tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
- break;
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datahi_reg);
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg);
}
- ir++;
- tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1);
- tcg_out32 (s, (tcg_target_long) raddr);
- tcg_out_b (s, 0, (tcg_target_long) raddr);
+ tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index);
+ tcg_out32(s, MFSPR | RT(ir++) | LR);
+
+ tcg_out_call(s, (uintptr_t)qemu_st_helpers[lb->opc], 1);
+
+ tcg_out_b(s, 0, (uintptr_t)lb->raddr);
}
void tcg_out_tb_finalize(TCGContext *s)
@@ -979,17 +950,6 @@ void tcg_out_tb_finalize(TCGContext *s)
}
#endif
-#ifdef CONFIG_SOFTMMU
-static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
-{
- tcg_out32 (s, MFSPR | RT (3) | LR);
- tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
- tcg_out32 (s, MTSPR | RS (3) | LR);
- tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
- tcg_out_b (s, 0, (tcg_target_long) ptr);
-}
-#endif
-
static void tcg_target_qemu_prologue (TCGContext *s)
{
int i, frame_size;
@@ -1050,16 +1010,6 @@ static void tcg_target_qemu_prologue (TCGContext *s)
tcg_out32 (s, MTSPR | RS (0) | LR);
tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
tcg_out32 (s, BCLR | BO_ALWAYS);
-
-#ifdef CONFIG_SOFTMMU
- for (i = 0; i < 4; ++i) {
- ld_trampolines[i] = s->code_ptr;
- emit_ldst_trampoline (s, qemu_ld_helpers[i]);
-
- st_trampolines[i] = s->code_ptr;
- emit_ldst_trampoline (s, qemu_st_helpers[i]);
- }
-#endif
}
static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,