[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PULL 05/11] tcg-s390: Move ldst helpers out of line
From: |
Alexander Graf |
Subject: |
Re: [Qemu-devel] [PULL 05/11] tcg-s390: Move ldst helpers out of line |
Date: |
Thu, 29 Jan 2015 17:01:19 +0100 |
> On 15.05.2014, at 18:39, Richard Henderson <address@hidden> wrote:
>
> That is, the old LDST_OPTIMIZATION.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> tcg/s390/tcg-target.c | 210 ++++++++++++++++++++++++++++----------------------
> 1 file changed, 118 insertions(+), 92 deletions(-)
>
> diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
> index 4fbadaf..1c955db 100644
> --- a/tcg/s390/tcg-target.c
> +++ b/tcg/s390/tcg-target.c
> @@ -24,7 +24,7 @@
> * THE SOFTWARE.
> */
>
> -#include "tcg-be-null.h"
> +#include "tcg-be-ldst.h"
>
> /* We only support generating code for 64-bit mode. */
> #if TCG_TARGET_REG_BITS != 64
> @@ -1386,107 +1386,123 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
> }
>
> #if defined(CONFIG_SOFTMMU)
> -static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
> - TCGReg addr_reg, int mem_index, int opc,
> - tcg_insn_unit **label2_ptr_p, int is_store)
> +/* We're expecting to use a 20-bit signed offset on the tlb memory ops.
> + Using the offset of the second entry in the last tlb table ensures
> + that we can index all of the elements of the first entry. */
> +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
> + > 0x7ffff);
> +
> +/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
> + addend into R2. Returns a register with the santitized guest address. */
> +static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
> + int mem_index, bool is_ld)
> {
> - const TCGReg arg0 = tcg_target_call_iarg_regs[0];
> - const TCGReg arg1 = tcg_target_call_iarg_regs[1];
> - const TCGReg arg2 = tcg_target_call_iarg_regs[2];
> - const TCGReg arg3 = tcg_target_call_iarg_regs[3];
> - const TCGReg arg4 = tcg_target_call_iarg_regs[4];
> TCGMemOp s_bits = opc & MO_SIZE;
> - tcg_insn_unit *label1_ptr;
> - tcg_target_long ofs;
> + int ofs;
> +
> + tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
> + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
>
> if (TARGET_LONG_BITS == 32) {
> - tgen_ext32u(s, arg1, addr_reg);
> + tgen_ext32u(s, TCG_REG_R3, addr_reg);
> } else {
> - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
> + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
> }
>
> - tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE,
> - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
> -
> - tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
> - tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
> + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
> + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
> + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R3,
> + TARGET_PAGE_MASK | ((1 << s_bits) - 1));
>
> - if (is_store) {
> - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
> - } else {
> + if (is_ld) {
> ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
> + } else {
> + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
> }
> - assert(ofs < 0x80000);
> -
> if (TARGET_LONG_BITS == 32) {
> - tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs);
> + tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
> } else {
> - tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs);
> + tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
> }
>
> + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
> + tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs);
> +
> if (TARGET_LONG_BITS == 32) {
> - tgen_ext32u(s, arg1, addr_reg);
> - } else {
> - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
> - }
> -
> - label1_ptr = s->code_ptr;
> -
> - /* je label1 (offset will be patched in later) */
> - tcg_out_insn(s, RI, BRC, S390_CC_EQ, 0);
> -
> - /* call load/store helper */
> - if (is_store) {
> - /* Make sure to zero-extend the value to the full register
> - for the calling convention. */
> - switch (s_bits) {
> - case MO_UB:
> - tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg);
> - break;
> - case MO_UW:
> - tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg);
> - break;
> - case MO_UL:
> - tgen_ext32u(s, arg2, data_reg);
> - break;
> - case MO_Q:
> - tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg);
> - break;
> - default:
> - tcg_abort();
> - }
> - tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
> - tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0);
> - tcg_out_movi(s, TCG_TYPE_PTR, arg4, (uintptr_t)s->code_ptr);
> - tcg_out_call(s, qemu_st_helpers[opc]);
> - } else {
> - tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
> - tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0);
> - tcg_out_movi(s, TCG_TYPE_PTR, arg3, (uintptr_t)s->code_ptr);
> - tcg_out_call(s, qemu_ld_helpers[opc]);
> - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
> + tgen_ext32u(s, TCG_REG_R3, addr_reg);
> + return TCG_REG_R3;
> }
> + return addr_reg;
> +}
>
> - /* jump to label2 (end) */
> - *label2_ptr_p = s->code_ptr;
> -
> - tcg_out_insn(s, RI, BRC, S390_CC_ALWAYS, 0);
> +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
> + TCGReg data, TCGReg addr, int mem_index,
> + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
> +{
> + TCGLabelQemuLdst *label = new_ldst_label(s);
> +
> + label->is_ld = is_ld;
> + label->opc = opc;
> + label->datalo_reg = data;
> + label->addrlo_reg = addr;
> + label->mem_index = mem_index;
> + label->raddr = raddr;
> + label->label_ptr[0] = label_ptr;
> +}
>
> - /* this is label1, patch branch */
> - label1_ptr[1] = s->code_ptr - label1_ptr;
> +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
> +{
> + TCGReg addr_reg = lb->addrlo_reg;
> + TCGReg data_reg = lb->datalo_reg;
> + TCGMemOp opc = lb->opc;
>
> - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
> - assert(ofs < 0x80000);
> + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
>
> - tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs);
> + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
> + if (TARGET_LONG_BITS == 64) {
> + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
> + }
> + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, lb->mem_index);
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
> + tcg_out_call(s, qemu_ld_helpers[opc]);
> + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
>
> - return arg1;
> + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
> }
>
> -static void tcg_finish_qemu_ldst(TCGContext* s, tcg_insn_unit *label2_ptr)
> +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
> {
> - /* patch branch */
> - label2_ptr[1] = s->code_ptr - label2_ptr;
> + TCGReg addr_reg = lb->addrlo_reg;
> + TCGReg data_reg = lb->datalo_reg;
> + TCGMemOp opc = lb->opc;
> +
> + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
Under certain situations this seems to create a jump to itself:
(gdb)
0x000000009082a3f0 in code_gen_buffer ()
1: x/i $pc
=> 0x9082a3f0 <code_gen_buffer+8561648>:
jlh 0x9082a3f0 <code_gen_buffer+8561648>
Any idea why exactly this would happen? Or do we loop endlessly on purpose? ;)
Reverting this patch makes the guest work fine btw.
Alex
- Re: [Qemu-devel] [PULL 05/11] tcg-s390: Move ldst helpers out of line,
Alexander Graf <=