[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 05/14] tcg-sparc: Simplify qemu_ld/st direct memory paths.
From: Blue Swirl
Subject: Re: [Qemu-devel] [PATCH 05/14] tcg-sparc: Simplify qemu_ld/st direct memory paths.
Date: Thu, 29 Mar 2012 18:47:56 +0000
On Wed, Mar 28, 2012 at 00:32, Richard Henderson <address@hidden> wrote:
> Given that we have an opcode for all sizes, all endianness,
> turn the functions into a simple table lookup.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> tcg/sparc/tcg-target.c | 384 +++++++++++++++++++-----------------------------
> 1 files changed, 150 insertions(+), 234 deletions(-)
>
> diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
> index c74fc2c..5cea5a8 100644
> --- a/tcg/sparc/tcg-target.c
> +++ b/tcg/sparc/tcg-target.c
> @@ -294,6 +294,16 @@ static inline int tcg_target_const_match(tcg_target_long
> val,
> #define ASI_PRIMARY_LITTLE 0x88
> #endif
>
> +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +
> +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
> +
> static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
> int op)
> {
> @@ -366,66 +376,46 @@ static inline void tcg_out_movi(TCGContext *s, TCGType
> type,
> }
> }
>
> -static inline void tcg_out_ld_raw(TCGContext *s, int ret,
> - tcg_target_long arg)
> +static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1,
> + int a2, int op)
> {
> - tcg_out_sethi(s, ret, arg);
> - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
> - INSN_IMM13(arg & 0x3ff));
> + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
> }
>
> -static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
> - tcg_target_long arg)
> +static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
> + int offset, int op)
> {
> - if (!check_fit_tl(arg, 10))
> - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
> - if (TCG_TARGET_REG_BITS == 64) {
> - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
> - INSN_IMM13(arg & 0x3ff));
> - } else {
> - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
> - INSN_IMM13(arg & 0x3ff));
> - }
> -}
> -
> -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int
> offset, int op)
> -{
> - if (check_fit_tl(offset, 13))
> + if (check_fit_tl(offset, 13)) {
> tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
> INSN_IMM13(offset));
> - else {
> + } else {
> tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
> - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
> - INSN_RS2(addr));
> + tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op);
> }
> }
>
> -static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
> - int offset, int op, int asi)
> -{
> - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
> - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
> - INSN_ASI(asi) | INSN_RS2(addr));
> -}
> -
> static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
> TCGReg arg1, tcg_target_long arg2)
> {
> - if (type == TCG_TYPE_I32)
> - tcg_out_ldst(s, ret, arg1, arg2, LDUW);
> - else
> - tcg_out_ldst(s, ret, arg1, arg2, LDX);
> + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
> }
>
> static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
> TCGReg arg1, tcg_target_long arg2)
> {
> - if (type == TCG_TYPE_I32)
> - tcg_out_ldst(s, arg, arg1, arg2, STW);
> - else
> - tcg_out_ldst(s, arg, arg1, arg2, STX);
> + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
> +}
> +
> +static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
> + tcg_target_long arg)
> +{
> + if (!check_fit_tl(arg, 10)) {
> + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
> + }
> + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
> }
>
> +
> static inline void tcg_out_sety(TCGContext *s, int rs)
> {
> tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
> @@ -757,22 +747,16 @@ static const void * const qemu_st_helpers[4] = {
> WHICH is the offset into the CPUTLBEntry structure of the slot to read.
> This should be offsetof addr_read or addr_write.
>
> - Outputs:
> - LABEL_PTRS is filled with the position of the forward jumps to the
> - TLB miss case. This will always be a ,PN insn, so a 19-bit offset.
> -
> - Returns a register loaded with the low part of the address, adjusted
> - as indicated by the TLB and so is a host address. Undefined in the
> - TLB miss case. */
> + The result of the TLB comparison is in %[ix]cc. The sanitized address
> + is in the returned register, maybe %o0. The TLB addend is in %o1. */
>
> static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
> - int s_bits, const TCGArg *args,
> - uint32_t **label_ptr, int which)
> + int s_bits, const TCGArg *args, int which)
> {
> const int addrlo = args[addrlo_idx];
> - const int r0 = tcg_target_call_iarg_regs[0];
> - const int r1 = tcg_target_call_iarg_regs[1];
> - const int r2 = tcg_target_call_iarg_regs[2];
> + const int r0 = TCG_REG_O0;
> + const int r1 = TCG_REG_O1;
> + const int r2 = TCG_REG_O2;
> int addr = addrlo;
> int tlb_ofs;
>
> @@ -803,110 +787,39 @@ static int tcg_out_tlb_load(TCGContext *s, int
> addrlo_idx, int mem_index,
> tlb_ofs = 0;
> }
>
> - /* ld [arg1 + which], arg2 */
> + /* Load the tlb comparator and the addend. */
> tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
> + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry,
> addend));
>
> /* subcc arg0, arg2, %g0 */
> tcg_out_cmp(s, r0, r2, 0);
>
> - /* bne,pn %[ix]cc, label0 */
> - *label_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) |
> - ((TARGET_LONG_BITS == 64) << 21)));
> -
> - /* TLB Hit. Compute the host address into r1. The ld is in the
> - branch delay slot; harmless for the TLB miss case. */
> - tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry,
> addend));
> -
> + /* If the guest address must be zero-extended, do so now. */
> if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
> tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
> - tcg_out_arith(s, r1, r0, r1, ARITH_ADD);
> - } else {
> - tcg_out_arith(s, r1, addrlo, r1, ARITH_ADD);
> + return r0;
> }
> -
> - return r1;
> + return addrlo;
> }
> #endif /* CONFIG_SOFTMMU */
>
> -static void tcg_out_qemu_ld_direct(TCGContext *s, int addr, int datalo,
> - int datahi, int sizeop)
> -{
> +static const int qemu_ld_opc[8] = {
> #ifdef TARGET_WORDS_BIGENDIAN
> - const int bigendian = 1;
> + LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
> #else
> - const int bigendian = 0;
> + LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
> #endif
> - switch (sizeop) {
> - case 0:
> - /* ldub [addr], datalo */
> - tcg_out_ldst(s, datalo, addr, 0, LDUB);
> - break;
> - case 0 | 4:
> - /* ldsb [addr], datalo */
> - tcg_out_ldst(s, datalo, addr, 0, LDSB);
> - break;
> - case 1:
> - if (bigendian) {
> - /* lduh [addr], datalo */
> - tcg_out_ldst(s, datalo, addr, 0, LDUH);
> - } else {
> - /* lduha [addr] ASI_PRIMARY_LITTLE, datalo */
> - tcg_out_ldst_asi(s, datalo, addr, 0, LDUHA, ASI_PRIMARY_LITTLE);
> - }
> - break;
> - case 1 | 4:
> - if (bigendian) {
> - /* ldsh [addr], datalo */
> - tcg_out_ldst(s, datalo, addr, 0, LDSH);
> - } else {
> - /* ldsha [addr] ASI_PRIMARY_LITTLE, datalo */
> - tcg_out_ldst_asi(s, datalo, addr, 0, LDSHA, ASI_PRIMARY_LITTLE);
> - }
> - break;
> - case 2:
> - if (bigendian) {
> - /* lduw [addr], datalo */
> - tcg_out_ldst(s, datalo, addr, 0, LDUW);
> - } else {
> - /* lduwa [addr] ASI_PRIMARY_LITTLE, datalo */
> - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA, ASI_PRIMARY_LITTLE);
> - }
> - break;
> - case 2 | 4:
> - if (bigendian) {
> - /* ldsw [addr], datalo */
> - tcg_out_ldst(s, datalo, addr, 0, LDSW);
> - } else {
> - /* ldswa [addr] ASI_PRIMARY_LITTLE, datalo */
> - tcg_out_ldst_asi(s, datalo, addr, 0, LDSWA, ASI_PRIMARY_LITTLE);
> - }
> - break;
> - case 3:
> - if (TCG_TARGET_REG_BITS == 64) {
> - if (bigendian) {
> - /* ldx [addr], datalo */
> - tcg_out_ldst(s, datalo, addr, 0, LDX);
> - } else {
> - /* ldxa [addr] ASI_PRIMARY_LITTLE, datalo */
> - tcg_out_ldst_asi(s, datalo, addr, 0, LDXA,
> ASI_PRIMARY_LITTLE);
> - }
> - } else {
> - if (bigendian) {
> - tcg_out_ldst(s, datahi, addr, 0, LDUW);
> - tcg_out_ldst(s, datalo, addr, 4, LDUW);
> - } else {
> - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA,
> ASI_PRIMARY_LITTLE);
> - tcg_out_ldst_asi(s, datahi, addr, 4, LDUWA,
> ASI_PRIMARY_LITTLE);
> - }
> - }
> - break;
> - default:
> - tcg_abort();
> - }
> -}
> +};
>
> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> +static const int qemu_st_opc[4] = {
> +#ifdef TARGET_WORDS_BIGENDIAN
> + STB, STH, STW, STX
> +#else
> + STB, STH_LE, STW_LE, STX_LE
> +#endif
> +};
> +
> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
> {
> int addrlo_idx = 1, datalo, datahi, addr_reg;
> #if defined(CONFIG_SOFTMMU)
> @@ -915,7 +828,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
> *args, int opc)
> #endif
>
> datahi = datalo = args[0];
> - if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
> + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
> datahi = args[1];
> addrlo_idx = 2;
> }
> @@ -923,27 +836,59 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
> *args, int opc)
> #if defined(CONFIG_SOFTMMU)
> memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
> memi = args[memi_idx];
> - s_bits = opc & 3;
> + s_bits = sizeop & 3;
>
> addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
> - label_ptr, offsetof(CPUTLBEntry, addr_read));
> + offsetof(CPUTLBEntry, addr_read));
>
> - /* TLB Hit. */
> - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc);
> + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
> + int reg64;
>
> - /* b,pt,n label1 */
> - label_ptr[1] = (uint32_t *)s->code_ptr;
> - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
> - | (1 << 29) | (1 << 19)));
> + /* bne,pn %[xi]cc, label0 */
> + label_ptr[0] = (uint32_t *)s->code_ptr;
> + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
> + | ((TARGET_LONG_BITS == 64) << 21)));
> +
> + /* TLB Hit. */
> + /* Load all 64-bits into an O/G register. */
> + reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
> + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
> +
> + /* Move the two 32-bit pieces into the destination registers. */
> + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
> + if (reg64 != datalo) {
> + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
> + }
> +
> + /* b,pt,n label1 */
> + label_ptr[1] = (uint32_t *)s->code_ptr;
> + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
> + | (1 << 29) | (1 << 19)));
> + } else {
> + /* The fast path is exactly one insn. Thus we can perform the
> + entire TLB Hit in the (annulled) delay slot of the branch
> + over the TLB Miss case. */
> +
> + /* beq,a,pt %[xi]cc, label0 */
> + label_ptr[0] = NULL;
> + label_ptr[1] = (uint32_t *)s->code_ptr;
> + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
> + | ((TARGET_LONG_BITS == 64) << 21)
> + | (1 << 29) | (1 << 19)));
> + /* delay slot */
> + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1,
> qemu_ld_opc[sizeop]);
> + }
>
> /* TLB Miss. */
>
> - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
> - (unsigned long)label_ptr[0]);
> - n = 0;
> -#ifdef CONFIG_TCG_PASS_AREG0
> - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
> -#endif
> + if (label_ptr[0]) {
> + *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
> + (unsigned long)label_ptr[0]);
> + }
> + n = ARG_OFFSET;
> + if (ARG_OFFSET) {
> + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
WARNING: suspect code indent for conditional statements (4, 7)
#395: FILE: tcg/sparc/tcg-target.c:889:
+ if (ARG_OFFSET) {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
WARNING: suspect code indent for conditional statements (4, 9)
#542: FILE: tcg/sparc/tcg-target.c:1013:
+ if (ARG_OFFSET) {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
total: 0 errors, 2 warnings, 525 lines checked
> + }
> if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
> args[addrlo_idx + 1]);
> @@ -971,7 +916,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
> *args, int opc)
>
> n = tcg_target_call_oarg_regs[0];
> /* datalo = sign_extend(arg0) */
> - switch(opc) {
> + switch (sizeop) {
> case 0 | 4:
> /* Recall that SRA sign extends from bit 31 through bit 63. */
> tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
> @@ -1008,75 +953,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const
> TCGArg *args, int opc)
> tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
> addr_reg = TCG_REG_I5;
> }
> - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc);
> -#endif /* CONFIG_SOFTMMU */
> -}
> + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
> + int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
>
> -static void tcg_out_qemu_st_direct(TCGContext *s, int addr, int datalo,
> - int datahi, int sizeop)
> -{
> -#ifdef TARGET_WORDS_BIGENDIAN
> - const int bigendian = 1;
> -#else
> - const int bigendian = 0;
> -#endif
> - switch (sizeop) {
> - case 0:
> - /* stb datalo, [addr] */
> - tcg_out_ldst(s, datalo, addr, 0, STB);
> - break;
> - case 1:
> - if (bigendian) {
> - /* sth datalo, [addr] */
> - tcg_out_ldst(s, datalo, addr, 0, STH);
> - } else {
> - /* stha datalo, [addr] ASI_PRIMARY_LITTLE */
> - tcg_out_ldst_asi(s, datalo, addr, 0, STHA, ASI_PRIMARY_LITTLE);
> - }
> - break;
> - case 2:
> - if (bigendian) {
> - /* stw datalo, [addr] */
> - tcg_out_ldst(s, datalo, addr, 0, STW);
> - } else {
> - /* stwa datalo, [addr] ASI_PRIMARY_LITTLE */
> - tcg_out_ldst_asi(s, datalo, addr, 0, STWA, ASI_PRIMARY_LITTLE);
> - }
> - break;
> - case 3:
> - if (TCG_TARGET_REG_BITS == 64) {
> - if (bigendian) {
> - /* stx datalo, [addr] */
> - tcg_out_ldst(s, datalo, addr, 0, STX);
> - } else {
> - /* stxa datalo, [addr] ASI_PRIMARY_LITTLE */
> - tcg_out_ldst_asi(s, datalo, addr, 0, STXA,
> ASI_PRIMARY_LITTLE);
> - }
> - } else {
> - if (bigendian) {
> - tcg_out_ldst(s, datahi, addr, 0, STW);
> - tcg_out_ldst(s, datalo, addr, 4, STW);
> - } else {
> - tcg_out_ldst_asi(s, datalo, addr, 0, STWA,
> ASI_PRIMARY_LITTLE);
> - tcg_out_ldst_asi(s, datahi, addr, 4, STWA,
> ASI_PRIMARY_LITTLE);
> - }
> + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
> +
> + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
> + if (reg64 != datalo) {
> + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
> }
> - break;
> - default:
> - tcg_abort();
> + } else {
> + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0,
> qemu_ld_opc[sizeop]);
> }
> +#endif /* CONFIG_SOFTMMU */
> }
>
> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
> {
> int addrlo_idx = 1, datalo, datahi, addr_reg;
> #if defined(CONFIG_SOFTMMU)
> int memi_idx, memi, n;
> - uint32_t *label_ptr[2];
> + uint32_t *label_ptr;
> #endif
>
> datahi = datalo = args[0];
> - if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
> + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
> datahi = args[1];
> addrlo_idx = 2;
> }
> @@ -1085,33 +986,40 @@ static void tcg_out_qemu_st(TCGContext *s, const
> TCGArg *args, int opc)
> memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
> memi = args[memi_idx];
>
> - addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, opc, args,
> - label_ptr, offsetof(CPUTLBEntry,
> addr_write));
> + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
> + offsetof(CPUTLBEntry, addr_write));
>
> - /* TLB Hit. */
> - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc);
> + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
> + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */
> + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
> + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
> + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
> + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
> + datalo = TCG_REG_G1;
> + }
>
> - /* b,pt,n label1 */
> - label_ptr[1] = (uint32_t *)s->code_ptr;
> - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
> + /* The fast path is exactly one insn. Thus we can perform the entire
> + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
> + /* beq,a,pt %[xi]cc, label0 */
> + label_ptr = (uint32_t *)s->code_ptr;
> + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
> + | ((TARGET_LONG_BITS == 64) << 21)
> | (1 << 29) | (1 << 19)));
> + /* delay slot */
> + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
>
> /* TLB Miss. */
> -
> - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
> - (unsigned long)label_ptr[0]);
> -
> - n = 0;
> -#ifdef CONFIG_TCG_PASS_AREG0
> - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
> -#endif
> + n = ARG_OFFSET;
> + if (ARG_OFFSET) {
> + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
> TCG_AREG0);
> + }
> if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
> args[addrlo_idx + 1]);
> }
> tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
> args[addrlo_idx]);
> - if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
> + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
> tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
> }
> tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
> @@ -1123,7 +1031,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg
> *args, int opc)
> sizeof(long));
>
> /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
> - tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[opc]
> + tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
> - (tcg_target_ulong)s->code_ptr) >> 2)
> & 0x3fffffff));
> /* delay slot */
> @@ -1134,15 +1042,23 @@ static void tcg_out_qemu_st(TCGContext *s, const
> TCGArg *args, int opc)
> TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
> sizeof(long));
>
> - *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
> - (unsigned long)label_ptr[1]);
> + *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
> + (unsigned long)label_ptr);
> #else
> addr_reg = args[addrlo_idx];
> if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
> tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
> addr_reg = TCG_REG_I5;
> }
> - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc);
> + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
> + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */
> + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
> + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
> + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
> + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
> + datalo = TCG_REG_G1;
> + }
> + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]);
> #endif /* CONFIG_SOFTMMU */
> }
>
> --
> 1.7.7.6
>
- [Qemu-devel] [PATCH 02/14] tcg-sparc: Fix ADDX opcode., (continued)
- [Qemu-devel] [PATCH 02/14] tcg-sparc: Fix ADDX opcode., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 03/14] tcg-sparc: Assume v9 cpu always, i.e. force v8plus in 32-bit mode., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 06/14] tcg-sparc: Support GUEST_BASE., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 04/14] tcg-sparc: Fix qemu_ld/st to handle 32-bit host., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 07/14] Avoid declaring the env variable at all if CONFIG_TCG_PASS_AREG0., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 09/14] tcg-sparc: Change AREG0 in generated code to %i0., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 05/14] tcg-sparc: Simplify qemu_ld/st direct memory paths., Richard Henderson, 2012/03/27
- Re: [Qemu-devel] [PATCH 05/14] tcg-sparc: Simplify qemu_ld/st direct memory paths.,
Blue Swirl <=
- [Qemu-devel] [PATCH 08/14] tcg-sparc: Do not use a global register for AREG0., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 10/14] tcg-sparc: Clean up cruft stemming from attempts to use global registers., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 11/14] tcg-sparc: Mask shift immediates to avoid illegal insns., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 14/14] tcg-sparc: Fix and enable direct TB chaining., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 12/14] tcg-sparc: Use defines for temporaries., Richard Henderson, 2012/03/27
- [Qemu-devel] [PATCH 13/14] tcg-sparc: Add %g/%o registers to alloc_order, Richard Henderson, 2012/03/27