[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 16/20] tcg-arm: Fix local stack frame
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH v3 16/20] tcg-arm: Fix local stack frame |
Date: |
Fri, 29 Mar 2013 17:50:30 +0100 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Thu, Mar 28, 2013 at 08:32:57AM -0700, Richard Henderson wrote:
> We were not allocating TCG_STATIC_CALL_ARGS_SIZE, so this meant that
> any helper with more than 4 arguments would clobber the saved regs.
> Realizing that we're supposed to have this memory pre-allocated means
> we can clean up the tcg_out_arg functions, which were trying to do
> more stack allocation.
>
> Allocate stack memory for the TCG temporaries while we're at it.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> tcg/arm/tcg-target.c | 120
> ++++++++++++++++++---------------------------------
> 1 file changed, 43 insertions(+), 77 deletions(-)
This fixes a crash booting a mips64 guest, but unfortunately there are
still more issues to fix.
Reviewed-by: Aurelien Jarno <address@hidden>
> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
> index fb8c96d..70f63cb 100644
> --- a/tcg/arm/tcg-target.c
> +++ b/tcg/arm/tcg-target.c
> @@ -1074,65 +1074,35 @@ static const void * const qemu_st_helpers[4] = {
> * argreg is where we want to put this argument, arg is the argument itself.
> * Return value is the updated argreg ready for the next call.
> * Note that argreg 0..3 is real registers, 4+ on stack.
> - * When we reach the first stacked argument, we allocate space for it
> - * and the following stacked arguments using "str r8, [sp, #-0x10]!".
> - * Following arguments are filled in with "str r8, [sp, #0xNN]".
> - * For more than 4 stacked arguments we'd need to know how much
> - * space to allocate when we pushed the first stacked argument.
> - * We don't need this, so don't implement it (and will assert if you try it.)
> *
> * We provide routines for arguments which are: immediate, 32 bit
> * value in register, 16 and 8 bit values in register (which must be zero
> * extended before use) and 64 bit value in a lo:hi register pair.
> */
> -#define DEFINE_TCG_OUT_ARG(NAME, ARGPARAM) \
> - static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGPARAM) \
> - { \
> - if (argreg < 4) { \
> - TCG_OUT_ARG_GET_ARG(argreg); \
> - } else if (argreg == 4) { \
> - TCG_OUT_ARG_GET_ARG(TCG_REG_TMP); \
> - tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (TCG_REG_TMP <<
> 12)); \
> - } else { \
> - assert(argreg < 8); \
> - TCG_OUT_ARG_GET_ARG(TCG_REG_TMP); \
> - tcg_out_st32_12(s, COND_AL, TCG_REG_TMP, TCG_REG_CALL_STACK, \
> - (argreg - 4) * 4); \
> - } \
> - return argreg + 1; \
> - }
> -
> -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_dat_imm(s, COND_AL, ARITH_MOV, A, 0,
> arg)
> -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t arg)
> -#undef TCG_OUT_ARG_GET_ARG
> -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext8u(s, COND_AL, A, arg)
> -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg arg)
> -#undef TCG_OUT_ARG_GET_ARG
> -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext16u(s, COND_AL, A, arg)
> -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg arg)
> -#undef TCG_OUT_ARG_GET_ARG
> -
> -/* We don't use the macro for this one to avoid an unnecessary reg-reg
> - * move when storing to the stack.
> - */
> -static TCGReg tcg_out_arg_reg32(TCGContext *s, TCGReg argreg, TCGReg arg)
> -{
> - if (argreg < 4) {
> - tcg_out_mov_reg(s, COND_AL, argreg, arg);
> - } else if (argreg == 4) {
> - /* str arg, [sp, #-0x10]! */
> - tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (arg << 12));
> - } else {
> - assert(argreg < 8);
> - /* str arg, [sp, #0xNN] */
> - tcg_out32(s, (COND_AL << 28) | 0x058d0000 |
> - (arg << 12) | (argreg - 4) * 4);
> - }
> - return argreg + 1;
> -}
> -
> -static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
> - TCGReg arglo, TCGReg arghi)
> +#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
> +static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
> +{ \
> + if (argreg < 4) { \
> + MOV_ARG(s, COND_AL, argreg, arg); \
> + } else { \
> + int ofs = (argreg - 4) * 4; \
> + EXT_ARG; \
> + assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
> + tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
> + } \
> + return argreg + 1; \
> +}
> +
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
> + (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
> + (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
> + (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
> +
> +static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
> + TCGReg arglo, TCGReg arghi)
> {
> /* 64 bit arguments must go in even/odd register pairs
> * and in 8-aligned stack slots.
> @@ -1144,16 +1114,7 @@ static inline TCGReg tcg_out_arg_reg64(TCGContext *s,
> TCGReg argreg,
> argreg = tcg_out_arg_reg32(s, argreg, arghi);
> return argreg;
> }
> -
> -static inline void tcg_out_arg_stacktidy(TCGContext *s, TCGReg argreg)
> -{
> - /* Output any necessary post-call cleanup of the stack */
> - if (argreg > 4) {
> - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13,
> 0x10);
> - }
> -}
> -
> -#endif
> +#endif /* SOFTMMU */
>
> #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
>
> @@ -1284,7 +1245,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const
> TCGArg *args, int opc)
> #endif
> argreg = tcg_out_arg_imm32(s, argreg, mem_index);
> tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
> - tcg_out_arg_stacktidy(s, argreg);
>
> switch (opc) {
> case 0 | 4:
> @@ -1502,7 +1462,6 @@ static inline void tcg_out_qemu_st(TCGContext *s, const
> TCGArg *args, int opc)
>
> argreg = tcg_out_arg_imm32(s, argreg, mem_index);
> tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
> - tcg_out_arg_stacktidy(s, argreg);
>
> reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
> #else /* !CONFIG_SOFTMMU */
> @@ -1560,6 +1519,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const
> TCGArg *args, int opc)
> }
>
> static uint32_t tb_pop_ret;
> +static uint32_t tb_frame_size;
>
> static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> const TCGArg *args, const int *const_args)
> @@ -1570,14 +1530,16 @@ static inline void tcg_out_op(TCGContext *s,
> TCGOpcode opc,
> switch (opc) {
> case INDEX_op_exit_tb:
> a0 = args[0];
> + tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
> + TCG_REG_CALL_STACK, tb_frame_size, 1);
> if (use_armv7_instructions || check_fit_imm(a0)) {
> - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
> + tcg_out_movi32(s, COND_AL, TCG_REG_R0, a0);
> tcg_out32(s, tb_pop_ret);
> } else {
> /* pc is always current address + 8, so 0 reads the word. */
> tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
> tcg_out32(s, tb_pop_ret);
> - tcg_out32(s, args[0]);
> + tcg_out32(s, a0);
> }
> break;
> case INDEX_op_goto_tb:
> @@ -2002,8 +1964,6 @@ static void tcg_target_init(TCGContext *s)
> tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
>
> tcg_add_target_add_op_defs(arm_op_defs);
> - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
> - CPU_TEMP_BUF_NLONGS * sizeof(long));
> }
>
> static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
> @@ -2032,17 +1992,23 @@ static inline void tcg_out_movi(TCGContext *s,
> TCGType type,
>
> static void tcg_target_qemu_prologue(TCGContext *s)
> {
> - /* Calling convention requires us to save r4-r11 and lr;
> - * save also r12 to maintain stack 8-alignment.
> - */
> -
> + /* Calling convention requires us to save r4-r11 and lr; save also r12
> + to maintain stack 8-alignment. */
> /* stmdb sp!, { r4 - r12, lr } */
> tcg_out32(s, (COND_AL << 28) | 0x092d5ff0);
>
> + /* ldmia sp!, { r4 - r12, pc } */
> + tb_pop_ret = (COND_AL << 28) | 0x08bd9ff0;
> +
> + /* Allocate the local stack frame. */
> + tb_frame_size = TCG_STATIC_CALL_ARGS_SIZE;
> + tb_frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
> + tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
> + TCG_REG_CALL_STACK, tb_frame_size, 1);
> + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
> + CPU_TEMP_BUF_NLONGS * sizeof(long));
> +
> tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
>
> tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
> -
> - /* ldmia sp!, { r4 - r12, pc } */
> - tb_pop_ret = (COND_AL << 28) | 0x08bd9ff0;
> }
> --
> 1.8.1.4
>
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
- [Qemu-devel] [PATCH v3 11/20] tcg-arm: Use R12 for the tcg temporary, (continued)
[Qemu-devel] [PATCH v3 15/20] tcg-arm: Cleanup most primitive load store subroutines, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 16/20] tcg-arm: Fix local stack frame, Richard Henderson, 2013/03/28
- Re: [Qemu-devel] [PATCH v3 16/20] tcg-arm: Fix local stack frame,
Aurelien Jarno <=
[Qemu-devel] [PATCH v3 17/20] tcg-arm: Split out tcg_out_tlb_read, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 18/20] tcg-arm: Improve scheduling of tcg_out_tlb_read, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 20/20] tcg-arm: Convert to CONFIG_QEMU_LDST_OPTIMIZATION, Richard Henderson, 2013/03/28
[Qemu-devel] [PATCH v3 19/20] tcg-arm: Use movi32 + blx for calls on v7, Richard Henderson, 2013/03/28