qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 1/3] tcg/arm: fix TLB access in qemu-ld/st op


From: Laurent Desnogues
Subject: Re: [Qemu-devel] [PATCH v2 1/3] tcg/arm: fix TLB access in qemu-ld/st ops
Date: Wed, 31 Oct 2012 14:54:38 +0100

On Tue, Oct 30, 2012 at 1:18 AM,  <address@hidden> wrote:
> From: Aurelien Jarno <address@hidden>
>
> The TCG arm backend considers likely that the offset to the TLB
> entries does not exceed 12 bits for mem_index = 0. In practice this is
> not true for at least the MIPS target.
>
> The current patch fixes that by loading the bits 23-12 with a separate
> instruction, and using loads with address writeback, independently of
> the value of mem_idx. In total this allow a 24-bit offset, which is a
> lot more than needed.
>
> Cc: Andrzej Zaborowski <address@hidden>
> Cc: Peter Maydell <address@hidden>
> Cc: address@hidden
> Signed-off-by: Aurelien Jarno <address@hidden>
> ---
>  tcg/arm/tcg-target.c |   77 
> +++++++++++++++++++++++++++-----------------------
>  1 file changed, 41 insertions(+), 36 deletions(-)
>
> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
> index e790bf0..03b1576 100644
> --- a/tcg/arm/tcg-target.c
> +++ b/tcg/arm/tcg-target.c
> @@ -639,6 +639,22 @@ static inline void tcg_out_ld32_12(TCGContext *s, int 
> cond,
>                          (rn << 16) | (rd << 12) | ((-im) & 0xfff));
>  }
>
> +/* Offset pre-increment with base writeback.  */
> +static inline void tcg_out_ld32_12wb(TCGContext *s, int cond,
> +                                     int rd, int rn, tcg_target_long im)
> +{
> +    /* ldr with writeback and both register equals is UNPREDICTABLE */
> +    assert(rd != rn);
> +
> +    if (im >= 0) {
> +        tcg_out32(s, (cond << 28) | 0x05b00000 |
> +                        (rn << 16) | (rd << 12) | (im & 0xfff));
> +    } else {
> +        tcg_out32(s, (cond << 28) | 0x05300000 |
> +                        (rn << 16) | (rd << 12) | ((-im) & 0xfff));
> +    }
> +}
> +
>  static inline void tcg_out_st32_12(TCGContext *s, int cond,
>                  int rd, int rn, tcg_target_long im)
>  {
> @@ -1071,7 +1087,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const 
> TCGArg *args, int opc)
>  {
>      int addr_reg, data_reg, data_reg2, bswap;
>  #ifdef CONFIG_SOFTMMU
> -    int mem_index, s_bits;
> +    int mem_index, s_bits, tlb_offset;
>      TCGReg argreg;
>  # if TARGET_LONG_BITS == 64
>      int addr_reg2;
> @@ -1111,19 +1127,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, 
> const TCGArg *args, int opc)
>                      TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
>      tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
>                      TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
> -    /* In the
> -     *  ldr r1 [r0, #(offsetof(CPUArchState, 
> tlb_table[mem_index][0].addr_read))]
> -     * below, the offset is likely to exceed 12 bits if mem_index != 0 and
> -     * not exceed otherwise, so use an
> -     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
> -     * before.
> -     */
> -    if (mem_index)
> +    /* We assume that the offset is contained within 24 bits.  */
> +    tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
> +    assert(tlb_offset & ~0xffffff == 0);
> +    if (tlb_offset > 0xfff) {
>          tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
> -                        (mem_index << (TLB_SHIFT & 1)) |
> -                        ((16 - (TLB_SHIFT >> 1)) << 8));
> -    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
> -                    offsetof(CPUArchState, tlb_table[0][0].addr_read));
> +                        0xa00 | (tlb_offset >> 12));

Isn't it 20 bits rather than 24 bits since the immediate is 8-bit right-rotated
by 20?


Laurent

> +        tlb_offset &= 0xfff;
> +    }
> +    tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
>      tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
>                      TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
>      /* Check alignment.  */
> @@ -1131,15 +1143,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, 
> const TCGArg *args, int opc)
>          tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
>                          0, addr_reg, (1 << s_bits) - 1);
>  #  if TARGET_LONG_BITS == 64
> -    /* XXX: possibly we could use a block data load or writeback in
> -     * the first access.  */
> -    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> -                    offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
> +    /* XXX: possibly we could use a block data load in the first access.  */
> +    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
>      tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
>                      TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
>  #  endif
>      tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> -                    offsetof(CPUArchState, tlb_table[0][0].addend));
> +                    offsetof(CPUTLBEntry, addend)
> +                    - offsetof(CPUTLBEntry, addr_read));
>
>      switch (opc) {
>      case 0:
> @@ -1288,7 +1299,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const 
> TCGArg *args, int opc)
>  {
>      int addr_reg, data_reg, data_reg2, bswap;
>  #ifdef CONFIG_SOFTMMU
> -    int mem_index, s_bits;
> +    int mem_index, s_bits, tlb_offset;
>      TCGReg argreg;
>  # if TARGET_LONG_BITS == 64
>      int addr_reg2;
> @@ -1325,19 +1336,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, 
> const TCGArg *args, int opc)
>                      TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
>      tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
>                      TCG_AREG0, TCG_REG_R0, 
> SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
> -    /* In the
> -     *  ldr r1 [r0, #(offsetof(CPUArchState, 
> tlb_table[mem_index][0].addr_write))]
> -     * below, the offset is likely to exceed 12 bits if mem_index != 0 and
> -     * not exceed otherwise, so use an
> -     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
> -     * before.
> -     */
> -    if (mem_index)
> +    /* We assume that the offset is contained within 24 bits.  */
> +    tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
> +    if (tlb_offset > 0xfff) {
>          tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
> -                        (mem_index << (TLB_SHIFT & 1)) |
> -                        ((16 - (TLB_SHIFT >> 1)) << 8));
> -    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
> -                    offsetof(CPUArchState, tlb_table[0][0].addr_write));
> +                        0xa00 | (tlb_offset >> 12));
> +        tlb_offset &= 0xfff;
> +    }
> +    tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
>      tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
>                      TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
>      /* Check alignment.  */
> @@ -1345,15 +1351,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, 
> const TCGArg *args, int opc)
>          tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
>                          0, addr_reg, (1 << s_bits) - 1);
>  #  if TARGET_LONG_BITS == 64
> -    /* XXX: possibly we could use a block data load or writeback in
> -     * the first access.  */
> -    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> -                    offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
> +    /* XXX: possibly we could use a block data load in the first access.  */
> +    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
>      tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
>                      TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
>  #  endif
>      tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> -                    offsetof(CPUArchState, tlb_table[0][0].addend));
> +                    offsetof(CPUTLBEntry, addend)
> +                    - offsetof(CPUTLBEntry, addr_write));
>
>      switch (opc) {
>      case 0:
> --
> 1.7.10.4
>
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]