From: Alex Bennée
Subject: Re: [Qemu-devel] [RFC v3 05/13] target-arm: translate: implement qemu_ldlink and qemu_stcond ops
Date: Fri, 17 Jul 2015 13:51:35 +0100

Alvise Rigo <address@hidden> writes:

> Implement strex and ldrex instruction relying on TCG's qemu_ldlink and
> qemu_stcond.  For the time being only the 32bit instructions are supported.
>
> Suggested-by: Jani Kokkonen <address@hidden>
> Suggested-by: Claudio Fontana <address@hidden>
> Signed-off-by: Alvise Rigo <address@hidden>
> ---
>  target-arm/translate.c |  87 ++++++++++++++++++++++++++++++++++-
>  tcg/arm/tcg-target.c   | 121 +++++++++++++++++++++++++++++++++++++------------
>  2 files changed, 178 insertions(+), 30 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 80302cd..0366c76 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -72,6 +72,8 @@ static TCGv_i64 cpu_exclusive_test;
>  static TCGv_i32 cpu_exclusive_info;
>  #endif
>  
> +static TCGv_i32 cpu_ll_sc_context;
> +
>  /* FIXME:  These should be removed.  */
>  static TCGv_i32 cpu_F0s, cpu_F1s;
>  static TCGv_i64 cpu_F0d, cpu_F1d;
> @@ -103,6 +105,8 @@ void arm_translate_init(void)
>          offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
>      cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
>          offsetof(CPUARMState, exclusive_val), "exclusive_val");
> +    cpu_ll_sc_context = tcg_global_mem_new_i32(TCG_AREG0,
> +        offsetof(CPUARMState, ll_sc_context), "ll_sc_context");
>  #ifdef CONFIG_USER_ONLY
>      cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
>          offsetof(CPUARMState, exclusive_test), "exclusive_test");
> @@ -961,6 +965,18 @@ DO_GEN_ST(8, MO_UB)
>  DO_GEN_ST(16, MO_TEUW)
>  DO_GEN_ST(32, MO_TEUL)
>  
> +/* Load/Store exclusive generators (always unsigned) */
> +static inline void gen_aa32_ldex32(TCGv_i32 val, TCGv_i32 addr, int index)
> +{
> +    tcg_gen_qemu_ldlink_i32(val, addr, index, MO_TEUL | MO_EXCL);
> +}
> +
> +static inline void gen_aa32_stex32(TCGv_i32 is_dirty, TCGv_i32 val,
> +                                   TCGv_i32 addr, int index)
> +{
> +    tcg_gen_qemu_stcond_i32(is_dirty, val, addr, index, MO_TEUL | MO_EXCL);
> +}
> +
>  static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
>  {
>      tcg_gen_movi_i32(cpu_R[15], val);
> @@ -7427,6 +7443,26 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>      store_reg(s, rt, tmp);
>  }
>  
> +static void gen_load_exclusive_multi(DisasContext *s, int rt, int rt2,
> +                                     TCGv_i32 addr, int size)
> +{
> +    TCGv_i32 tmp = tcg_temp_new_i32();
> +
> +    switch (size) {
> +    case 0:
> +    case 1:
> +        abort();
> +    case 2:
> +        gen_aa32_ldex32(tmp, addr, get_mem_index(s));
> +        break;
> +    case 3:
> +    default:
> +        abort();
> +    }
> +
> +    store_reg(s, rt, tmp);
> +}
> +
>  static void gen_clrex(DisasContext *s)
>  {
>      gen_helper_atomic_clear(cpu_env);
> @@ -7460,6 +7496,52 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>      tcg_temp_free_i64(val);
>      tcg_temp_free_i32(tmp_size);
>  }
> +
> +static void gen_store_exclusive_multi(DisasContext *s, int rd, int rt, int rt2,
> +                                      TCGv_i32 addr, int size)
> +{
> +    TCGv_i32 tmp;
> +    TCGv_i32 is_dirty;
> +    TCGLabel *done_label;
> +    TCGLabel *fail_label;
> +
> +    fail_label = gen_new_label();
> +    done_label = gen_new_label();
> +
> +    tmp = tcg_temp_new_i32();
> +    is_dirty = tcg_temp_new_i32();
> +
> +    /* Fail if we are not in LL/SC context. */
> +    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ll_sc_context, 1, fail_label);
> +
> +    tmp = load_reg(s, rt);
> +    switch (size) {
> +    case 0:
> +    case 1:
> +        abort();
> +        break;
> +    case 2:
> +        gen_aa32_stex32(is_dirty, tmp, addr, get_mem_index(s));
> +        break;
> +    case 3:
> +    default:
> +        abort();
> +    }
> +
> +    tcg_temp_free_i32(tmp);
> +
> +    /* Check if the store conditional has to fail. */
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, is_dirty, 1, fail_label);
> +    tcg_temp_free_i32(is_dirty);
> +
> +    tcg_temp_free_i32(tmp);
> +
> +    tcg_gen_movi_i32(cpu_R[rd], 0); /* is_dirty = 0 */
> +    tcg_gen_br(done_label);
> +    gen_set_label(fail_label);
> +    tcg_gen_movi_i32(cpu_R[rd], 1); /* is_dirty = 1 */
> +    gen_set_label(done_label);
> +}
>  #endif
>  
>  /* gen_srs:
> @@ -8308,7 +8390,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
>                          } else if (insn & (1 << 20)) {
>                              switch (op1) {
>                              case 0: /* ldrex */
> -                                gen_load_exclusive(s, rd, 15, addr, 2);
> +                                gen_load_exclusive_multi(s, rd, 15, addr, 2);
>                                  break;
>                              case 1: /* ldrexd */
>                                  gen_load_exclusive(s, rd, rd + 1, addr, 3);
> @@ -8326,7 +8408,8 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
>                              rm = insn & 0xf;
>                              switch (op1) {
>                              case 0:  /*  strex */
> -                                gen_store_exclusive(s, rd, rm, 15, addr, 2);
> +                                gen_store_exclusive_multi(s, rd, rm, 15,
> +                                                          addr, 2);
>                                  break;
>                              case 1: /*  strexd */
>                                  gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
> index ae2ec7a..f2b69a0 100644
> --- a/tcg/arm/tcg-target.c
> +++ b/tcg/arm/tcg-target.c
> @@ -1069,6 +1069,17 @@ static void * const qemu_ld_helpers[16] = {
>      [MO_BESL] = helper_be_ldul_mmu,
>  };

So I'm guessing we'll be implementing this for every TCG backend? 

>  
> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */
> +static void * const qemu_ldex_helpers[16] = {
> +    [MO_LEUL] = helper_le_ldlinkul_mmu,
> +};
> +
> +#define LDEX_HELPER(mem_op)                                             \
> +({                                                                      \
> +    assert(mem_op & MO_EXCL);                                           \
> +    qemu_ldex_helpers[((int)mem_op - MO_EXCL)];                         \
> +})
> +
>  /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
>   *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
>   */
> @@ -1082,6 +1093,19 @@ static void * const qemu_st_helpers[16] = {
>      [MO_BEQ]  = helper_be_stq_mmu,
>  };
>  
> +/* StoreConditional helpers. Use the macro below to access them. */
> +static void * const qemu_stex_helpers[16] = {
> +    [MO_LEUL] = helper_le_stcondl_mmu,
> +};
> +
> +#define STEX_HELPER(mem_op)                                             \
> +({                                                                      \
> +    assert(mem_op & MO_EXCL);                                           \
> +    qemu_stex_helpers[(int)mem_op - MO_EXCL];                           \
> +})

Can the lookup not be merged with the existing ldst_helpers?
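
Something along these lines would at least keep a single lookup site
(completely untested sketch: it reuses the table names from the patch,
assumes MO_EXCL lies outside MO_BSWAP | MO_SIZE, and leaves the
pre-armv6 signed-helper special casing to one side; qemu_ld_helper()
itself is a made-up wrapper):

/* Pick the exclusive table when MO_EXCL is set and mask the flag off
   for the index, so the slow path does one lookup in both cases. */
static inline void *qemu_ld_helper(TCGMemOp opc)
{
    if (opc & MO_EXCL) {
        return qemu_ldex_helpers[opc & (MO_BSWAP | MO_SIZE)];
    }
    return qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
}

(and the same shape would work for the store side)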

> +
> +
> +
>  /* Helper routines for marshalling helper function arguments into
>   * the correct registers and stack.
>   * argreg is where we want to put this argument, arg is the argument itself.
> @@ -1222,13 +1246,14 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
>     path for a load or store, so that we can later generate the correct
>     helper code.  */
>  static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
> -                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
> -                                TCGReg addrhi, tcg_insn_unit *raddr,
> -                                tcg_insn_unit *label_ptr)
> +                                TCGReg llsc_success, TCGReg datalo,
> +                                TCGReg datahi, TCGReg addrlo, TCGReg addrhi,
> +                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
>  {
>      TCGLabelQemuLdst *label = new_ldst_label(s);
>  
>      label->is_ld = is_ld;
> +    label->llsc_success = llsc_success;
>      label->oi = oi;
>      label->datalo_reg = datalo;
>      label->datahi_reg = datahi;
> @@ -1259,12 +1284,16 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
>      /* For armv6 we can use the canonical unsigned helpers and minimize
>         icache usage.  For pre-armv6, use the signed helpers since we do
>         not have a single insn sign-extend.  */
> -    if (use_armv6_instructions) {
> -        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
> +    if (opc & MO_EXCL) {
> +        func = LDEX_HELPER(opc);
>      } else {
> -        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
> -        if (opc & MO_SIGN) {
> -            opc = MO_UL;
> +        if (use_armv6_instructions) {
> +            func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
> +        } else {
> +            func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
> +            if (opc & MO_SIGN) {
> +                opc = MO_UL;
> +            }
>          }
>      }
>      tcg_out_call(s, func);
> @@ -1336,8 +1365,15 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
>      argreg = tcg_out_arg_imm32(s, argreg, oi);
>      argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
>  
> -    /* Tail-call to the helper, which will return to the fast path.  */
> -    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> +    if (opc & MO_EXCL) {
> +        tcg_out_call(s, STEX_HELPER(opc));
> +        /* Save the output of the StoreConditional */
> +        tcg_out_mov_reg(s, COND_AL, lb->llsc_success, TCG_REG_R0);
> +        tcg_out_goto(s, COND_AL, lb->raddr);
> +    } else {
> +        /* Tail-call to the helper, which will return to the fast path.  */
> +        tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> +    }
>  }
>  #endif /* SOFTMMU */
>  
> @@ -1461,7 +1497,8 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
>      }
>  }
>  
> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
> +                            bool isLoadLink)
>  {
>      TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
>      TCGMemOpIdx oi;
> @@ -1484,13 +1521,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>      addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
>  
>      /* This a conditional BL only to load a pointer within this opcode into LR
> -       for the slow path.  We will not be using the value for a tail call.  */
> -    label_ptr = s->code_ptr;
> -    tcg_out_bl_noaddr(s, COND_NE);
> +       for the slow path.  We will not be using the value for a tail call.
> +       In the context of a LoadLink instruction, we don't check the TLB but we
> +       always follow the slow path.  */
> +    if (isLoadLink) {
> +        label_ptr = s->code_ptr;
> +        tcg_out_bl_noaddr(s, COND_AL);
> +    } else {
> +        label_ptr = s->code_ptr;
> +        tcg_out_bl_noaddr(s, COND_NE);

This seems a little redundant; we could set label_ptr outside the
isLoadLink check, as it will be the same in both cases. However, if we
are always taking the slow path for exclusive accesses, why do we need
to generate a tcg_out_tlb_read() at all? Do we rely on some side effect
of calling it for the slow path?
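
Something like this is what I had in mind (a completely untested sketch,
intended to behave the same as the patch, just hoisting the common
assignment; the names are the ones already used in tcg_out_qemu_ld):

    label_ptr = s->code_ptr;
    /* LoadLink always takes the slow path, so make the branch unconditional */
    tcg_out_bl_noaddr(s, isLoadLink ? COND_AL : COND_NE);
    if (!isLoadLink) {
        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
    }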

>  
> -    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
> +        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
> +    }
>  
> -    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
> +    add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
>                          s->code_ptr, label_ptr);
>  #else /* !CONFIG_SOFTMMU */
>      if (GUEST_BASE) {
> @@ -1592,9 +1636,11 @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
>      }
>  }
>  
> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
> +                            bool isStoreCond)
>  {
>      TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
> +    TCGReg llsc_success;
>      TCGMemOpIdx oi;
>      TCGMemOp opc;
>  #ifdef CONFIG_SOFTMMU
> @@ -1603,6 +1649,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>      tcg_insn_unit *label_ptr;
>  #endif
>  
> +    /* The stcond variant has one more param */
> +    llsc_success = (isStoreCond ? *args++ : 0);
> +
>      datalo = *args++;
>      datahi = (is64 ? *args++ : 0);
>      addrlo = *args++;
> @@ -1612,16 +1661,24 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>  
>  #ifdef CONFIG_SOFTMMU
>      mem_index = get_mmuidx(oi);
> -    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
>  
> -    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
> +    if (isStoreCond) {
> +        /* Always follow the slow-path for an exclusive access */
> +        label_ptr = s->code_ptr;
> +        tcg_out_bl_noaddr(s, COND_AL);
> +    } else {
> +        addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE,
> +                                  mem_index, 0);
>  
> -    /* The conditional call must come last, as we're going to return here.  */
> -    label_ptr = s->code_ptr;
> -    tcg_out_bl_noaddr(s, COND_NE);
> +        tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
>  
> -    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
> -                        s->code_ptr, label_ptr);
> +        /* The conditional call must come last, as we're going to return here.*/
> +        label_ptr = s->code_ptr;
> +        tcg_out_bl_noaddr(s, COND_NE);
> +    }
> +
> +    add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
> +                        addrhi, s->code_ptr, label_ptr);

Indeed this does what I commented about above ;-)

>  #else /* !CONFIG_SOFTMMU */
>      if (GUEST_BASE) {
>          tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
> @@ -1864,16 +1921,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>  
>      case INDEX_op_qemu_ld_i32:
> -        tcg_out_qemu_ld(s, args, 0);
> +        tcg_out_qemu_ld(s, args, 0, 0);
> +        break;
> +    case INDEX_op_qemu_ldlink_i32:
> +        tcg_out_qemu_ld(s, args, 0, 1); /* LoadLink */
>          break;
>      case INDEX_op_qemu_ld_i64:
> -        tcg_out_qemu_ld(s, args, 1);
> +        tcg_out_qemu_ld(s, args, 1, 0);
>          break;
>      case INDEX_op_qemu_st_i32:
> -        tcg_out_qemu_st(s, args, 0);
> +        tcg_out_qemu_st(s, args, 0, 0);
> +        break;
> +    case INDEX_op_qemu_stcond_i32:
> +        tcg_out_qemu_st(s, args, 0, 1); /* StoreConditional */
>          break;
>      case INDEX_op_qemu_st_i64:
> -        tcg_out_qemu_st(s, args, 1);
> +        tcg_out_qemu_st(s, args, 1, 0);
>          break;
>  
>      case INDEX_op_bswap16_i32:
> @@ -1957,8 +2020,10 @@ static const TCGTargetOpDef arm_op_defs[] = {
>  
>  #if TARGET_LONG_BITS == 32
>      { INDEX_op_qemu_ld_i32, { "r", "l" } },
> +    { INDEX_op_qemu_ldlink_i32, { "r", "l" } },
>      { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
>      { INDEX_op_qemu_st_i32, { "s", "s" } },
> +    { INDEX_op_qemu_stcond_i32, { "r", "s", "s" } },
>      { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
>  #else
>      { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },

-- 
Alex Bennée


