[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC v3 05/13] target-arm: translate: implement qemu_ld
From: |
alvise rigo |
Subject: |
Re: [Qemu-devel] [RFC v3 05/13] target-arm: translate: implement qemu_ldlink and qemu_stcond ops |
Date: |
Fri, 17 Jul 2015 15:01:24 +0200 |
On Fri, Jul 17, 2015 at 2:51 PM, Alex Bennée <address@hidden> wrote:
>
> Alvise Rigo <address@hidden> writes:
>
>> Implement strex and ldrex instruction relying on TCG's qemu_ldlink and
>> qemu_stcond. For the time being only the 32bit instructions are supported.
>>
>> Suggested-by: Jani Kokkonen <address@hidden>
>> Suggested-by: Claudio Fontana <address@hidden>
>> Signed-off-by: Alvise Rigo <address@hidden>
>> ---
>> target-arm/translate.c | 87 ++++++++++++++++++++++++++++++++++-
>> tcg/arm/tcg-target.c | 121
>> +++++++++++++++++++++++++++++++++++++------------
>> 2 files changed, 178 insertions(+), 30 deletions(-)
>>
>> diff --git a/target-arm/translate.c b/target-arm/translate.c
>> index 80302cd..0366c76 100644
>> --- a/target-arm/translate.c
>> +++ b/target-arm/translate.c
>> @@ -72,6 +72,8 @@ static TCGv_i64 cpu_exclusive_test;
>> static TCGv_i32 cpu_exclusive_info;
>> #endif
>>
>> +static TCGv_i32 cpu_ll_sc_context;
>> +
>> /* FIXME: These should be removed. */
>> static TCGv_i32 cpu_F0s, cpu_F1s;
>> static TCGv_i64 cpu_F0d, cpu_F1d;
>> @@ -103,6 +105,8 @@ void arm_translate_init(void)
>> offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
>> cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
>> offsetof(CPUARMState, exclusive_val), "exclusive_val");
>> + cpu_ll_sc_context = tcg_global_mem_new_i32(TCG_AREG0,
>> + offsetof(CPUARMState, ll_sc_context), "ll_sc_context");
>> #ifdef CONFIG_USER_ONLY
>> cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
>> offsetof(CPUARMState, exclusive_test), "exclusive_test");
>> @@ -961,6 +965,18 @@ DO_GEN_ST(8, MO_UB)
>> DO_GEN_ST(16, MO_TEUW)
>> DO_GEN_ST(32, MO_TEUL)
>>
>> +/* Load/Store exclusive generators (always unsigned) */
>> +static inline void gen_aa32_ldex32(TCGv_i32 val, TCGv_i32 addr, int index)
>> +{
>> + tcg_gen_qemu_ldlink_i32(val, addr, index, MO_TEUL | MO_EXCL);
>> +}
>> +
>> +static inline void gen_aa32_stex32(TCGv_i32 is_dirty, TCGv_i32 val,
>> + TCGv_i32 addr, int index)
>> +{
>> + tcg_gen_qemu_stcond_i32(is_dirty, val, addr, index, MO_TEUL | MO_EXCL);
>> +}
>> +
>> static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
>> {
>> tcg_gen_movi_i32(cpu_R[15], val);
>> @@ -7427,6 +7443,26 @@ static void gen_load_exclusive(DisasContext *s, int
>> rt, int rt2,
>> store_reg(s, rt, tmp);
>> }
>>
>> +static void gen_load_exclusive_multi(DisasContext *s, int rt, int rt2,
>> + TCGv_i32 addr, int size)
>> +{
>> + TCGv_i32 tmp = tcg_temp_new_i32();
>> +
>> + switch (size) {
>> + case 0:
>> + case 1:
>> + abort();
>> + case 2:
>> + gen_aa32_ldex32(tmp, addr, get_mem_index(s));
>> + break;
>> + case 3:
>> + default:
>> + abort();
>> + }
>> +
>> + store_reg(s, rt, tmp);
>> +}
>> +
>> static void gen_clrex(DisasContext *s)
>> {
>> gen_helper_atomic_clear(cpu_env);
>> @@ -7460,6 +7496,52 @@ static void gen_store_exclusive(DisasContext *s, int
>> rd, int rt, int rt2,
>> tcg_temp_free_i64(val);
>> tcg_temp_free_i32(tmp_size);
>> }
>> +
>> +static void gen_store_exclusive_multi(DisasContext *s, int rd, int rt, int
>> rt2,
>> + TCGv_i32 addr, int size)
>> +{
>> + TCGv_i32 tmp;
>> + TCGv_i32 is_dirty;
>> + TCGLabel *done_label;
>> + TCGLabel *fail_label;
>> +
>> + fail_label = gen_new_label();
>> + done_label = gen_new_label();
>> +
>> + tmp = tcg_temp_new_i32();
>> + is_dirty = tcg_temp_new_i32();
>> +
>> + /* Fail if we are not in LL/SC context. */
>> + tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ll_sc_context, 1, fail_label);
>> +
>> + tmp = load_reg(s, rt);
>> + switch (size) {
>> + case 0:
>> + case 1:
>> + abort();
>> + break;
>> + case 2:
>> + gen_aa32_stex32(is_dirty, tmp, addr, get_mem_index(s));
>> + break;
>> + case 3:
>> + default:
>> + abort();
>> + }
>> +
>> + tcg_temp_free_i32(tmp);
>> +
>> + /* Check if the store conditional has to fail. */
>> + tcg_gen_brcondi_i32(TCG_COND_EQ, is_dirty, 1, fail_label);
>> + tcg_temp_free_i32(is_dirty);
>> +
>> + tcg_temp_free_i32(tmp);
>> +
>> + tcg_gen_movi_i32(cpu_R[rd], 0); /* is_dirty = 0 */
>> + tcg_gen_br(done_label);
>> + gen_set_label(fail_label);
>> + tcg_gen_movi_i32(cpu_R[rd], 1); /* is_dirty = 1 */
>> + gen_set_label(done_label);
>> +}
>> #endif
>>
>> /* gen_srs:
>> @@ -8308,7 +8390,7 @@ static void disas_arm_insn(DisasContext *s, unsigned
>> int insn)
>> } else if (insn & (1 << 20)) {
>> switch (op1) {
>> case 0: /* ldrex */
>> - gen_load_exclusive(s, rd, 15, addr, 2);
>> + gen_load_exclusive_multi(s, rd, 15, addr,
>> 2);
>> break;
>> case 1: /* ldrexd */
>> gen_load_exclusive(s, rd, rd + 1, addr, 3);
>> @@ -8326,7 +8408,8 @@ static void disas_arm_insn(DisasContext *s, unsigned
>> int insn)
>> rm = insn & 0xf;
>> switch (op1) {
>> case 0: /* strex */
>> - gen_store_exclusive(s, rd, rm, 15, addr, 2);
>> + gen_store_exclusive_multi(s, rd, rm, 15,
>> + addr, 2);
>> break;
>> case 1: /* strexd */
>> gen_store_exclusive(s, rd, rm, rm + 1,
>> addr, 3);
>> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
>> index ae2ec7a..f2b69a0 100644
>> --- a/tcg/arm/tcg-target.c
>> +++ b/tcg/arm/tcg-target.c
>> @@ -1069,6 +1069,17 @@ static void * const qemu_ld_helpers[16] = {
>> [MO_BESL] = helper_be_ldul_mmu,
>> };
>
> So I'm guessing we'll be implementing this for every TCG backend?
Correct. Depending on which atomic instructions the architecture provides, a subset
of these helpers will be implemented.
>
>>
>> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */
>> +static void * const qemu_ldex_helpers[16] = {
>> + [MO_LEUL] = helper_le_ldlinkul_mmu,
>> +};
>> +
>> +#define LDEX_HELPER(mem_op) \
>> +({ \
>> + assert(mem_op & MO_EXCL); \
>> + qemu_ldex_helpers[((int)mem_op - MO_EXCL)]; \
>> +})
>> +
>> /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
>> * uintxx_t val, int mmu_idx, uintptr_t
>> ra)
>> */
>> @@ -1082,6 +1093,19 @@ static void * const qemu_st_helpers[16] = {
>> [MO_BEQ] = helper_be_stq_mmu,
>> };
>>
>> +/* StoreConditional helpers. Use the macro below to access them. */
>> +static void * const qemu_stex_helpers[16] = {
>> + [MO_LEUL] = helper_le_stcondl_mmu,
>> +};
>> +
>> +#define STEX_HELPER(mem_op) \
>> +({ \
>> + assert(mem_op & MO_EXCL); \
>> + qemu_stex_helpers[(int)mem_op - MO_EXCL]; \
>> +})
>
> Can the lookup not be merged with the existing ldst_helpers?
Sure.
>
>> +
>> +
>> +
>> /* Helper routines for marshalling helper function arguments into
>> * the correct registers and stack.
>> * argreg is where we want to put this argument, arg is the argument itself.
>> @@ -1222,13 +1246,14 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg
>> addrlo, TCGReg addrhi,
>> path for a load or store, so that we can later generate the correct
>> helper code. */
>> static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>> - TCGReg datalo, TCGReg datahi, TCGReg addrlo,
>> - TCGReg addrhi, tcg_insn_unit *raddr,
>> - tcg_insn_unit *label_ptr)
>> + TCGReg llsc_success, TCGReg datalo,
>> + TCGReg datahi, TCGReg addrlo, TCGReg addrhi,
>> + tcg_insn_unit *raddr, tcg_insn_unit
>> *label_ptr)
>> {
>> TCGLabelQemuLdst *label = new_ldst_label(s);
>>
>> label->is_ld = is_ld;
>> + label->llsc_success = llsc_success;
>> label->oi = oi;
>> label->datalo_reg = datalo;
>> label->datahi_reg = datahi;
>> @@ -1259,12 +1284,16 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s,
>> TCGLabelQemuLdst *lb)
>> /* For armv6 we can use the canonical unsigned helpers and minimize
>> icache usage. For pre-armv6, use the signed helpers since we do
>> not have a single insn sign-extend. */
>> - if (use_armv6_instructions) {
>> - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
>> + if (opc & MO_EXCL) {
>> + func = LDEX_HELPER(opc);
>> } else {
>> - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
>> - if (opc & MO_SIGN) {
>> - opc = MO_UL;
>> + if (use_armv6_instructions) {
>> + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
>> + } else {
>> + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
>> + if (opc & MO_SIGN) {
>> + opc = MO_UL;
>> + }
>> }
>> }
>> tcg_out_call(s, func);
>> @@ -1336,8 +1365,15 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s,
>> TCGLabelQemuLdst *lb)
>> argreg = tcg_out_arg_imm32(s, argreg, oi);
>> argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
>>
>> - /* Tail-call to the helper, which will return to the fast path. */
>> - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
>> + if (opc & MO_EXCL) {
>> + tcg_out_call(s, STEX_HELPER(opc));
>> + /* Save the output of the StoreConditional */
>> + tcg_out_mov_reg(s, COND_AL, lb->llsc_success, TCG_REG_R0);
>> + tcg_out_goto(s, COND_AL, lb->raddr);
>> + } else {
>> + /* Tail-call to the helper, which will return to the fast path. */
>> + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP |
>> MO_SIZE)]);
>> + }
>> }
>> #endif /* SOFTMMU */
>>
>> @@ -1461,7 +1497,8 @@ static inline void tcg_out_qemu_ld_direct(TCGContext
>> *s, TCGMemOp opc,
>> }
>> }
>>
>> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
>> + bool isLoadLink)
>> {
>> TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
>> TCGMemOpIdx oi;
>> @@ -1484,13 +1521,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const
>> TCGArg *args, bool is64)
>> addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index,
>> 1);
>>
>> /* This a conditional BL only to load a pointer within this opcode into
>> LR
>> - for the slow path. We will not be using the value for a tail call.
>> */
>> - label_ptr = s->code_ptr;
>> - tcg_out_bl_noaddr(s, COND_NE);
>> + for the slow path. We will not be using the value for a tail call.
>> + In the context of a LoadLink instruction, we don't check the TLB but
>> we
>> + always follow the slow path. */
>> + if (isLoadLink) {
>> + label_ptr = s->code_ptr;
>> + tcg_out_bl_noaddr(s, COND_AL);
>> + } else {
>> + label_ptr = s->code_ptr;
>> + tcg_out_bl_noaddr(s, COND_NE);
>
> This seems a little redundant, we could set label_ptr outside the
> isLoadLink check as it will be the same in both cases. However if we are
> always taking the slow path for exclusive accesses then why should we
> generate a tcg_out_tlb_read()? Do we need some side effects from calling
> it for the slow path?
Good point: we can avoid the TLB read when we are doing an LL/SC operation.
In fact, the first thing we do in softmmu_llsc_template.h is to update
the corresponding TLB entry, if necessary.
Thank you,
alvise
>
>>
>> - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
>> + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
>> + }
>>
>> - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
>> + add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
>> s->code_ptr, label_ptr);
>> #else /* !CONFIG_SOFTMMU */
>> if (GUEST_BASE) {
>> @@ -1592,9 +1636,11 @@ static inline void tcg_out_qemu_st_direct(TCGContext
>> *s, TCGMemOp opc,
>> }
>> }
>>
>> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
>> + bool isStoreCond)
>> {
>> TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
>> + TCGReg llsc_success;
>> TCGMemOpIdx oi;
>> TCGMemOp opc;
>> #ifdef CONFIG_SOFTMMU
>> @@ -1603,6 +1649,9 @@ static void tcg_out_qemu_st(TCGContext *s, const
>> TCGArg *args, bool is64)
>> tcg_insn_unit *label_ptr;
>> #endif
>>
>> + /* The stcond variant has one more param */
>> + llsc_success = (isStoreCond ? *args++ : 0);
>> +
>> datalo = *args++;
>> datahi = (is64 ? *args++ : 0);
>> addrlo = *args++;
>> @@ -1612,16 +1661,24 @@ static void tcg_out_qemu_st(TCGContext *s, const
>> TCGArg *args, bool is64)
>>
>> #ifdef CONFIG_SOFTMMU
>> mem_index = get_mmuidx(oi);
>> - addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index,
>> 0);
>>
>> - tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
>> + if (isStoreCond) {
>> + /* Always follow the slow-path for an exclusive access */
>> + label_ptr = s->code_ptr;
>> + tcg_out_bl_noaddr(s, COND_AL);
>> + } else {
>> + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE,
>> + mem_index, 0);
>>
>> - /* The conditional call must come last, as we're going to return here.
>> */
>> - label_ptr = s->code_ptr;
>> - tcg_out_bl_noaddr(s, COND_NE);
>> + tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo,
>> addend);
>>
>> - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
>> - s->code_ptr, label_ptr);
>> + /* The conditional call must come last, as we're going to return
>> here.*/
>> + label_ptr = s->code_ptr;
>> + tcg_out_bl_noaddr(s, COND_NE);
>> + }
>> +
>> + add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
>> + addrhi, s->code_ptr, label_ptr);
>
> Indeed this does what I commented about above ;-)
>
>> #else /* !CONFIG_SOFTMMU */
>> if (GUEST_BASE) {
>> tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
>> @@ -1864,16 +1921,22 @@ static inline void tcg_out_op(TCGContext *s,
>> TCGOpcode opc,
>> break;
>>
>> case INDEX_op_qemu_ld_i32:
>> - tcg_out_qemu_ld(s, args, 0);
>> + tcg_out_qemu_ld(s, args, 0, 0);
>> + break;
>> + case INDEX_op_qemu_ldlink_i32:
>> + tcg_out_qemu_ld(s, args, 0, 1); /* LoadLink */
>> break;
>> case INDEX_op_qemu_ld_i64:
>> - tcg_out_qemu_ld(s, args, 1);
>> + tcg_out_qemu_ld(s, args, 1, 0);
>> break;
>> case INDEX_op_qemu_st_i32:
>> - tcg_out_qemu_st(s, args, 0);
>> + tcg_out_qemu_st(s, args, 0, 0);
>> + break;
>> + case INDEX_op_qemu_stcond_i32:
>> + tcg_out_qemu_st(s, args, 0, 1); /* StoreConditional */
>> break;
>> case INDEX_op_qemu_st_i64:
>> - tcg_out_qemu_st(s, args, 1);
>> + tcg_out_qemu_st(s, args, 1, 0);
>> break;
>>
>> case INDEX_op_bswap16_i32:
>> @@ -1957,8 +2020,10 @@ static const TCGTargetOpDef arm_op_defs[] = {
>>
>> #if TARGET_LONG_BITS == 32
>> { INDEX_op_qemu_ld_i32, { "r", "l" } },
>> + { INDEX_op_qemu_ldlink_i32, { "r", "l" } },
>> { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
>> { INDEX_op_qemu_st_i32, { "s", "s" } },
>> + { INDEX_op_qemu_stcond_i32, { "r", "s", "s" } },
>> { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
>> #else
>> { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
>
> --
> Alex Bennée
- Re: [Qemu-devel] [RFC v3 03/13] softmmu: Add helpers for a new slow-path, (continued)
- [Qemu-devel] [RFC v3 06/13] target-i386: translate: implement qemu_ldlink and qemu_stcond ops, Alvise Rigo, 2015/07/10
- [Qemu-devel] [RFC v3 04/13] tcg-op: create new TCG qemu_ldlink and qemu_stcond instructions, Alvise Rigo, 2015/07/10
- [Qemu-devel] [RFC v3 07/13] ram_addr.h: Make exclusive bitmap accessors atomic, Alvise Rigo, 2015/07/10
- [Qemu-devel] [RFC v3 08/13] exec.c: introduce a simple rendezvous support, Alvise Rigo, 2015/07/10
- [Qemu-devel] [RFC v3 05/13] target-arm: translate: implement qemu_ldlink and qemu_stcond ops, Alvise Rigo, 2015/07/10
- [Qemu-devel] [RFC v3 09/13] cpus.c: introduce simple callback support, Alvise Rigo, 2015/07/10
[Qemu-devel] [RFC v3 11/13] Introduce exit_flush_req and tcg_excl_access_lock, Alvise Rigo, 2015/07/10
[Qemu-devel] [RFC v3 10/13] Simple TLB flush wrap to use as exit callback, Alvise Rigo, 2015/07/10
[Qemu-devel] [RFC v3 13/13] softmmu_template.h: move to multithreading, Alvise Rigo, 2015/07/10