qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC v7 14/16] target-arm: translate: Use ld/st excl fo


From: Alex Bennée
Subject: Re: [Qemu-devel] [RFC v7 14/16] target-arm: translate: Use ld/st excl for atomic insns
Date: Mon, 07 Mar 2016 20:06:09 +0000
User-agent: mu4e 0.9.17; emacs 25.0.92.3

alvise rigo <address@hidden> writes:

> On Thu, Feb 18, 2016 at 6:02 PM, Alex Bennée <address@hidden> wrote:
>>
>> Alvise Rigo <address@hidden> writes:
>>
>>> Use the new LL/SC runtime helpers to handle the ARM atomic instructions
>>> in softmmu_llsc_template.h.
>>>
>>> In general, the helper generator
>>> gen_{ldrex,strex}_{8,16a,32a,64a}() calls the function
>>> helper_{le,be}_{ldlink,stcond}{ub,uw,ul,q}_mmu() implemented in
>>> softmmu_llsc_template.h, doing an alignment check.
>>>
>>> In addition, add a simple helper function to emulate the CLREX instruction.
>>>
>>> Suggested-by: Jani Kokkonen <address@hidden>
>>> Suggested-by: Claudio Fontana <address@hidden>
>>> Signed-off-by: Alvise Rigo <address@hidden>
>>> ---
>>>  target-arm/cpu.h       |   2 +
>>>  target-arm/helper.h    |   4 ++
>>>  target-arm/machine.c   |   2 +
>>>  target-arm/op_helper.c |  10 +++
>>>  target-arm/translate.c | 188 
>>> +++++++++++++++++++++++++++++++++++++++++++++++--
>>>  5 files changed, 202 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
>>> index b8b3364..bb5361f 100644
>>> --- a/target-arm/cpu.h
>>> +++ b/target-arm/cpu.h
>>> @@ -462,9 +462,11 @@ typedef struct CPUARMState {
>>>          float_status fp_status;
>>>          float_status standard_fp_status;
>>>      } vfp;
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>      uint64_t exclusive_addr;
>>>      uint64_t exclusive_val;
>>>      uint64_t exclusive_high;
>>> +#endif
>>>  #if defined(CONFIG_USER_ONLY)
>>>      uint64_t exclusive_test;
>>>      uint32_t exclusive_info;
>>> diff --git a/target-arm/helper.h b/target-arm/helper.h
>>> index c2a85c7..6bc3c0a 100644
>>> --- a/target-arm/helper.h
>>> +++ b/target-arm/helper.h
>>> @@ -532,6 +532,10 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>>>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>>
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +DEF_HELPER_1(atomic_clear, void, env)
>>> +#endif
>>> +
>>>  #ifdef TARGET_AARCH64
>>>  #include "helper-a64.h"
>>>  #endif
>>> diff --git a/target-arm/machine.c b/target-arm/machine.c
>>> index ed1925a..7adfb4d 100644
>>> --- a/target-arm/machine.c
>>> +++ b/target-arm/machine.c
>>> @@ -309,9 +309,11 @@ const VMStateDescription vmstate_arm_cpu = {
>>>          VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
>>>                               cpreg_vmstate_array_len,
>>>                               0, vmstate_info_uint64, uint64_t),
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>          VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
>>>          VMSTATE_UINT64(env.exclusive_val, ARMCPU),
>>>          VMSTATE_UINT64(env.exclusive_high, ARMCPU),
>>> +#endif
>>
>> Hmm this does imply we either need to support migration of the LL/SC
>> state in the generic code or map the generic state into the ARM specific
>> machine state or we'll break migration.
>>
>> The later if probably better so you can save machine state from a
>> pre-LL/SC build and migrate to a new LL/SC enabled build.
>
> This basically would require to add in cpu_pre_save some code to copy
> env.exclusive_* to the new structures. As a consequence, this will not
> get rid of the variables pre-LL/SC.

I wonder what the others think but I think breaking any existing TCG
state on migration would be against the spirit of the thing, but I could
be wrong. If we do break migration we'll need to at least bump the
version tag.

>
>>
>>>          VMSTATE_UINT64(env.features, ARMCPU),
>>>          VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
>>>          VMSTATE_UINT32(env.exception.fsr, ARMCPU),
>>> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
>>> index a5ee65f..404c13b 100644
>>> --- a/target-arm/op_helper.c
>>> +++ b/target-arm/op_helper.c
>>> @@ -51,6 +51,14 @@ static int exception_target_el(CPUARMState *env)
>>>      return target_el;
>>>  }
>>>
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +void HELPER(atomic_clear)(CPUARMState *env)
>>> +{
>>> +    ENV_GET_CPU(env)->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
>>> +    ENV_GET_CPU(env)->ll_sc_context = false;
>>> +}
>>> +#endif
>>> +
>>
>> Given this is just touching generic CPU state this helper should probably be
>> part of the generic TCG runtime. I assume other arches will just call
>> this helper as well.
>
> Would it make sense instead to add a new CPUClass hook for this? Other
> architectures might want a different behaviour (or add something
> else).

Yes. Best of both worlds a generic helper with an option to vary it if needed.

>
> Thank you,
> alvise
>
>>
>>
>>>  uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
>>>                            uint32_t rn, uint32_t maxindex)
>>>  {
>>> @@ -689,7 +697,9 @@ void HELPER(exception_return)(CPUARMState *env)
>>>
>>>      aarch64_save_sp(env, cur_el);
>>>
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>      env->exclusive_addr = -1;
>>> +#endif
>>>
>>>      /* We must squash the PSTATE.SS bit to zero unless both of the
>>>       * following hold:
>>> diff --git a/target-arm/translate.c b/target-arm/translate.c
>>> index cff511b..5150841 100644
>>> --- a/target-arm/translate.c
>>> +++ b/target-arm/translate.c
>>> @@ -60,8 +60,10 @@ TCGv_ptr cpu_env;
>>>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>>>  static TCGv_i32 cpu_R[16];
>>>  TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>  TCGv_i64 cpu_exclusive_addr;
>>>  TCGv_i64 cpu_exclusive_val;
>>> +#endif
>>>  #ifdef CONFIG_USER_ONLY
>>>  TCGv_i64 cpu_exclusive_test;
>>>  TCGv_i32 cpu_exclusive_info;
>>> @@ -94,10 +96,12 @@ void arm_translate_init(void)
>>>      cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), 
>>> "VF");
>>>      cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), 
>>> "ZF");
>>>
>>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>>      cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
>>>          offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
>>>      cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
>>>          offsetof(CPUARMState, exclusive_val), "exclusive_val");
>>> +#endif
>>>  #ifdef CONFIG_USER_ONLY
>>>      cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
>>>          offsetof(CPUARMState, exclusive_test), "exclusive_test");
>>> @@ -7413,15 +7417,145 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
>>>      tcg_gen_or_i32(cpu_ZF, lo, hi);
>>>  }
>>>
>>> -/* Load/Store exclusive instructions are implemented by remembering
>>> +/* If the softmmu is enabled, the translation of Load/Store exclusive
>>> +   instructions will rely on the gen_helper_{ldlink,stcond} helpers,
>>> +   offloading most of the work to the softmmu_llsc_template.h functions.
>>> +   All the accesses made by the exclusive instructions include an
>>> +   alignment check.
>>> +
>>> +   Otherwise, these instructions are implemented by remembering
>>>     the value/address loaded, and seeing if these are the same
>>>     when the store is performed. This should be sufficient to implement
>>>     the architecturally mandated semantics, and avoids having to monitor
>>>     regular stores.
>>>
>>> -   In system emulation mode only one CPU will be running at once, so
>>> -   this sequence is effectively atomic.  In user emulation mode we
>>> -   throw an exception and handle the atomic operation elsewhere.  */
>>> +   In user emulation mode we throw an exception and handle the atomic
>>> +   operation elsewhere.  */
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +
>>> +#if TARGET_LONG_BITS == 32
>>> +#define DO_GEN_LDREX(SUFF)                                             \
>>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                    TCGv_i32 index)                    \
>>> +{                                                                      \
>>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr, index);               \
>>> +}
>>> +
>>> +#define DO_GEN_STREX(SUFF)                                             \
>>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                    TCGv_i32 val, TCGv_i32 index)      \
>>> +{                                                                      \
>>> +    gen_helper_stcond_##SUFF(dst, cpu_env, addr, val, index);          \
>>> +}
>>> +
>>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 
>>> index)
>>> +{
>>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr, index);
>>> +}
>>> +
>>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 
>>> val,
>>> +                                  TCGv_i32 index)
>>> +{
>>> +
>>> +    gen_helper_stcond_i64a(dst, cpu_env, addr, val, index);
>>> +}
>>> +#else
>>> +#define DO_GEN_LDREX(SUFF)                                             \
>>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                         TCGv_i32 index)               \
>>> +{                                                                      \
>>> +    TCGv addr64 = tcg_temp_new();                                      \
>>> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
>>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr64, index);             \
>>> +    tcg_temp_free(addr64);                                             \
>>> +}
>>> +
>>> +#define DO_GEN_STREX(SUFF)                                             \
>>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>>> +                                    TCGv_i32 val, TCGv_i32 index)      \
>>> +{                                                                      \
>>> +    TCGv addr64 = tcg_temp_new();                                      \
>>> +    TCGv dst64 = tcg_temp_new();                                       \
>>> +    tcg_gen_extu_i32_i64(addr64, addr);                                \
>>> +    gen_helper_stcond_##SUFF(dst64, cpu_env, addr64, val, index);      \
>>> +    tcg_gen_extrl_i64_i32(dst, dst64);                                 \
>>> +    tcg_temp_free(dst64);                                              \
>>> +    tcg_temp_free(addr64);                                             \
>>> +}
>>> +
>>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 
>>> index)
>>> +{
>>> +    TCGv addr64 = tcg_temp_new();
>>> +    tcg_gen_extu_i32_i64(addr64, addr);
>>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr64, index);
>>> +    tcg_temp_free(addr64);
>>> +}
>>> +
>>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 
>>> val,
>>> +                                  TCGv_i32 index)
>>> +{
>>> +    TCGv addr64 = tcg_temp_new();
>>> +    TCGv dst64 = tcg_temp_new();
>>> +
>>> +    tcg_gen_extu_i32_i64(addr64, addr);
>>> +    gen_helper_stcond_i64a(dst64, cpu_env, addr64, val, index);
>>> +    tcg_gen_extrl_i64_i32(dst, dst64);
>>> +
>>> +    tcg_temp_free(dst64);
>>> +    tcg_temp_free(addr64);
>>> +}
>>> +#endif
>>> +
>>> +#if defined(CONFIG_ARM_USE_LDST_EXCL)
>>> +DO_GEN_LDREX(i8)
>>> +DO_GEN_LDREX(i16a)
>>> +DO_GEN_LDREX(i32a)
>>> +
>>> +DO_GEN_STREX(i8)
>>> +DO_GEN_STREX(i16a)
>>> +DO_GEN_STREX(i32a)
>>> +#endif
>>> +
>>> +static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>> +                               TCGv_i32 addr, int size)
>>> + {
>>> +    TCGv_i32 tmp = tcg_temp_new_i32();
>>> +    TCGv_i32 mem_idx = tcg_temp_new_i32();
>>> +
>>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>>> +
>>> +    if (size != 3) {
>>> +        switch (size) {
>>> +        case 0:
>>> +            gen_ldrex_i8(tmp, addr, mem_idx);
>>> +            break;
>>> +        case 1:
>>> +            gen_ldrex_i16a(tmp, addr, mem_idx);
>>> +            break;
>>> +        case 2:
>>> +            gen_ldrex_i32a(tmp, addr, mem_idx);
>>> +            break;
>>> +        default:
>>> +            abort();
>>> +        }
>>> +
>>> +        store_reg(s, rt, tmp);
>>> +    } else {
>>> +        TCGv_i64 tmp64 = tcg_temp_new_i64();
>>> +        TCGv_i32 tmph = tcg_temp_new_i32();
>>> +
>>> +        gen_ldrex_i64a(tmp64, addr, mem_idx);
>>> +        tcg_gen_extr_i64_i32(tmp, tmph, tmp64);
>>> +
>>> +        store_reg(s, rt, tmp);
>>> +        store_reg(s, rt2, tmph);
>>> +
>>> +        tcg_temp_free_i64(tmp64);
>>> +    }
>>> +
>>> +    tcg_temp_free_i32(mem_idx);
>>> +}
>>> +#else
>>>  static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>>                                 TCGv_i32 addr, int size)
>>>  {
>>> @@ -7460,10 +7594,15 @@ static void gen_load_exclusive(DisasContext *s, int 
>>> rt, int rt2,
>>>      store_reg(s, rt, tmp);
>>>      tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>>>  }
>>> +#endif
>>>
>>>  static void gen_clrex(DisasContext *s)
>>>  {
>>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>>> +    gen_helper_atomic_clear(cpu_env);
>>> +#else
>>>      tcg_gen_movi_i64(cpu_exclusive_addr, -1);
>>> +#endif
>>>  }
>>>
>>>  #ifdef CONFIG_USER_ONLY
>>> @@ -7475,6 +7614,47 @@ static void gen_store_exclusive(DisasContext *s, int 
>>> rd, int rt, int rt2,
>>>                       size | (rd << 4) | (rt << 8) | (rt2 << 12));
>>>      gen_exception_internal_insn(s, 4, EXCP_STREX);
>>>  }
>>> +#elif defined CONFIG_ARM_USE_LDST_EXCL
>>> +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>> +                                TCGv_i32 addr, int size)
>>> +{
>>> +    TCGv_i32 tmp, mem_idx;
>>> +
>>> +    mem_idx = tcg_temp_new_i32();
>>> +
>>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>>> +    tmp = load_reg(s, rt);
>>> +
>>> +    if (size != 3) {
>>> +        switch (size) {
>>> +        case 0:
>>> +            gen_strex_i8(cpu_R[rd], addr, tmp, mem_idx);
>>> +            break;
>>> +        case 1:
>>> +            gen_strex_i16a(cpu_R[rd], addr, tmp, mem_idx);
>>> +            break;
>>> +        case 2:
>>> +            gen_strex_i32a(cpu_R[rd], addr, tmp, mem_idx);
>>> +            break;
>>> +        default:
>>> +            abort();
>>> +        }
>>> +    } else {
>>> +        TCGv_i64 tmp64;
>>> +        TCGv_i32 tmp2;
>>> +
>>> +        tmp64 = tcg_temp_new_i64();
>>> +        tmp2 = load_reg(s, rt2);
>>> +        tcg_gen_concat_i32_i64(tmp64, tmp, tmp2);
>>> +        gen_strex_i64a(cpu_R[rd], addr, tmp64, mem_idx);
>>> +
>>> +        tcg_temp_free_i32(tmp2);
>>> +        tcg_temp_free_i64(tmp64);
>>> +    }
>>> +
>>> +    tcg_temp_free_i32(tmp);
>>> +    tcg_temp_free_i32(mem_idx);
>>> +}
>>>  #else
>>>  static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>>                                  TCGv_i32 addr, int size)
>>
>>
>> --
>> Alex Bennée


--
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]