From: Alex Bennée
Subject: Re: [Qemu-devel] [RFC PATCH] Use atomic cmpxchg to atomically check the exclusive value in a STREX
Date: Tue, 09 Jun 2015 10:12:56 +0100

address@hidden writes:

> From: KONRAD Frederic <address@hidden>
>
> This mechanism replaces the existing load/store exclusive mechanism, which seems
> to be broken for multithreaded use.
> It follows the intention of the existing mechanism: it stores the target address
> and data values during a load operation and checks that they remain unchanged
> before a store.
>
> In common with the older approach, this provides weaker semantics than required,
> in that a different processor could write the same value as a non-exclusive
> write; however in practice this seems to be irrelevant.

You can WFE on the global monitor and use it for a lockless sleep on a
flag value. I don't think the Linux kernel does it but certainly
anything trying to be power efficient may use it.
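Roughly the guest-side idiom I mean, as a sketch (assumes an ARMv7 guest and GCC
inline asm; illustrative only, not code from this patch or from Linux):

    /* Lockless wait on a flag using the exclusive monitor: LDREX tags the
     * address on the monitor, and WFE then sleeps until the event register
     * is set, e.g. by another observer's store clearing the global monitor
     * or by an explicit SEV.
     */
    static inline unsigned int load_exclusive(volatile unsigned int *p)
    {
        unsigned int v;
        asm volatile("ldrex %0, [%1]" : "=r" (v) : "r" (p) : "memory");
        return v;
    }

    void wait_for_flag(volatile unsigned int *flag)
    {
        while (load_exclusive(flag) == 0) {
            asm volatile("wfe" ::: "memory");
        }
        asm volatile("clrex" ::: "memory");   /* drop the exclusive tag */
    }

    /* Waker side: *flag = 1; asm volatile("dsb\n\tsev" ::: "memory"); */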

>
> The old implementation didn't correctly store its values as globals, but rather
> kept a local copy per CPU.
>
> This new mechanism stores the values globally and also uses the atomic cmpxchg
> macros to ensure atomicity - it is therefore very efficient and thread-safe.
>
> Signed-off-by: KONRAD Frederic <address@hidden>
> ---
>  target-arm/cpu.c       |  21 +++++++
>  target-arm/cpu.h       |   6 ++
>  target-arm/helper.c    |  12 ++++
>  target-arm/helper.h    |   6 ++
>  target-arm/op_helper.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  target-arm/translate.c |  98 ++++++---------------------------
>  6 files changed, 207 insertions(+), 82 deletions(-)
>
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index 4a888ab..0400061 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -31,6 +31,26 @@
>  #include "sysemu/kvm.h"
>  #include "kvm_arm.h"
>  
> +/* Protect cpu_exclusive_* variables. */
> +__thread bool cpu_have_exclusive_lock;
> +QemuMutex cpu_exclusive_lock;
> +
> +inline void arm_exclusive_lock(void)
> +{
> +    if (!cpu_have_exclusive_lock) {
> +        qemu_mutex_lock(&cpu_exclusive_lock);
> +        cpu_have_exclusive_lock = true;
> +    }
> +}
> +
> +inline void arm_exclusive_unlock(void)
> +{
> +    if (cpu_have_exclusive_lock) {
> +        cpu_have_exclusive_lock = false;
> +        qemu_mutex_unlock(&cpu_exclusive_lock);
> +    }
> +}

I don't quite follow. If these locks are meant to be protecting access to
variables then how do they do that? The lock won't block if another
thread is currently messing with the protected values.

> +
>  static void arm_cpu_set_pc(CPUState *cs, vaddr value)
>  {
>      ARMCPU *cpu = ARM_CPU(cs);
> @@ -425,6 +445,7 @@ static void arm_cpu_initfn(Object *obj)
>          cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
>          if (!inited) {
>              inited = true;
> +            qemu_mutex_init(&cpu_exclusive_lock);
>              arm_translate_init();
>          }
>      }
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index 21b5b8e..257ed05 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -506,6 +506,9 @@ static inline bool is_a64(CPUARMState *env)
>  int cpu_arm_signal_handler(int host_signum, void *pinfo,
>                             void *puc);
>  
> +int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
> +                      hwaddr *phys_ptr, int *prot, target_ulong *page_size);
> +
>  /**
>   * pmccntr_sync
>   * @env: CPUARMState
> @@ -1922,4 +1925,7 @@ enum {
>      QEMU_PSCI_CONDUIT_HVC = 2,
>  };
>  
> +void arm_exclusive_lock(void);
> +void arm_exclusive_unlock(void);
> +
>  #endif
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 3da0c05..31ee1e5 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -23,6 +23,14 @@ static inline int get_phys_addr(CPUARMState *env, target_ulong address,
>  #define PMCRE   0x1
>  #endif
>  
> +int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
> +                      hwaddr *phys_ptr, int *prot, target_ulong *page_size)
> +{
> +    MemTxAttrs attrs = {};
> +    return get_phys_addr(env, address, access_type, cpu_mmu_index(env),
> +                         phys_ptr, &attrs, prot, page_size);
> +}
> +
>  static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
>  {
>      int nregs;
> @@ -4731,6 +4739,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
>  
>      arm_log_exception(cs->exception_index);
>  
> +    arm_exclusive_lock();
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +
>      if (arm_is_psci_call(cpu, cs->exception_index)) {
>          arm_handle_psci_call(cpu);
>          qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index fc885de..63c2809 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -529,6 +529,12 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  
> +DEF_HELPER_4(atomic_cmpxchg64, i32, env, i32, i64, i32)
> +DEF_HELPER_2(atomic_check, i32, env, i32)
> +DEF_HELPER_1(atomic_release, void, env)
> +DEF_HELPER_1(atomic_clear, void, env)
> +DEF_HELPER_3(atomic_claim, void, env, i32, i64)
> +
>  #ifdef TARGET_AARCH64
>  #include "helper-a64.h"
>  #endif
> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
> index 7583ae7..81dd403 100644
> --- a/target-arm/op_helper.c
> +++ b/target-arm/op_helper.c
> @@ -30,12 +30,157 @@ static void raise_exception(CPUARMState *env, uint32_t excp,
>      CPUState *cs = CPU(arm_env_get_cpu(env));
>  
>      assert(!excp_is_internal(excp));
> +    arm_exclusive_lock();
>      cs->exception_index = excp;
>      env->exception.syndrome = syndrome;
>      env->exception.target_el = target_el;
> +    /*
> +     * We MAY already have the lock - in which case we are exiting the
> +     * instruction due to an exception. Otherwise we better make sure we are not
> +     * about to enter a STREX anyway.
> +     */
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
>      cpu_loop_exit(cs);
>  }
>  
> +/* NB return 1 for fail, 0 for pass */
> +uint32_t HELPER(atomic_cmpxchg64)(CPUARMState *env, uint32_t addr,
> +                                  uint64_t newval, uint32_t size)
> +{
> +    ARMCPU *cpu = arm_env_get_cpu(env);
> +    CPUState *cs = CPU(cpu);
> +
> +    bool result = false;
> +    hwaddr len = 8 << size;
> +
> +    hwaddr paddr;
> +    target_ulong page_size;
> +    int prot;
> +
> +    arm_exclusive_lock();
> +
> +    if (env->exclusive_addr != addr) {
> +        arm_exclusive_unlock();
> +        return 1;
> +    }
> +
> +    if (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
> +        tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, cpu_mmu_index(env), 0);
> +        if (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
> +            arm_exclusive_unlock();
> +            return 1;
> +        }
> +    }

Some comments as to what we are doing here would be useful.
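If my reading of the intent is right, something along these lines above the
first call would help:

    /* Resolve the guest address to a physical address.  If the translation
     * fails, let tlb_fill() handle it (raising the guest data abort for a
     * genuinely bad mapping), then retry; if we still can't resolve it,
     * the store-exclusive simply fails.
     */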

> +
> +    switch (size) {
> +    case 0:
> +    {
> +        uint8_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint8_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint8_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 1:
> +    {
> +        uint16_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint16_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint16_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 2:
> +    {
> +        uint32_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint32_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint32_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 3:
> +    {
> +        uint64_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint64_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint64_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    default:
> +        abort();
> +    break;
> +    }
> +
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +    if (result) {
> +        return 0;
> +    } else {
> +        return 1;
> +    }
> +}
> +
> +
> +uint32_t HELPER(atomic_check)(CPUARMState *env, uint32_t addr)
> +{
> +      arm_exclusive_lock();
> +      if (env->exclusive_addr == addr) {
> +        return true;
> +      }
> +      /* we failed to keep the address tagged, so we fail */
> +      env->exclusive_addr = -1;  /* for efficiency */
> +      arm_exclusive_unlock();
> +      return false;
> +}
> +
> +void HELPER(atomic_release)(CPUARMState *env)
> +{
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +}
> +
> +void HELPER(atomic_clear)(CPUARMState *env)
> +{
> +    /* make sure no STREX is about to start */
> +    arm_exclusive_lock();
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +}
> +
> +
> +void HELPER(atomic_claim)(CPUARMState *env, uint32_t addr, uint64_t val)
> +{
> +    CPUState *cpu;
> +    CPUARMState *current_cpu;
> +
> +    /* ensure that there are no STREX's executing */
> +    arm_exclusive_lock();
> +
> +    CPU_FOREACH(cpu) {
> +        current_cpu = &ARM_CPU(cpu)->env;
> +        if (current_cpu->exclusive_addr  == addr) {
> +            /* We steal the atomic of this CPU. */
> +            current_cpu->exclusive_addr = -1;
> +        }
> +    }
> +
> +    env->exclusive_val = val;
> +    env->exclusive_addr = addr;
> +    arm_exclusive_unlock();
> +}
> +
>  static int exception_target_el(CPUARMState *env)
>  {
>      int target_el = MAX(1, arm_current_el(env));
> @@ -582,7 +727,6 @@ void HELPER(exception_return)(CPUARMState *env)
>  
>      aarch64_save_sp(env, cur_el);
>  
> -    env->exclusive_addr = -1;
>  
>      /* We must squash the PSTATE.SS bit to zero unless both of the
>       * following hold:
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 39692d7..ba4eb65 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -65,8 +65,8 @@ TCGv_ptr cpu_env;
>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>  static TCGv_i32 cpu_R[16];
>  static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
> -static TCGv_i64 cpu_exclusive_addr;
>  static TCGv_i64 cpu_exclusive_val;
> +static TCGv_i64 cpu_exclusive_addr;
>  #ifdef CONFIG_USER_ONLY
>  static TCGv_i64 cpu_exclusive_test;
>  static TCGv_i32 cpu_exclusive_info;
> @@ -7391,6 +7391,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>                                 TCGv_i32 addr, int size)
>  {
>      TCGv_i32 tmp = tcg_temp_new_i32();
> +    TCGv_i64 val = tcg_temp_new_i64();
>  
>      s->is_ldex = true;
>  
> @@ -7415,20 +7416,20 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>  
>          tcg_gen_addi_i32(tmp2, addr, 4);
>          gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> +        tcg_gen_concat_i32_i64(val, tmp, tmp3);
>          tcg_temp_free_i32(tmp2);
> -        tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
>          store_reg(s, rt2, tmp3);
>      } else {
> -        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
> +        tcg_gen_extu_i32_i64(val, tmp);
>      }
> -
> +    gen_helper_atomic_claim(cpu_env, addr, val);
> +    tcg_temp_free_i64(val);
>      store_reg(s, rt, tmp);
> -    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>  }
>  
>  static void gen_clrex(DisasContext *s)
>  {
> -    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> +    gen_helper_atomic_clear(cpu_env);
>  }
>  
>  #ifdef CONFIG_USER_ONLY
> @@ -7445,84 +7446,19 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>                                  TCGv_i32 addr, int size)
>  {
>      TCGv_i32 tmp;
> -    TCGv_i64 val64, extaddr;
> -    TCGLabel *done_label;
> -    TCGLabel *fail_label;
> -
> -    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
> -         [addr] = {Rt};
> -         {Rd} = 0;
> -       } else {
> -         {Rd} = 1;
> -       } */
> -    fail_label = gen_new_label();
> -    done_label = gen_new_label();
> -    extaddr = tcg_temp_new_i64();
> -    tcg_gen_extu_i32_i64(extaddr, addr);
> -    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
> -    tcg_temp_free_i64(extaddr);
> -
> -    tmp = tcg_temp_new_i32();
> -    switch (size) {
> -    case 0:
> -        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
> -        break;
> -    case 1:
> -        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
> -        break;
> -    case 2:
> -    case 3:
> -        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
> -        break;
> -    default:
> -        abort();
> -    }
> -
> -    val64 = tcg_temp_new_i64();
> -    if (size == 3) {
> -        TCGv_i32 tmp2 = tcg_temp_new_i32();
> -        TCGv_i32 tmp3 = tcg_temp_new_i32();
> -        tcg_gen_addi_i32(tmp2, addr, 4);
> -        gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> -        tcg_temp_free_i32(tmp2);
> -        tcg_gen_concat_i32_i64(val64, tmp, tmp3);
> -        tcg_temp_free_i32(tmp3);
> -    } else {
> -        tcg_gen_extu_i32_i64(val64, tmp);
> -    }
> -    tcg_temp_free_i32(tmp);
> -
> -    tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
> -    tcg_temp_free_i64(val64);
> +    TCGv_i32 tmp2;
> +    TCGv_i64 val = tcg_temp_new_i64();
> +    TCGv_i32 tmp_size = tcg_const_i32(size);
>  
>      tmp = load_reg(s, rt);
> -    switch (size) {
> -    case 0:
> -        gen_aa32_st8(tmp, addr, get_mem_index(s));
> -        break;
> -    case 1:
> -        gen_aa32_st16(tmp, addr, get_mem_index(s));
> -        break;
> -    case 2:
> -    case 3:
> -        gen_aa32_st32(tmp, addr, get_mem_index(s));
> -        break;
> -    default:
> -        abort();
> -    }
> +    tmp2 = load_reg(s, rt2);
> +    tcg_gen_concat_i32_i64(val, tmp, tmp2);
>      tcg_temp_free_i32(tmp);
> -    if (size == 3) {
> -        tcg_gen_addi_i32(addr, addr, 4);
> -        tmp = load_reg(s, rt2);
> -        gen_aa32_st32(tmp, addr, get_mem_index(s));
> -        tcg_temp_free_i32(tmp);
> -    }
> -    tcg_gen_movi_i32(cpu_R[rd], 0);
> -    tcg_gen_br(done_label);
> -    gen_set_label(fail_label);
> -    tcg_gen_movi_i32(cpu_R[rd], 1);
> -    gen_set_label(done_label);
> -    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> +    tcg_temp_free_i32(tmp2);
> +
> +    gen_helper_atomic_cmpxchg64(cpu_R[rd], cpu_env, addr, val, tmp_size);
> +    tcg_temp_free_i64(val);
> +    tcg_temp_free_i32(tmp_size);
>  }
>  #endif

-- 
Alex Bennée


