qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 10/11] target-arm: Implement fccmp branchless


From: Laurent Desnogues
Subject: Re: [Qemu-devel] [PATCH 10/11] target-arm: Implement fccmp branchless
Date: Fri, 20 Feb 2015 14:57:54 +0100

Hi Richard,

On Thu, Feb 19, 2015 at 10:14 PM, Richard Henderson <address@hidden> wrote:
> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  target-arm/translate-a64.c | 50 
> +++++++++++++++++++++++++++-------------------
>  1 file changed, 29 insertions(+), 21 deletions(-)
>
> diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
> index 8171a1f..5539ae3 100644
> --- a/target-arm/translate-a64.c
> +++ b/target-arm/translate-a64.c
> @@ -4128,11 +4128,11 @@ static void disas_data_proc_reg(DisasContext *s, 
> uint32_t insn)
>      }
>  }
>
> -static void handle_fp_compare(DisasContext *s, bool is_double,
> -                              unsigned int rn, unsigned int rm,
> -                              bool cmp_with_zero, bool signal_all_nans)
> +static void handle_fp_compare_1(DisasContext *s, TCGv_i64 tcg_flags,
> +                                bool is_double, unsigned int rn,
> +                                unsigned int rm, bool cmp_with_zero,
> +                                bool signal_all_nans)
>  {
> -    TCGv_i64 tcg_flags = tcg_temp_new_i64();
>      TCGv_ptr fpst = get_fpstatus_ptr();
>
>      if (is_double) {
> @@ -4170,7 +4170,16 @@ static void handle_fp_compare(DisasContext *s, bool 
> is_double,
>      }
>
>      tcg_temp_free_ptr(fpst);
> +}
>
> +static void handle_fp_compare(DisasContext *s, bool is_double,
> +                              unsigned int rn, unsigned int rm,
> +                              bool cmp_with_zero, bool signal_all_nans)
> +{
> +    TCGv_i64 tcg_flags = tcg_temp_new_i64();
> +
> +    handle_fp_compare_1(s, tcg_flags, is_double, rn, rm,
> +                        cmp_with_zero, signal_all_nans);
>      gen_set_nzcv(tcg_flags);
>
>      tcg_temp_free_i64(tcg_flags);
> @@ -4215,8 +4224,8 @@ static void disas_fp_compare(DisasContext *s, uint32_t 
> insn)
>  static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
>  {
>      unsigned int mos, type, rm, cond, rn, op, nzcv;
> -    TCGv_i64 tcg_flags;
> -    TCGLabel *label_continue = NULL;
> +    TCGv_i64 t_flags, t_zero, t_nzcv;
> +    DisasCompare c;
>
>      mos = extract32(insn, 29, 3);
>      type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
> @@ -4235,23 +4244,22 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t 
> insn)
>          return;
>      }
>
> -    if (cond < 0x0e) { /* not always */
> -        TCGLabel *label_match = gen_new_label();
> -        label_continue = gen_new_label();
> -        arm_gen_test_cc(cond, label_match);
> -        /* nomatch: */
> -        tcg_flags = tcg_const_i64(nzcv << 28);
> -        gen_set_nzcv(tcg_flags);
> -        tcg_temp_free_i64(tcg_flags);
> -        tcg_gen_br(label_continue);
> -        gen_set_label(label_match);
> -    }
> +    /* Perform the new compare, but don't write the result back to flags. */
> +    t_flags = tcg_temp_new_i64();
> +    handle_fp_compare_1(s, t_flags, type, rn, rm, false, op);

The problem with this approach is that you'll always call the FP
compare which might result in FP flags corruption.

The ARMv8 manual clearly states that the FP compare should only be
called if the condition holds.

Thanks,

Laurent

> -    handle_fp_compare(s, type, rn, rm, false, op);
> +    /* If the condition is false, force the flags to #nzcv.  */
> +    arm_test_cc(&c, cond);
> +    t_zero = tcg_const_i64(0);
> +    t_nzcv = tcg_const_i64(nzcv << 28);
> +    tcg_gen_movcond_i64(c.cond, t_flags, c.value, t_zero, t_flags, t_nzcv);
> +    tcg_temp_free_i64(t_zero);
> +    tcg_temp_free_i64(t_nzcv);
> +    arm_free_cc(&c);
>
> -    if (cond < 0x0e) {
> -        gen_set_label(label_continue);
> -    }
> +    /* Write back the new flags.  */
> +    gen_set_nzcv(t_flags);
> +    tcg_temp_free_i64(t_flags);
>  }
>
>  /* copy src FP register to dst FP register; type specifies single or double 
> */
> --
> 2.1.0
>
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]