qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 05/11] target-arm: Implement ccmp branchless


From: Peter Maydell
Subject: Re: [Qemu-devel] [PATCH v2 05/11] target-arm: Implement ccmp branchless
Date: Mon, 7 Sep 2015 18:31:53 +0100

On 2 September 2015 at 18:57, Richard Henderson <address@hidden> wrote:
> This can allow much of a ccmp to be elided when particular
> flags are subsequently dead.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  target-arm/translate-a64.c | 65 +++++++++++++++++++++++++++++++---------------
>  1 file changed, 44 insertions(+), 21 deletions(-)
>
> diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
> index dcac490..48ecf23 100644
> --- a/target-arm/translate-a64.c
> +++ b/target-arm/translate-a64.c
> @@ -3552,8 +3552,9 @@ static void disas_adc_sbc(DisasContext *s, uint32_t insn)
>  static void disas_cc(DisasContext *s, uint32_t insn)
>  {
>      unsigned int sf, op, y, cond, rn, nzcv, is_imm;
> -    TCGLabel *label_continue = NULL;
> -    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
> +    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
> +    TCGv_i64 tcg_res, tcg_y, tcg_rn;
> +    DisasCompare c;
>
>      if (!extract32(insn, 29, 1)) {
>          unallocated_encoding(s);
> @@ -3571,19 +3572,13 @@ static void disas_cc(DisasContext *s, uint32_t insn)
>      rn = extract32(insn, 5, 5);
>      nzcv = extract32(insn, 0, 4);
>
> -    if (cond < 0x0e) { /* not always */
> -        TCGLabel *label_match = gen_new_label();
> -        label_continue = gen_new_label();
> -        arm_gen_test_cc(cond, label_match);
> -        /* nomatch: */
> -        tcg_tmp = tcg_temp_new_i64();
> -        tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
> -        gen_set_nzcv(tcg_tmp);
> -        tcg_temp_free_i64(tcg_tmp);
> -        tcg_gen_br(label_continue);
> -        gen_set_label(label_match);
> -    }
> -    /* match, or condition is always */
> +    /* Set T0 = !COND.  */
> +    tcg_t0 = tcg_temp_new_i32();
> +    arm_test_cc(&c, cond);
> +    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
> +    arm_free_cc(&c);
> +
> +    /* Load the arguments for the new comparison.  */
>      if (is_imm) {
>          tcg_y = new_tmp_a64(s);
>          tcg_gen_movi_i64(tcg_y, y);
> @@ -3592,17 +3587,45 @@ static void disas_cc(DisasContext *s, uint32_t insn)
>      }
>      tcg_rn = cpu_reg(s, rn);
>
> -    tcg_tmp = tcg_temp_new_i64();
> +    /* Set the flags for the new comparison.  */
> +    tcg_res = tcg_temp_new_i64();
>      if (op) {
> -        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
> +        gen_sub_CC(sf, tcg_res, tcg_rn, tcg_y);
>      } else {
> -        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
> +        gen_add_CC(sf, tcg_res, tcg_rn, tcg_y);
>      }
> -    tcg_temp_free_i64(tcg_tmp);
> +    tcg_temp_free_i64(tcg_res);

It seems a bit unnecessary to rename this TCG temporary
(tcg_tmp -> tcg_res) as part of this patch.

>
> -    if (cond < 0x0e) { /* continue */
> -        gen_set_label(label_continue);
> +    /* If COND was false, force the flags to #nzcv.
> +       Note that T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).  */
> +    tcg_t1 = tcg_temp_new_i32();
> +    tcg_t2 = tcg_temp_new_i32();
> +    tcg_gen_neg_i32(tcg_t1, tcg_t0);
> +    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);

t2 is ~t1, right? Do we get better or worse code if we use
tcg_gen_andc_i32(..., tcg_t1) rather than creating t2 and
using tcg_gen_and_i32()?

> +
> +    if (nzcv & 8) { /* N */
> +        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
> +    } else {
> +        tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
> +    }
> +    if (nzcv & 4) { /* Z */
> +        tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
> +    } else {
> +        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
> +    }
> +    if (nzcv & 2) { /* C */
> +        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
> +    } else {
> +        tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
> +    }
> +    if (nzcv & 1) { /* V */
> +        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
> +    } else {
> +        tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
>      }
> +    tcg_temp_free_i32(tcg_t0);
> +    tcg_temp_free_i32(tcg_t1);
> +    tcg_temp_free_i32(tcg_t2);
>  }

Otherwise looks OK.

-- PMM



reply via email to

[Prev in Thread] Current Thread [Next in Thread]