lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] mips: Optimize jit_lshr() and jit_lshr_u()


From: Paulo César Pereira de Andrade
Subject: Re: [PATCH] mips: Optimize jit_lshr() and jit_lshr_u()
Date: Mon, 2 Oct 2023 09:33:25 -0300

Em qui., 28 de set. de 2023 às 11:50, Paul Cercueil
<paul@crapouillou.net> escreveu:
>
> Rework the branch-less path to shrink the size of jit_lshr() by one
> instruction, and jit_lshr_u() by two instructions. It also needs one
> fewer scratch register, so the branching code path, which was only a
> fallback for when that register could not be allocated, can be dropped.
>
> Finally, fix whitespace issues, as the original code sometimes used tabs
> and sometimes spaces.

  The mixed tabs and spaces are due to the text editor I use for coding in
Lightning and its default automatic indentation.

> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> ---
>  lib/jit_mips-cpu.c | 72 ++++++++++++++--------------------------------
>  1 file changed, 21 insertions(+), 51 deletions(-)
>
> diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
> index 6a07e87..8350bf8 100644
> --- a/lib/jit_mips-cpu.c
> +++ b/lib/jit_mips-cpu.c
> @@ -2353,72 +2353,42 @@ static void
>  _xlshr(jit_state_t *_jit, jit_bool_t sign,
>         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
>  {
> -    jit_bool_t         branch;
> -    jit_word_t         over, zero, done, done_over;
> -    jit_int32_t                t0, s0, t1, s1, t2, s2, t3, s3;
> +    jit_int32_t                t0, s0, t2, s2, t3, s3;
>      s0 = jit_get_reg(jit_class_gpr);
>      t0 = rn(s0);
>      if (r0 == r2 || r1 == r2) {
> -       s2 = jit_get_reg(jit_class_gpr);
> -       t2 = rn(s2);
> -       movr(t2, r2);
> +        s2 = jit_get_reg(jit_class_gpr);
> +        t2 = rn(s2);
> +        movr(t2, r2);
>      }
>      else
> -       t2 = r2;
> +        t2 = r2;
>      if (r0 == r3 || r1 == r3) {
> -       s3 = jit_get_reg(jit_class_gpr);
> -       t3 = rn(s3);
> -       movr(t3, r3);
> -    }
> -    else
> -       t3 = r3;
> -    if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) {
> -       t1 = rn(s1);
> -       branch = 0;
> +        s3 = jit_get_reg(jit_class_gpr);
> +        t3 = rn(s3);
> +        movr(t3, r3);
>      }
>      else
> -       branch = 1;
> +        t3 = r3;
>      rsbi(t0, t3, __WORDSIZE);
>      lshr(r0, t2, t3);
>      if (sign)
> -       rshr(r1, t2, t0);
> +        rshr(r1, t2, t0);
>      else
> -       rshr_u(r1, t2, t0);
> -    if (branch) {
> -       zero = beqi(_jit->pc.w, t3, 0);
> -       over = beqi(_jit->pc.w, t3, __WORDSIZE);
> -       done = jmpi(_jit->pc.w, 1);
> -       flush();
> -       patch_at(over, _jit->pc.w);
> -       /* overflow */
> -       movi(r0, 0);
> -       done_over = jmpi(_jit->pc.w, 1);
> -       /* zero */
> -       flush();
> -       patch_at(zero, _jit->pc.w);
> -       if (sign)
> -           rshi(r1, t2, __WORDSIZE - 1);
> -       else
> -           movi(r1, 0);
> -       flush();
> -       patch_at(done, _jit->pc.w);
> -       patch_at(done_over, _jit->pc.w);
> +        rshr_u(r1, t2, t0);
> +    if (sign) {
> +        rshi(t0, t2, __WORDSIZE - 1);
> +        /* zero? */
> +        movzr(r1, t0, t3);
>      }
>      else {
> -       if (sign)
> -           rshi(t0, t2, __WORDSIZE - 1);
> -       else
> -           movi(t0, 0);
> -       /* zero? */
> -       movzr(r1, t0, t3);
> -       /* Branchless but 4 bytes longer than branching fallback */
> -       if (sign)
> -           movi(t0, 0);
> -       /* overflow? */
> -       eqi(t1, t3, __WORDSIZE);
> -       movnr(r0, t0, t1);
> -       jit_unget_reg(s1);
> +        /* zero? */
> +        movzr(r1, t3, t3);
>      }
> +    /* overflow? */
> +    nei(t0, t3, __WORDSIZE);
> +    movzr(r0, t0, t0);
> +
>      jit_unget_reg(s0);
>      if (t2 != r2)
>         jit_unget_reg(s2);
> --
> 2.40.1
>

Attachment: xedit.png
Description: PNG image


reply via email to

[Prev in Thread] Current Thread [Next in Thread]