[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] mips: Optimize jit_lshr() and jit_lshr_u()
From: Paulo César Pereira de Andrade
Subject: Re: [PATCH] mips: Optimize jit_lshr() and jit_lshr_u()
Date: Mon, 2 Oct 2023 09:33:25 -0300
Em qui., 28 de set. de 2023 às 11:50, Paul Cercueil
<paul@crapouillou.net> escreveu:
>
> Rework the branch-less path to shrink the size of jit_lshr() by one
> instruction, and jit_lshr_u() by two instructions. It also uses one
> register less, so the whole code path that uses branches can be dropped.
>
> Finally, fix whitespace issues, as the original code sometimes used tabs
> and sometimes spaces.
This is due to the text editor I use for coding in Lightning and its default
automatic indentation.
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> ---
> lib/jit_mips-cpu.c | 72 ++++++++++++++--------------------------------
> 1 file changed, 21 insertions(+), 51 deletions(-)
>
> diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
> index 6a07e87..8350bf8 100644
> --- a/lib/jit_mips-cpu.c
> +++ b/lib/jit_mips-cpu.c
> @@ -2353,72 +2353,42 @@ static void
> _xlshr(jit_state_t *_jit, jit_bool_t sign,
> jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
> {
> - jit_bool_t branch;
> - jit_word_t over, zero, done, done_over;
> - jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
> + jit_int32_t t0, s0, t2, s2, t3, s3;
> s0 = jit_get_reg(jit_class_gpr);
> t0 = rn(s0);
> if (r0 == r2 || r1 == r2) {
> - s2 = jit_get_reg(jit_class_gpr);
> - t2 = rn(s2);
> - movr(t2, r2);
> + s2 = jit_get_reg(jit_class_gpr);
> + t2 = rn(s2);
> + movr(t2, r2);
> }
> else
> - t2 = r2;
> + t2 = r2;
> if (r0 == r3 || r1 == r3) {
> - s3 = jit_get_reg(jit_class_gpr);
> - t3 = rn(s3);
> - movr(t3, r3);
> - }
> - else
> - t3 = r3;
> - if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) {
> - t1 = rn(s1);
> - branch = 0;
> + s3 = jit_get_reg(jit_class_gpr);
> + t3 = rn(s3);
> + movr(t3, r3);
> }
> else
> - branch = 1;
> + t3 = r3;
> rsbi(t0, t3, __WORDSIZE);
> lshr(r0, t2, t3);
> if (sign)
> - rshr(r1, t2, t0);
> + rshr(r1, t2, t0);
> else
> - rshr_u(r1, t2, t0);
> - if (branch) {
> - zero = beqi(_jit->pc.w, t3, 0);
> - over = beqi(_jit->pc.w, t3, __WORDSIZE);
> - done = jmpi(_jit->pc.w, 1);
> - flush();
> - patch_at(over, _jit->pc.w);
> - /* overflow */
> - movi(r0, 0);
> - done_over = jmpi(_jit->pc.w, 1);
> - /* zero */
> - flush();
> - patch_at(zero, _jit->pc.w);
> - if (sign)
> - rshi(r1, t2, __WORDSIZE - 1);
> - else
> - movi(r1, 0);
> - flush();
> - patch_at(done, _jit->pc.w);
> - patch_at(done_over, _jit->pc.w);
> + rshr_u(r1, t2, t0);
> + if (sign) {
> + rshi(t0, t2, __WORDSIZE - 1);
> + /* zero? */
> + movzr(r1, t0, t3);
> }
> else {
> - if (sign)
> - rshi(t0, t2, __WORDSIZE - 1);
> - else
> - movi(t0, 0);
> - /* zero? */
> - movzr(r1, t0, t3);
> - /* Branchless but 4 bytes longer than branching fallback */
> - if (sign)
> - movi(t0, 0);
> - /* overflow? */
> - eqi(t1, t3, __WORDSIZE);
> - movnr(r0, t0, t1);
> - jit_unget_reg(s1);
> + /* zero? */
> + movzr(r1, t3, t3);
> }
> + /* overflow? */
> + nei(t0, t3, __WORDSIZE);
> + movzr(r0, t0, t0);
> +
> jit_unget_reg(s0);
> if (t2 != r2)
> jit_unget_reg(s2);
> --
> 2.40.1
>
xedit.png
Description: PNG image
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [PATCH] mips: Optimize jit_lshr() and jit_lshr_u(),
Paulo César Pereira de Andrade <=