[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed
From: |
Paulo César Pereira de Andrade |
Subject: |
Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed |
Date: |
Thu, 8 Sep 2022 14:06:39 -0300 |
Em qui., 8 de set. de 2022 às 06:37, Paul Cercueil
<paul@crapouillou.net> escreveu:
Hi Paul,
[snip]
> I spoke too soon, I'm still getting problems :(
>
> See the attachment.
Reading jit_print output I could not understand why r26 is not listed in:
L2: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r10 f14 f15 f16
f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
> The problem now is that r26 should be live at L2 but is not detected as
> such.
> This causes Lightning to use r26 as a temporary for the andi line 34
> (lines 153-155 in the generated code).
I tried mimicking the code with:
"""
/*
 * Reproducer script for check/lightning, transcribed from the reported
 * instruction sequence.
 *
 * - `anywhere` is a label used as the target of every jmpi that leaves the
 *   sequence: check/lightning does not accept a jmpi to an address that is
 *   not a node.
 * - %r(5) and %r(12) are only usable with a locally patched
 *   check/lightning.c; in the dumps they show up as r10 and r3.
 */
.disasm
.data 32
.code
anywhere:
movi %r0 0
prolog
/* Entry block: %v1 receives the lti result and is tested by the beqi. */
ldxi_i %v0 %v(13) 0x44
addi %v0 %v0 0x1
lti %v1 %v0 0x8
stxi_i 0x44 %v(13) %v0
ldxi_i %v0 %v(13) 0x40
addi %v0 %v0 0x18
stxi_i 0x40 %v(13) %v0
stxi_i 0x8 %v(13) %v1
subi %r(5) %r(5) 0x8
beqi L2 %v1 0x0
movi %v0 0x8008a1ac
live %r(5)
jmpi anywhere
/* L2: target of the beqi above; %v1 is still read by the stxi_i stores below. */
L2:
movi %v0 0x70
stxi_i 0x8 %v(13) %v0
movi %v0 0x800c0000
andi %r2 %v0 0x7fffff
addi %r1 %r2 0x10000000
stxi_i 0x3e7c %r1 %v1
addr %r2 %r2 %v(13)
stxi_i 0x4110 %r2 %v1
andi %r2 %v0 0x7fffff
addi %r1 %r2 0x10000000
stxi_i 0x3e78 %r1 %v1
addr %r2 %r2 %v(13)
stxi_i 0x410c %r2 %v1
andi %r2 %v0 0x7fffff
addi %r1 %r2 0x10000000
stxi_i 0x3e80 %r1 %v1
addr %r2 %r2 %v(13)
stxi_i 0x4114 %r2 %v1
stxi_i 0x4 %v(13) %v0
subi %r(5) %r(5) 0xe
/* L3: loop head; the bgti further down branches back here. */
L3:
movi %v0 0x800c0000
ldxi_i %v1 %v(13) 0x8
addr %v0 %v0 %v1
stxi_i 0x4 %v(13) %v0
ldxi_i %r2 %v(13) 0x244
movi %r0 0x286d0000
prepare
ori %r(12) %r0 0xd
/* Registers explicitly marked live before and after the indirect call. */
live %r0
live %r1
live %r2
live %v(13)
live %r(5)
callr %r2
live %r0
live %r1
live %r2
live %v(13)
live %r(5)
addi %v1 %v1 0xfffffff0
stxi_i 0x8 %v(13) %v1
subi %r(5) %r(5) 0xc
blti L4 %v1 0x0
bgti L3 %r(5) 0x0
live %r(5)
jmpi anywhere
/* L4: target of the blti above; %v0 and %v1 are reloaded before use. */
L4:
movi %v0 0x800c0000
movi %v1 0x0
andi %v(12) %v0 0x7fffff
addi %v(11) %v(12) 0x10000000
stxi_i 0x3f04 %v(11) %v1
addr %v(12) %v(12) %v(13)
stxi_i 0x4198 %v(12) %v1
stxi_i 0x4 %v(13) %v0
movi %v0 0x8008a208
stxi_i 0x7c %v(13) %v0
movi %v0 0x8008ae68
subi %r(5) %r(5) 0x8
live %r(5)
jmpi anywhere
ret
epilog
"""
but check/lightning does not allow a jmpi to an address that is not
a node.
I also needed to hack check/lightning.c to be able to use r10 and
r3, used as %r(5) and %r(12).
It shows the expected output in L2, as:
L2: r14 r26 r10
but it is because it can follow jumps. See below:
"""
L0:
#note x.tst:4
movi r28 0x0
L1: r14 r10 /* prolog */
ldxi_i r27 r14 0x44
addi r27 r27 0x1
lti r26 r27 0x8
stxi_i 0x44 r14 r27
ldxi_i r27 r14 0x40
addi r27 r27 0x18
stxi_i 0x40 r14 r27
stxi_i 0x8 r14 r26
subi r10 r10 0x8
beqi L2 r26 0x0
L6: r14 r26 r10
movi r27 0x8008a1ac
live r10
jmpi L0
L2: r14 r26 r10
#note x.tst:20
movi r27 0x70
stxi_i 0x8 r14 r27
movi r27 0x800c0000
andi r30 r27 0x7fffff
addi r29 r30 0x10000000
stxi_i 0x3e7c r29 r26
addr r30 r30 r14
stxi_i 0x4110 r30 r26
andi r30 r27 0x7fffff
addi r29 r30 0x10000000
stxi_i 0x3e78 r29 r26
addr r30 r30 r14
stxi_i 0x410c r30 r26
andi r30 r27 0x7fffff
addi r29 r30 0x10000000
stxi_i 0x3e80 r29 r26
addr r30 r30 r14
stxi_i 0x4114 r30 r26
stxi_i 0x4 r14 r27
subi r10 r10 0xe
L3: r14 r29 r10
#note x.tst:41
movi r27 0x800c0000
ldxi_i r26 r14 0x8
addr r27 r27 r26
stxi_i 0x4 r14 r27
ldxi_i r30 r14 0x244
movi r28 0x286d0000
prepare
ori r3 r28 0xd
live r28
live r29
live r30
live r14
live r10
callr r30
L7: r14 r26 r28 r29 r30 r10
live r28
live r29
live r30
live r14
live r10
addi r26 r26 0xfffffff0
stxi_i 0x8 r14 r26
subi r10 r10 0xc
blti L4 r26 0x0
L8: r14 r29 r10
bgti L3 r10 0x0
L9: r14 r10
live r10
jmpi L0
L4: r14 r10
#note x.tst:68
movi r27 0x800c0000
movi r26 0x0
andi r15 r27 0x7fffff
addi r16 r15 0x10000000
stxi_i 0x3f04 r16 r26
addr r15 r15 r14
stxi_i 0x4198 r15 r26
stxi_i 0x4 r14 r27
movi r27 0x8008a208
stxi_i 0x7c r14 r27
movi r27 0x8008ae68
subi r10 r10 0x8
live r10
jmpi L0
L10:
ret
L5: /* epilog */
"""
So, I hacked a bit more with some C code:
"""
#include <lightning.h>
int
main(int argc, char *argv[])
{
jit_state_t *_jit;
void (*code)(void);
jit_node_t *L2, *L3, *L4;
init_jit(argv[0]);
_jit = jit_new_state();
jit_prolog();
jit_ldxi_i(JIT_V0, JIT_V(13), 0x44);
jit_addi(JIT_V0, JIT_V0, 0x1);
jit_lti(JIT_V1, JIT_V0, 0x8);
jit_stxi_i(0x44, JIT_V(13), JIT_V0);
jit_ldxi_i(JIT_V0, JIT_V(13), 0x40);
jit_addi(JIT_V0, JIT_V0, 0x18);
jit_stxi_i(0x40, JIT_V(13), JIT_V0);
jit_stxi_i(0x8, JIT_V(13), JIT_V1);
jit_subi(JIT_R(5), JIT_R(5), 0x8);
L2 = jit_beqi(JIT_V1, 0x0);
jit_movi(JIT_V0, 0x8008a1ac);
jit_live(JIT_R(5));
jit_patch_abs(jit_jmpi(), 0xdeadbeef);
jit_patch(L2);
jit_movi(JIT_V0, 0x70);
jit_stxi_i(0x8, JIT_V(13), JIT_V0);
jit_movi(JIT_V0, 0x800c0000);
jit_andi(JIT_R2, JIT_V0, 0x7fffff);
jit_addi(JIT_R1, JIT_R2, 0x10000000);
jit_stxi_i(0x3e7c, JIT_R1, JIT_V1);
jit_addr(JIT_R2, JIT_R2, JIT_V(13));
jit_stxi_i(0x4110, JIT_R2, JIT_V1);
jit_andi(JIT_R2, JIT_V0, 0x7fffff);
jit_addi(JIT_R1, JIT_R2, 0x10000000);
jit_stxi_i(0x3e78, JIT_R1, JIT_V1);
jit_addr(JIT_R2, JIT_R2, JIT_V(13));
jit_stxi_i(0x410c, JIT_R2, JIT_V1);
jit_andi(JIT_R2, JIT_V0, 0x7fffff);
jit_addi(JIT_R1, JIT_R2, 0x10000000);
jit_stxi_i(0x3e80, JIT_R1, JIT_V1);
jit_addr(JIT_R2, JIT_R2, JIT_V(13));
jit_stxi_i(0x4114, JIT_R2, JIT_V1);
jit_stxi_i(0x4, JIT_V(13), JIT_V0);
jit_subi(JIT_R(5), JIT_R(5), 0xe);
L3 = jit_label();
jit_movi(JIT_V0, 0x800c0000);
jit_ldxi_i(JIT_V1, JIT_V(13), 0x8);
jit_addr(JIT_V0, JIT_V0, JIT_V1);
jit_stxi_i(0x4, JIT_V(13), JIT_V0);
jit_ldxi_i(JIT_R2, JIT_V(13), 0x244);
jit_movi(JIT_R0, 0x286d0000);
jit_prepare();
jit_ori(JIT_R(12), JIT_R0, 0xd);
jit_live(JIT_R0);
jit_live(JIT_R1);
jit_live(JIT_R2);
jit_live(JIT_V(13));
jit_live(JIT_R(5));
jit_callr(JIT_R2);
jit_live(JIT_R0);
jit_live(JIT_R1);
jit_live(JIT_R2);
jit_live(JIT_V(13));
jit_live(JIT_R(5));
jit_addi(JIT_V1, JIT_V1, 0xfffffff0);
jit_stxi_i(0x8, JIT_V(13), JIT_V1);
jit_subi(JIT_R(5), JIT_R(5), 0xc);
L4 = jit_blti(JIT_V1, 0x0);
jit_patch_at(jit_bgti(JIT_R(5), 0x0), L3);
jit_live(JIT_R(5));
jit_patch_abs(jit_jmpi(), 0xdeadbeef);
jit_patch(L4);
jit_movi(JIT_V0, 0x800c0000);
jit_movi(JIT_V1, 0x0);
jit_andi(JIT_V(12), JIT_V0, 0x7fffff);
jit_addi(JIT_V(11), JIT_V(12), 0x10000000);
jit_stxi_i(0x3f04, JIT_V(11), JIT_V1);
jit_addr(JIT_V(12), JIT_V(12), JIT_V(13));
jit_stxi_i(0x4198, JIT_V(12), JIT_V1);
jit_stxi_i(0x4, JIT_V(13), JIT_V0);
jit_movi(JIT_V0, 0x8008a208);
jit_stxi_i(0x7c, JIT_V(13), JIT_V0);
jit_movi(JIT_V0, 0x8008ae68);
jit_subi(JIT_R(5), JIT_R(5), 0x8);
jit_live(JIT_R(5));
jit_patch_abs(jit_jmpi(), 0xdeadbeef);
jit_ret();
jit_epilog();
code = jit_emit();
#if 1
jit_print();
#endif
jit_clear_state();
//(*code)();
jit_destroy_state();
finish_jit();
return (0);
}
"""
and it still prints the expected state of r26 as live (in the
output each label Ln gets renamed to L(n-1), so L2 shows up as L1):
"""
L0: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r28 r29 r30 r10
f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30
f31 /* prolog */
ldxi_i r27 r14 0x44
addi r27 r27 0x1
lti r26 r27 0x8
stxi_i 0x44 r14 r27
ldxi_i r27 r14 0x40
addi r27 r27 0x18
stxi_i 0x40 r14 r27
stxi_i 0x8 r14 r26
subi r10 r10 0x8
beqi L1 r26 0x0
L5: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
movi r27 0x8008a1ac
live r10
jmpi 0xdeadbeef
L1: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r10 f14 f15
f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
movi r27 0x70
stxi_i 0x8 r14 r27
movi r27 0x800c0000
andi r30 r27 0x7fffff
addi r29 r30 0x10000000
stxi_i 0x3e7c r29 r26
addr r30 r30 r14
stxi_i 0x4110 r30 r26
andi r30 r27 0x7fffff
addi r29 r30 0x10000000
stxi_i 0x3e78 r29 r26
addr r30 r30 r14
stxi_i 0x410c r30 r26
andi r30 r27 0x7fffff
addi r29 r30 0x10000000
stxi_i 0x3e80 r29 r26
addr r30 r30 r14
stxi_i 0x4114 r30 r26
stxi_i 0x4 r14 r27
subi r10 r10 0xe
L2: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r29 r10 f14 f15
f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
movi r27 0x800c0000
ldxi_i r26 r14 0x8
addr r27 r27 r26
stxi_i 0x4 r14 r27
ldxi_i r30 r14 0x244
movi r28 0x286d0000
prepare
ori r3 r28 0xd
live r28
live r29
live r30
live r14
live r10
callr r30
L6: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
live r28
live r29
live r30
live r14
live r10
addi r26 r26 0xfffffff0
stxi_i 0x8 r14 r26
subi r10 r10 0xc
blti L3 r26 0x0
L7: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
bgti L2 r10 0x0
L8: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
live r10
jmpi 0xdeadbeef
L3: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
movi r27 0x800c0000
movi r26 0x0
andi r15 r27 0x7fffff
addi r16 r15 0x10000000
stxi_i 0x3f04 r16 r26
addr r15 r15 r14
stxi_i 0x4198 r15 r26
stxi_i 0x4 r14 r27
movi r27 0x8008a208
stxi_i 0x7c r14 r27
movi r27 0x8008ae68
subi r10 r10 0x8
live r10
jmpi 0xdeadbeef
L9:
ret
L4: /* epilog */
"""
> Cheers,
> -Paul
Thanks,
Paulo
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, (continued)
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/05
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/05
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/06
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/07
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/07
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/07
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/07
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/07
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/07
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/08
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed,
Paulo César Pereira de Andrade <=
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/08
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/08
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/08
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/08
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/09
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/09
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/09
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paul Cercueil, 2022/09/09
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/09
- Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed, Paulo César Pereira de Andrade, 2022/09/09