[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 01/10] tcg: Split out swap_commutative as a subroutine
From: |
Richard Henderson |
Subject: |
Re: [Qemu-devel] [PATCH 01/10] tcg: Split out swap_commutative as a subroutine |
Date: |
Tue, 09 Oct 2012 09:40:18 -0700 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20120911 Thunderbird/15.0.1 |
On 10/09/2012 08:31 AM, Aurelien Jarno wrote:
> I am not talking about the code generated by TCG, but rather about the
> code generated by GCC. Does using sum += and sum -= bring a gain compared
> to the equivalent if-based function?
It's hard to tell. My guess is that it's about a wash. Adding an
artificial __attribute__((noinline)) to make it easier to see:
SUM version
0000000000000190 <swap_commutative>:
190: 48 83 ec 18 sub $0x18,%rsp
194: 4c 8b 06 mov (%rsi),%r8
197: 48 8b 0a mov (%rdx),%rcx
19a: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
1a1: 00 00
1a3: 48 89 44 24 08 mov %rax,0x8(%rsp)
1a8: 31 c0 xor %eax,%eax
1aa: 4c 89 c0 mov %r8,%rax
1ad: 49 89 c9 mov %rcx,%r9
1b0: 48 c1 e0 04 shl $0x4,%rax
1b4: 83 b8 00 00 00 00 01 cmpl $0x1,0x0(%rax)
1b6: R_X86_64_32S .bss
1bb: 0f 94 c0 sete %al
1be: 49 c1 e1 04 shl $0x4,%r9
1c2: 41 83 b9 00 00 00 00 cmpl $0x1,0x0(%r9)
1c9: 01
1c5: R_X86_64_32S .bss
1ca: 0f b6 c0 movzbl %al,%eax
1cd: 41 0f 94 c1 sete %r9b
1d1: 45 0f b6 c9 movzbl %r9b,%r9d
1d5: 44 29 c8 sub %r9d,%eax
1d8: 83 f8 01 cmp $0x1,%eax
1db: 75 23 jne 200 <swap_commutative+0x70>
1dd: 48 89 0e mov %rcx,(%rsi)
1e0: b8 01 00 00 00 mov $0x1,%eax
1e5: 4c 89 02 mov %r8,(%rdx)
1e8: 48 8b 54 24 08 mov 0x8(%rsp),%rdx
1ed: 64 48 33 14 25 28 00 xor %fs:0x28,%rdx
1f4: 00 00
1f6: 75 15 jne 20d <swap_commutative+0x7d>
1f8: 48 83 c4 18 add $0x18,%rsp
1fc: c3 retq
1fd: 0f 1f 00 nopl (%rax)
200: 48 39 cf cmp %rcx,%rdi
203: 75 04 jne 209 <swap_commutative+0x79>
205: 85 c0 test %eax,%eax
207: 74 d4 je 1dd <swap_commutative+0x4d>
209: 31 c0 xor %eax,%eax
20b: eb db jmp 1e8 <swap_commutative+0x58>
20d: 0f 1f 00 nopl (%rax)
210: e8 00 00 00 00 callq 215 <swap_commutative+0x85>
211: R_X86_64_PC32 __stack_chk_fail-0x4
=======
IF version
if ((temps[a1].state == TCG_TEMP_CONST
&& temps[a2].state != TCG_TEMP_CONST)
|| (dest == a2
&& ((temps[a1].state == TCG_TEMP_CONST
&& temps[a2].state == TCG_TEMP_CONST)
|| (temps[a1].state != TCG_TEMP_CONST
&& temps[a2].state != TCG_TEMP_CONST)))) {
0000000000000190 <swap_commutative>:
190: 48 83 ec 18 sub $0x18,%rsp
194: 4c 8b 02 mov (%rdx),%r8
197: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
19e: 00 00
1a0: 48 89 44 24 08 mov %rax,0x8(%rsp)
1a5: 31 c0 xor %eax,%eax
1a7: 48 8b 06 mov (%rsi),%rax
1aa: 48 89 c1 mov %rax,%rcx
1ad: 48 c1 e1 04 shl $0x4,%rcx
1b1: 83 b9 00 00 00 00 01 cmpl $0x1,0x0(%rcx)
1b3: R_X86_64_32S .bss
1b8: 74 0e je 1c8 <swap_commutative+0x38>
1ba: 4c 39 c7 cmp %r8,%rdi
1bd: 74 39 je 1f8 <swap_commutative+0x68>
1bf: 31 c0 xor %eax,%eax
1c1: eb 20 jmp 1e3 <swap_commutative+0x53>
1c3: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
1c8: 4c 89 c1 mov %r8,%rcx
1cb: 48 c1 e1 04 shl $0x4,%rcx
1cf: 83 b9 00 00 00 00 01 cmpl $0x1,0x0(%rcx)
1d1: R_X86_64_32S .bss
1d6: 74 36 je 20e <swap_commutative+0x7e>
1d8: 4c 89 06 mov %r8,(%rsi)
1db: 48 89 02 mov %rax,(%rdx)
1de: b8 01 00 00 00 mov $0x1,%eax
1e3: 48 8b 54 24 08 mov 0x8(%rsp),%rdx
1e8: 64 48 33 14 25 28 00 xor %fs:0x28,%rdx
1ef: 00 00
1f1: 75 16 jne 209 <swap_commutative+0x79>
1f3: 48 83 c4 18 add $0x18,%rsp
1f7: c3 retq
1f8: 48 c1 e7 04 shl $0x4,%rdi
1fc: 83 bf 00 00 00 00 01 cmpl $0x1,0x0(%rdi)
1fe: R_X86_64_32S .bss
203: 75 d3 jne 1d8 <swap_commutative+0x48>
205: 31 c0 xor %eax,%eax
207: eb da jmp 1e3 <swap_commutative+0x53>
209: e8 00 00 00 00 callq 20e <swap_commutative+0x7e>
20a: R_X86_64_PC32 __stack_chk_fail-0x4
20e: 4c 39 c7 cmp %r8,%rdi
211: 75 ac jne 1bf <swap_commutative+0x2f>
213: eb c3 jmp 1d8 <swap_commutative+0x48>
r~
[Qemu-devel] [PATCH 03/10] tcg: Swap commutative double-word comparisons, Richard Henderson, 2012/10/02
[Qemu-devel] [PATCH 02/10] tcg: Canonicalize add2 operand ordering, Richard Henderson, 2012/10/02
[Qemu-devel] [PATCH 04/10] tcg: Use common code when failing to optimize, Richard Henderson, 2012/10/02
[Qemu-devel] [PATCH 05/10] tcg: Optimize double-word comparisons against zero, Richard Henderson, 2012/10/02