Dump of assembler code for function volk_32f_s32f_multiply_32f_a_sse:
# -----------------------------------------------------------------------------
# GDB disassembly, AT&T operand order (source first, destination last).
# Purpose (as shown by the instructions): multiply every 4-byte float read
# through %rsi by the scalar in %xmm0 and store the product through %rdi,
# for %edx elements.
#
# Register roles, as used below:
#   %rdi  = destination base pointer (only written through)
#   %rsi  = source base pointer (only read through)
#   %xmm0 = scalar multiplier (never modified in this function)
#   %edx  = element count (unsigned; never modified in this function)
# NOTE(review): this matches the System V AMD64 argument registers for
# (float *dst, const float *src, float scalar, unsigned count) -- confirm
# against the C prototype.
#
# Structure:
#   +0   .. +30   setup: broadcast scalar, count/4, save callee-saved registers
#   +32  .. +72   main loop: 4 floats per iteration using movaps
#   +76  .. +120  tail setup: how many elements are left
#   +122 .. +224  vectorized tail (unreachable, see note at +120)
#   +226 .. +256  scalar tail: 1 float per iteration
#   +258 .. +262  restore registers and return
# -----------------------------------------------------------------------------

# --- Setup ---------------------------------------------------------------------
# %xmm2 ends up holding the scalar replicated into all four lanes.
# %r8d = count / 4 (number of 16-byte groups). %r9 and %rax start as copies of
# the source and destination pointers and are what the tail code uses.
# %r12, %rbp and %rbx are saved here and restored at +258..+260.
0x00007ffff3322e50 <+0>: movaps %xmm0,%xmm1
0x00007ffff3322e53 <+3>: push %r12
0x00007ffff3322e55 <+5>: mov %edx,%r8d
0x00007ffff3322e58 <+8>: mov %rsi,%r9
0x00007ffff3322e5b <+11>: mov %rdi,%rax
0x00007ffff3322e5e <+14>: shufps $0x0,%xmm1,%xmm1          # copy lane 0 into all four lanes
0x00007ffff3322e62 <+18>: push %rbp
0x00007ffff3322e63 <+19>: shr $0x2,%r8d                    # r8d = count / 4
0x00007ffff3322e67 <+23>: test %r8d,%r8d
0x00007ffff3322e6a <+26>: push %rbx
0x00007ffff3322e6b <+27>: movaps %xmm1,%xmm2               # xmm2 = broadcast scalar
0x00007ffff3322e6e <+30>: je 0x7ffff3322e9c                # fewer than 4 elements: skip main loop (to +76)

# --- Main loop: 4 floats per iteration -------------------------------------------
# %rax is reused as a byte offset starting at 0; %rcx = 16 * r8d is the byte
# offset at which to stop. movaps faults on addresses that are not 16-byte
# aligned, so this loop relies on both buffers being 16-byte aligned.
0x00007ffff3322e70 <+32>: lea -0x1(%r8),%ecx               # ecx = r8d - 1, zero-extended into rcx
0x00007ffff3322e74 <+36>: xor %eax,%eax                    # byte offset = 0
0x00007ffff3322e76 <+38>: add $0x1,%rcx                    # rcx = r8d as a 64-bit value
0x00007ffff3322e7a <+42>: shl $0x4,%rcx                    # rcx = r8d * 16 bytes
0x00007ffff3322e7e <+46>: xchg %ax,%ax                     # no-op; presumably padding so the loop top sits at +48
0x00007ffff3322e80 <+48>: movaps (%rsi,%rax,1),%xmm1       # load 4 floats from source
0x00007ffff3322e84 <+52>: mulps %xmm2,%xmm1                # multiply each lane by the scalar
0x00007ffff3322e87 <+55>: movaps %xmm1,(%rdi,%rax,1)       # store 4 floats to destination
0x00007ffff3322e8b <+59>: add $0x10,%rax
0x00007ffff3322e8f <+63>: cmp %rcx,%rax
0x00007ffff3322e92 <+66>: jne 0x7ffff3322e80
# Turn the final byte offset back into pointers just past the processed data.
0x00007ffff3322e94 <+68>: lea (%rsi,%rax,1),%r9            # r9 = source tail pointer
0x00007ffff3322e98 <+72>: lea (%rdi,%rax,1),%rax           # rax = destination tail pointer

# --- Tail setup ------------------------------------------------------------------
# %edi is reused as the number of elements already processed (4 * r8d).
# If the count is not greater than that, nothing is left and we return.
0x00007ffff3322e9c <+76>: lea 0x0(,%r8,4),%edi             # edi = elements done so far
0x00007ffff3322ea4 <+84>: cmp %edi,%edx
0x00007ffff3322ea6 <+86>: jbe 0x7ffff3322f52               # count <= done: go to epilogue (+258)
0x00007ffff3322eac <+92>: mov %edx,%ebx
0x00007ffff3322eae <+94>: mov %r9,%rcx                     # rcx = working source pointer for the vector tail
0x00007ffff3322eb1 <+97>: mov %rax,%rsi                    # rsi = working destination pointer for the vector tail
0x00007ffff3322eb4 <+100>: sub %edi,%ebx                   # ebx = elements remaining
0x00007ffff3322eb6 <+102>: mov %ebx,%r10d
0x00007ffff3322eb9 <+105>: shr $0x2,%r10d                  # r10d = remaining / 4
0x00007ffff3322ebd <+109>: lea 0x0(,%r10,4),%r11d          # r11d = remaining rounded down to a multiple of 4
0x00007ffff3322ec5 <+117>: test %r11d,%r11d
# Because edi = 4 * (edx >> 2), ebx = edx - edi is at most 3, so r10d and r11d
# are always 0 here and this jump is always taken. The instructions from +122
# through +224 therefore cannot execute in this function as dumped.
0x00007ffff3322ec8 <+120>: je 0x7ffff3322f32               # no full group of 4 left: scalar tail (+226)

# --- Vectorized tail (unreachable, see note at +120) ------------------------------
# Guard: take the vector path only if more than 7 elements remain AND the two
# tail pointers are more than 16 bytes apart in either direction.
0x00007ffff3322eca <+122>: lea 0x10(%r9),%r8               # r8 = source tail + 16
0x00007ffff3322ece <+126>: cmp $0x7,%ebx
0x00007ffff3322ed1 <+129>: lea 0x10(%rax),%r12             # r12 = destination tail + 16
0x00007ffff3322ed5 <+133>: seta %bpl                       # bpl = (remaining > 7)
0x00007ffff3322ed9 <+137>: cmp %r8,%rax
0x00007ffff3322edc <+140>: seta %r8b                       # r8b = (destination > source + 16)
0x00007ffff3322ee0 <+144>: cmp %r12,%r9
0x00007ffff3322ee3 <+147>: seta %r12b                      # r12b = (source > destination + 16)
0x00007ffff3322ee7 <+151>: or %r12d,%r8d
0x00007ffff3322eea <+154>: test %r8b,%bpl
0x00007ffff3322eed <+157>: je 0x7ffff3322f32               # guard failed: scalar tail (+226)
# Loop body: 4 floats per iteration, loaded and stored as two 8-byte halves
# (movlps/movhps) instead of one movaps. %r8d counts iterations up to %r10d.
0x00007ffff3322eef <+159>: xorps %xmm3,%xmm3               # xmm3 = 0
0x00007ffff3322ef2 <+162>: xor %r8d,%r8d                   # iteration counter = 0
0x00007ffff3322ef5 <+165>: nopl (%rax)                     # no-op; presumably alignment padding
0x00007ffff3322ef8 <+168>: movaps %xmm3,%xmm1              # start from a zeroed register
0x00007ffff3322efb <+171>: add $0x1,%r8d
0x00007ffff3322eff <+175>: movlps (%rcx),%xmm1             # low 8 bytes from source
0x00007ffff3322f02 <+178>: movhps 0x8(%rcx),%xmm1          # high 8 bytes from source
0x00007ffff3322f06 <+182>: add $0x10,%rcx
0x00007ffff3322f0a <+186>: mulps %xmm2,%xmm1
0x00007ffff3322f0d <+189>: movlps %xmm1,(%rsi)             # low 8 bytes to destination
0x00007ffff3322f10 <+192>: movhps %xmm1,0x8(%rsi)          # high 8 bytes to destination
0x00007ffff3322f14 <+196>: add $0x10,%rsi
0x00007ffff3322f18 <+200>: cmp %r8d,%r10d
0x00007ffff3322f1b <+203>: ja 0x7ffff3322ef8               # loop while r10d > r8d
# Advance the tail pointers and the processed count past the vectorized part.
0x00007ffff3322f1d <+205>: mov %r11d,%ecx
0x00007ffff3322f20 <+208>: add %r11d,%edi                  # done += r11d elements
0x00007ffff3322f23 <+211>: shl $0x2,%rcx                   # elements -> bytes (4 per element)
0x00007ffff3322f27 <+215>: add %rcx,%rax
0x00007ffff3322f2a <+218>: add %rcx,%r9
0x00007ffff3322f2d <+221>: cmp %r11d,%ebx
0x00007ffff3322f30 <+224>: je 0x7ffff3322f52               # nothing left over: epilogue (+258)

# --- Scalar tail: 1 float per iteration ------------------------------------------
# %rcx is a byte offset from the tail pointers in %r9 (source) and %rax
# (destination). %edi counts processed elements up to %edx. The multiplier is
# the original scalar, still in %xmm0.
0x00007ffff3322f32 <+226>: xor %ecx,%ecx                   # byte offset = 0
0x00007ffff3322f34 <+228>: nopl 0x0(%rax)                  # no-op; presumably alignment padding
0x00007ffff3322f38 <+232>: movss (%r9,%rcx,1),%xmm1        # load one float
0x00007ffff3322f3e <+238>: add $0x1,%edi
0x00007ffff3322f41 <+241>: mulss %xmm0,%xmm1
0x00007ffff3322f45 <+245>: movss %xmm1,(%rax,%rcx,1)       # store one float
0x00007ffff3322f4a <+250>: add $0x4,%rcx
0x00007ffff3322f4e <+254>: cmp %edi,%edx
0x00007ffff3322f50 <+256>: ja 0x7ffff3322f38               # loop while count > done

# --- Epilogue: restore in reverse push order and return ---------------------------
0x00007ffff3322f52 <+258>: pop %rbx
0x00007ffff3322f53 <+259>: pop %rbp
0x00007ffff3322f54 <+260>: pop %r12
0x00007ffff3322f56 <+262>: retq
End of assembler dump.