| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s |
| |
| target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" |
| target triple = "i386-unknown-linux-unknown" |
| |
| ; Codegen test for frem on a 4-element half vector, built by llc at -O0 with |
| ; -mcpu=pentium4 for the i386 triple declared in this file. |
| ; |
| ; The assertions inside the function are autogenerated (see the note on the |
| ; first line of this file); regenerate them with |
| ; utils/update_llc_test_checks.py rather than editing them by hand. |
| ; |
| ; What the expected assembly listed below contains: |
| ;   * eight calls to __extendhfsf2, arranged as two calls ahead of each fmodf; |
| ;   * four calls to fmodf, each immediately followed by storing the x87 |
| ;     result and calling __truncsfhf2; |
| ;   * a final punpcklwd / punpcklwd / unpcklps sequence that combines the four |
| ;     results into xmm0 before the stack is restored and the function returns. |
| define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { |
| ; CHECK-LABEL: doTheTestMod: |
| ; CHECK: # %bb.0: # %Entry |
| ; CHECK-NEXT: subl $140, %esp |
| ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill |
| ; CHECK-NEXT: movaps %xmm0, %xmm6 |
| ; CHECK-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload |
| ; CHECK-NEXT: movaps %xmm0, %xmm1 |
| ; CHECK-NEXT: movaps %xmm0, %xmm3 |
| ; CHECK-NEXT: psrlq $48, %xmm3 |
| ; CHECK-NEXT: movaps %xmm0, %xmm2 |
| ; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,1,1] |
| ; CHECK-NEXT: psrld $16, %xmm0 |
| ; CHECK-NEXT: movaps %xmm6, %xmm7 |
| ; CHECK-NEXT: movaps %xmm6, %xmm4 |
| ; CHECK-NEXT: psrlq $48, %xmm4 |
| ; CHECK-NEXT: movaps %xmm6, %xmm5 |
| ; CHECK-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,1,1] |
| ; CHECK-NEXT: psrld $16, %xmm6 |
| ; CHECK-NEXT: pextrw $0, %xmm7, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: pextrw $0, %xmm6, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: pextrw $0, %xmm5, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: pextrw $0, %xmm4, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: pextrw $0, %xmm3, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: pextrw $0, %xmm2, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: pextrw $0, %xmm1, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) |
| ; CHECK-NEXT: # implicit-def: $xmm0 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 |
| ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: # implicit-def: $xmm0 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 |
| ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: # implicit-def: $xmm0 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 |
| ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: # implicit-def: $xmm0 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 |
| ; CHECK-NEXT: # implicit-def: $xmm1 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1 |
| ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: # implicit-def: $xmm1 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1 |
| ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: # implicit-def: $xmm1 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1 |
| ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: # implicit-def: $xmm1 |
| ; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1 |
| ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fxch %st(1) |
| ; CHECK-NEXT: fstps 4(%eax) |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll fmodf |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll __truncsfhf2 |
| ; CHECK-NEXT: movaps %xmm0, %xmm1 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fxch %st(1) |
| ; CHECK-NEXT: fstps 4(%eax) |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll fmodf |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll __truncsfhf2 |
| ; CHECK-NEXT: movaps %xmm0, %xmm1 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fxch %st(1) |
| ; CHECK-NEXT: fstps 4(%eax) |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll fmodf |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll __truncsfhf2 |
| ; CHECK-NEXT: movaps %xmm0, %xmm1 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill |
| ; CHECK-NEXT: pextrw $0, %xmm0, %eax |
| ; CHECK-NEXT: movw %ax, %cx |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: movw %cx, (%eax) |
| ; CHECK-NEXT: calll __extendhfsf2 |
| ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fxch %st(1) |
| ; CHECK-NEXT: fstps 4(%eax) |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll fmodf |
| ; CHECK-NEXT: movl %esp, %eax |
| ; CHECK-NEXT: fstps (%eax) |
| ; CHECK-NEXT: calll __truncsfhf2 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload |
| ; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Reload |
| ; CHECK-NEXT: # xmm2 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: movaps %xmm0, %xmm3 |
| ; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload |
| ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero |
| ; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] |
| ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] |
| ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; CHECK-NEXT: addl $140, %esp |
| ; CHECK-NEXT: retl |
| Entry: |
| ; Two 8-byte-aligned stack slots, one per vector argument. |
| %x = alloca <4 x half>, align 8 |
| %y = alloca <4 x half>, align 8 |
| ; Store both arguments to their slots, then load them back before use. |
| store <4 x half> %0, ptr %x, align 8 |
| store <4 x half> %1, ptr %y, align 8 |
| %2 = load <4 x half>, ptr %x, align 8 |
| %3 = load <4 x half>, ptr %y, align 8 |
| ; Vector frem of the reloaded operands; the assertions above expect this to |
| ; show up in the output as the four fmodf calls. |
| %4 = frem <4 x half> %2, %3 |
| ret <4 x half> %4 |
| } |
| |