| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 |
| |
| ; |
| ; Library Functions |
| ; |
| |
; tst1: float copysign through the external function @copysignf (declared
; below as an ordinary function, not an llvm.* intrinsic). The arguments are
; passed swapped: %b first, %a second.
; Expected output: i686 stores both floats to the stack and emits a real
; call to copysignf; x86_64 exchanges xmm0 and xmm1 via xmm2 and then jumps
; to copysignf as a tail call.
| define float @tst1(float %a, float %b) nounwind { |
| ; X86-LABEL: tst1: |
| ; X86: # %bb.0: |
| ; X86-NEXT: subl $8, %esp |
| ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-NEXT: movss %xmm1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movss %xmm0, (%esp) |
| ; X86-NEXT: calll copysignf |
| ; X86-NEXT: addl $8, %esp |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: tst1: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movaps %xmm0, %xmm2 |
| ; X64-NEXT: movaps %xmm1, %xmm0 |
| ; X64-NEXT: movaps %xmm2, %xmm1 |
| ; X64-NEXT: jmp copysignf # TAILCALL |
| %tmp = tail call float @copysignf( float %b, float %a ) |
| ret float %tmp |
| } |
| |
; tst2: double copysign through the external function @copysign. The
; magnitude is %a; the sign operand is computed first as
; fpext(fadd float %b, %c) to double.
; Expected output: the addss + cvtss2sd computation is emitted, followed by
; a real call to copysign on i686 and a tail-call jump on x86_64.
| define double @tst2(double %a, float %b, float %c) nounwind { |
| ; X86-LABEL: tst2: |
| ; X86: # %bb.0: |
| ; X86-NEXT: subl $16, %esp |
| ; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm1 |
| ; X86-NEXT: cvtss2sd %xmm1, %xmm1 |
| ; X86-NEXT: movsd %xmm0, (%esp) |
| ; X86-NEXT: movsd %xmm1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: calll copysign |
| ; X86-NEXT: addl $16, %esp |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: tst2: |
| ; X64: # %bb.0: |
| ; X64-NEXT: addss %xmm2, %xmm1 |
| ; X64-NEXT: cvtss2sd %xmm1, %xmm1 |
| ; X64-NEXT: jmp copysign # TAILCALL |
| %tmp1 = fadd float %b, %c |
| %tmp2 = fpext float %tmp1 to double |
| %tmp = tail call double @copysign( double %a, double %tmp2 ) |
| ret double %tmp |
| } |
| |
; tst3: x86_fp80 copysign through the external function @copysignl, with
; the arguments swapped (%b first, %a second).
; Expected output: both targets reload the two 80-bit values with fldt,
; store them to the outgoing argument area with fstpt, and emit a real call
; (calll / callq) to copysignl. Unlike tst1/tst2, the x86_64 run does not
; expect a tail-call jump here.
| define x86_fp80 @tst3(x86_fp80 %a, x86_fp80 %b) nounwind { |
| ; X86-LABEL: tst3: |
| ; X86: # %bb.0: |
| ; X86-NEXT: subl $24, %esp |
| ; X86-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-NEXT: fstpt {{[0-9]+}}(%esp) |
| ; X86-NEXT: fstpt (%esp) |
| ; X86-NEXT: calll copysignl |
| ; X86-NEXT: addl $24, %esp |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: tst3: |
| ; X64: # %bb.0: |
| ; X64-NEXT: subq $40, %rsp |
| ; X64-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; X64-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; X64-NEXT: fstpt {{[0-9]+}}(%rsp) |
| ; X64-NEXT: fstpt (%rsp) |
| ; X64-NEXT: callq copysignl |
| ; X64-NEXT: addq $40, %rsp |
| ; X64-NEXT: retq |
| %tmp = tail call x86_fp80 @copysignl( x86_fp80 %b, x86_fp80 %a ) |
| ret x86_fp80 %tmp |
| } |
| |
; External declarations of the library-named copysign functions called by
; tst1, tst2 and tst3. They are ordinary functions rather than llvm.*
; intrinsics; the assertions for those tests expect a call or tail-call
; jump to each of these symbols.
| declare dso_local float @copysignf(float, float) |
| declare dso_local double @copysign(double, double) |
| declare dso_local x86_fp80 @copysignl(x86_fp80, x86_fp80) |
| |
| ; |
| ; LLVM Intrinsics |
| ; |
| |
; int1: float copysign through the @llvm.copysign.f32 intrinsic, with the
; arguments swapped (%b is the magnitude operand, %a the sign operand).
; Expected output: no call is emitted. Each operand is ANDed with a
; constant-pool value (presumably a magnitude mask and a sign-bit mask; the
; constants themselves are not shown in this file) and the two results are
; combined with orps. On i686 the result is additionally stored to the
; stack and reloaded with flds before returning.
| define float @int1(float %a, float %b) nounwind { |
| ; X86-LABEL: int1: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %eax |
| ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 |
| ; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-NEXT: orps %xmm0, %xmm1 |
| ; X86-NEXT: movss %xmm1, (%esp) |
| ; X86-NEXT: flds (%esp) |
| ; X86-NEXT: popl %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: int1: |
| ; X64: # %bb.0: |
| ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; X64-NEXT: orps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %tmp = tail call float @llvm.copysign.f32( float %b, float %a ) |
| ret float %tmp |
| } |
| |
; int2: double copysign through the @llvm.copysign.f64 intrinsic. The
; magnitude is %a; the sign operand is fpext(fadd float %b, %c) to double.
; Expected output: addss + cvtss2sd compute the sign operand, then the same
; andps / andps / orps combination as in int1 is used and no call is
; emitted. The i686 run sets up an %ebp frame, aligns %esp with
; andl $-8, and returns the value by storing it with movlps and reloading
; it with fldl.
| define double @int2(double %a, float %b, float %c) nounwind { |
| ; X86-LABEL: int2: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: andl $-8, %esp |
| ; X86-NEXT: subl $8, %esp |
| ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-NEXT: addss 20(%ebp), %xmm0 |
| ; X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero |
| ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-NEXT: cvtss2sd %xmm0, %xmm0 |
| ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 |
| ; X86-NEXT: orps %xmm1, %xmm0 |
| ; X86-NEXT: movlps %xmm0, (%esp) |
| ; X86-NEXT: fldl (%esp) |
| ; X86-NEXT: movl %ebp, %esp |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: int2: |
| ; X64: # %bb.0: |
| ; X64-NEXT: addss %xmm2, %xmm1 |
| ; X64-NEXT: cvtss2sd %xmm1, %xmm1 |
| ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; X64-NEXT: orps %xmm1, %xmm0 |
| ; X64-NEXT: retq |
| %tmp1 = fadd float %b, %c |
| %tmp2 = fpext float %tmp1 to double |
| %tmp = tail call double @llvm.copysign.f64( double %a, double %tmp2 ) |
| ret double %tmp |
| } |
| |
; int3: x86_fp80 copysign through the @llvm.copysign.f80 intrinsic, with
; the arguments swapped (%b is the magnitude operand, %a the sign operand).
; Expected output: an x87-only sequence with no call. One loaded value is
; spilled to the stack with fstpt and one byte of that spill is tested
; against $-128 (presumably the byte that holds the sign bit of the sign
; operand). The other value gets fabs, a negated copy is produced with
; fld %st(0) + fchs, and fcmovne chooses between the two based on the testb
; result.
| define x86_fp80 @int3(x86_fp80 %a, x86_fp80 %b) nounwind { |
| ; X86-LABEL: int3: |
| ; X86: # %bb.0: |
| ; X86-NEXT: subl $12, %esp |
| ; X86-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-NEXT: fstpt (%esp) |
| ; X86-NEXT: fabs |
| ; X86-NEXT: fld %st(0) |
| ; X86-NEXT: fchs |
| ; X86-NEXT: testb $-128, {{[0-9]+}}(%esp) |
| ; X86-NEXT: fxch %st(1) |
| ; X86-NEXT: fcmovne %st(1), %st |
| ; X86-NEXT: fstp %st(1) |
| ; X86-NEXT: addl $12, %esp |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: int3: |
| ; X64: # %bb.0: |
| ; X64-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; X64-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; X64-NEXT: fstpt -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: fabs |
| ; X64-NEXT: fld %st(0) |
| ; X64-NEXT: fchs |
| ; X64-NEXT: testb $-128, -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: fxch %st(1) |
| ; X64-NEXT: fcmovne %st(1), %st |
| ; X64-NEXT: fstp %st(1) |
| ; X64-NEXT: retq |
| %tmp = tail call x86_fp80 @llvm.copysign.f80( x86_fp80 %b, x86_fp80 %a ) |
| ret x86_fp80 %tmp |
| } |
| |
; cst1: @llvm.copysign.f32 with two constant operands (magnitude 1.0,
; sign taken from -2.0).
; Expected output: the intrinsic is folded away. i686 materialises the
; result with fld1 + fchs; x86_64 loads a single float constant from memory
; with movss. Neither run expects any and/or masking code.
| define float @cst1() nounwind { |
| ; X86-LABEL: cst1: |
| ; X86: # %bb.0: |
| ; X86-NEXT: fld1 |
| ; X86-NEXT: fchs |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: cst1: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X64-NEXT: retq |
| %tmp = tail call float @llvm.copysign.f32( float 1.0, float -2.0 ) |
| ret float %tmp |
| } |
| |
; cst2: @llvm.copysign.f64 with magnitude 0.0 and a sign operand built from
; constants only: fpext(fadd float -1.0, -1.0) to double.
; Expected output: the fadd, the fpext and the intrinsic are all folded.
; i686 produces the result with fldz + fchs; x86_64 loads a single double
; constant from memory with movsd.
| define double @cst2() nounwind { |
| ; X86-LABEL: cst2: |
| ; X86: # %bb.0: |
| ; X86-NEXT: fldz |
| ; X86-NEXT: fchs |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: cst2: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X64-NEXT: retq |
| %tmp1 = fadd float -1.0, -1.0 |
| %tmp2 = fpext float %tmp1 to double |
| %tmp = tail call double @llvm.copysign.f64( double 0.0, double %tmp2 ) |
| ret double %tmp |
| } |
| |
; cst3: @llvm.copysign.f80 with a zeroinitializer magnitude and a sign
; operand built from constants only: fpext(fadd float -1.0, -1.0) to
; x86_fp80.
; Expected output: everything is folded, and both targets return the value
; with just fldz + fchs on the x87 stack.
| define x86_fp80 @cst3() nounwind { |
| ; X86-LABEL: cst3: |
| ; X86: # %bb.0: |
| ; X86-NEXT: fldz |
| ; X86-NEXT: fchs |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: cst3: |
| ; X64: # %bb.0: |
| ; X64-NEXT: fldz |
| ; X64-NEXT: fchs |
| ; X64-NEXT: retq |
| %tmp1 = fadd float -1.0, -1.0 |
| %tmp2 = fpext float %tmp1 to x86_fp80 |
| %tmp = tail call x86_fp80 @llvm.copysign.f80( x86_fp80 zeroinitializer, x86_fp80 %tmp2 ) |
| ret x86_fp80 %tmp |
| } |
| |
; PR41749: @llvm.copysign.f80 with an all-zero-bits x86_fp80 magnitude and
; an undef sign operand; the result is stored through an undef pointer.
; NOTE(review): the name suggests a regression test for bug report PR41749;
; the original failure is not described in this file - confirm against the
; bug tracker.
; Expected output: the select-style x87 sequence is still emitted (fstpt
; spill, testb $-128, fchs, fcmovne) and the result is stored with fstpt
; through (%eax) / (%rax), which no preceding instruction in the function
; sets. This function has no nounwind attribute, and the i686 run also
; expects .cfi_def_cfa_offset directives around the stack adjustment.
| define void @PR41749() { |
| ; X86-LABEL: PR41749: |
| ; X86: # %bb.0: |
| ; X86-NEXT: subl $12, %esp |
| ; X86-NEXT: .cfi_def_cfa_offset 16 |
| ; X86-NEXT: fldz |
| ; X86-NEXT: fld %st(0) |
| ; X86-NEXT: fstpt (%esp) |
| ; X86-NEXT: testb $-128, {{[0-9]+}}(%esp) |
| ; X86-NEXT: fld %st(0) |
| ; X86-NEXT: fchs |
| ; X86-NEXT: fxch %st(1) |
| ; X86-NEXT: fcmovne %st(1), %st |
| ; X86-NEXT: fstp %st(1) |
| ; X86-NEXT: fstpt (%eax) |
| ; X86-NEXT: addl $12, %esp |
| ; X86-NEXT: .cfi_def_cfa_offset 4 |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: PR41749: |
| ; X64: # %bb.0: |
| ; X64-NEXT: fldz |
| ; X64-NEXT: fld %st(0) |
| ; X64-NEXT: fstpt -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: testb $-128, -{{[0-9]+}}(%rsp) |
| ; X64-NEXT: fld %st(0) |
| ; X64-NEXT: fchs |
| ; X64-NEXT: fxch %st(1) |
| ; X64-NEXT: fcmovne %st(1), %st |
| ; X64-NEXT: fstp %st(1) |
| ; X64-NEXT: fstpt (%rax) |
| ; X64-NEXT: retq |
| %1 = call x86_fp80 @llvm.copysign.f80(x86_fp80 0xK00000000000000000000, x86_fp80 undef) |
| store x86_fp80 %1, ptr undef, align 16 |
| ret void |
| } |
| |
; Declarations of the llvm.copysign intrinsics used by the int*, cst* and
; PR41749 tests, one per floating-point type exercised in this file.
; In each declaration the first parameter is named %Mag and the second
; %Sgn.
| declare dso_local float @llvm.copysign.f32(float %Mag, float %Sgn) |
| declare dso_local double @llvm.copysign.f64(double %Mag, double %Sgn) |
| declare dso_local x86_fp80 @llvm.copysign.f80(x86_fp80 %Mag, x86_fp80 %Sgn) |