| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 |
| ; RUN: llc -mtriple=i686-- < %s | FileCheck %s -check-prefixes=X87 |
| ; RUN: llc -mattr=+sse2 -mtriple=i686-- < %s | FileCheck %s -check-prefixes=X86-SSE |
| ; RUN: llc -mattr=+avx -mtriple=i686-- < %s | FileCheck %s -check-prefixes=X86-AVX |
| ; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE |
| ; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1 |
| ; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2 |
| ; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F |
| |
| ; Scalar f32 canonicalize: lowered as a multiply by 1.0 (fld1+fmuls on x87; mulss |
| ; against a constant-pool 1.0 on SSE/AVX). |
| define float @canon_fp32_varargsf32(float %a) { |
| ; X87-LABEL: canon_fp32_varargsf32: |
| ; X87: # %bb.0: |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmuls {{[0-9]+}}(%esp) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: canon_fp32_varargsf32: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: pushl %eax |
| ; X86-SSE-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 |
| ; X86-SSE-NEXT: movss %xmm0, (%esp) |
| ; X86-SSE-NEXT: flds (%esp) |
| ; X86-SSE-NEXT: popl %eax |
| ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canon_fp32_varargsf32: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: pushl %eax |
| ; X86-AVX-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vmovss %xmm0, (%esp) |
| ; X86-AVX-NEXT: flds (%esp) |
| ; X86-AVX-NEXT: popl %eax |
| ; X86-AVX-NEXT: .cfi_def_cfa_offset 4 |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canon_fp32_varargsf32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: canon_fp32_varargsf32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %canonicalized = call float @llvm.canonicalize.f32(float %a) |
| ret float %canonicalized |
| } |
| |
| ; x86_fp80 canonicalize: always goes through the x87 stack (fld1 + fmulp) on |
| ; every configuration, since SSE/AVX have no 80-bit support. |
| define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) { |
| ; X87-LABEL: canon_fp32_varargsf80: |
| ; X87: # %bb.0: |
| ; X87-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmulp %st, %st(1) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: canon_fp32_varargsf80: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-SSE-NEXT: fld1 |
| ; X86-SSE-NEXT: fmulp %st, %st(1) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canon_fp32_varargsf80: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-AVX-NEXT: fld1 |
| ; X86-AVX-NEXT: fmulp %st, %st(1) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canon_fp32_varargsf80: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; SSE-NEXT: fld1 |
| ; SSE-NEXT: fmulp %st, %st(1) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: canon_fp32_varargsf80: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; AVX-NEXT: fld1 |
| ; AVX-NEXT: fmulp %st, %st(1) |
| ; AVX-NEXT: retq |
| %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a) |
| ret x86_fp80 %canonicalized |
| } |
| |
| ; Checks that a canonicalize in the middle of an fsub/fadd chain keeps the |
| ; surrounding arithmetic intact; the canonicalize itself becomes fld1+fmulp. |
| define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { |
| ; X87-LABEL: complex_canonicalize_fmul_x86_fp80: |
| ; X87: # %bb.0: # %entry |
| ; X87-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X87-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X87-NEXT: fsub %st(1), %st |
| ; X87-NEXT: fld %st(0) |
| ; X87-NEXT: fadd %st(2), %st |
| ; X87-NEXT: fsubp %st, %st(1) |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmulp %st, %st(1) |
| ; X87-NEXT: fsubp %st, %st(1) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: complex_canonicalize_fmul_x86_fp80: |
| ; X86-SSE: # %bb.0: # %entry |
| ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-SSE-NEXT: fsub %st(1), %st |
| ; X86-SSE-NEXT: fld %st(0) |
| ; X86-SSE-NEXT: fadd %st(2), %st |
| ; X86-SSE-NEXT: fsubp %st, %st(1) |
| ; X86-SSE-NEXT: fld1 |
| ; X86-SSE-NEXT: fmulp %st, %st(1) |
| ; X86-SSE-NEXT: fsubp %st, %st(1) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: complex_canonicalize_fmul_x86_fp80: |
| ; X86-AVX: # %bb.0: # %entry |
| ; X86-AVX-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-AVX-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-AVX-NEXT: fsub %st(1), %st |
| ; X86-AVX-NEXT: fld %st(0) |
| ; X86-AVX-NEXT: fadd %st(2), %st |
| ; X86-AVX-NEXT: fsubp %st, %st(1) |
| ; X86-AVX-NEXT: fld1 |
| ; X86-AVX-NEXT: fmulp %st, %st(1) |
| ; X86-AVX-NEXT: fsubp %st, %st(1) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: complex_canonicalize_fmul_x86_fp80: |
| ; SSE: # %bb.0: # %entry |
| ; SSE-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; SSE-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; SSE-NEXT: fsub %st(1), %st |
| ; SSE-NEXT: fld %st(0) |
| ; SSE-NEXT: fadd %st(2), %st |
| ; SSE-NEXT: fsubp %st, %st(1) |
| ; SSE-NEXT: fld1 |
| ; SSE-NEXT: fmulp %st, %st(1) |
| ; SSE-NEXT: fsubp %st, %st(1) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: complex_canonicalize_fmul_x86_fp80: |
| ; AVX: # %bb.0: # %entry |
| ; AVX-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; AVX-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; AVX-NEXT: fsub %st(1), %st |
| ; AVX-NEXT: fld %st(0) |
| ; AVX-NEXT: fadd %st(2), %st |
| ; AVX-NEXT: fsubp %st, %st(1) |
| ; AVX-NEXT: fld1 |
| ; AVX-NEXT: fmulp %st, %st(1) |
| ; AVX-NEXT: fsubp %st, %st(1) |
| ; AVX-NEXT: retq |
| entry: |
| %mul1 = fsub x86_fp80 %a, %b |
| %add = fadd x86_fp80 %mul1, %b |
| %mul2 = fsub x86_fp80 %add, %mul1 |
| %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %mul2) |
| %result = fsub x86_fp80 %canonicalized, %b |
| ret x86_fp80 %result |
| } |
| |
| ; Canonicalize applied to the result of a maxnum-style select (olt | uno); the |
| ; select lowers to maxsd + unordered-compare blend, then mulsd by 1.0. |
| define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { |
| ; X87-LABEL: canonicalize_fp64: |
| ; X87: # %bb.0: # %start |
| ; X87-NEXT: fldl {{[0-9]+}}(%esp) |
| ; X87-NEXT: fldl {{[0-9]+}}(%esp) |
| ; X87-NEXT: fucom %st(1) |
| ; X87-NEXT: fnstsw %ax |
| ; X87-NEXT: # kill: def $ah killed $ah killed $ax |
| ; X87-NEXT: sahf |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: fucom %st(0) |
| ; X87-NEXT: fnstsw %ax |
| ; X87-NEXT: fld %st(1) |
| ; X87-NEXT: ja .LBB3_2 |
| ; X87-NEXT: # %bb.1: # %start |
| ; X87-NEXT: fstp %st(0) |
| ; X87-NEXT: fldz |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: .LBB3_2: # %start |
| ; X87-NEXT: fstp %st(1) |
| ; X87-NEXT: # kill: def $ah killed $ah killed $ax |
| ; X87-NEXT: sahf |
| ; X87-NEXT: jp .LBB3_4 |
| ; X87-NEXT: # %bb.3: # %start |
| ; X87-NEXT: fstp %st(1) |
| ; X87-NEXT: fldz |
| ; X87-NEXT: .LBB3_4: # %start |
| ; X87-NEXT: fstp %st(0) |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmulp %st, %st(1) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: canonicalize_fp64: |
| ; X86-SSE: # %bb.0: # %start |
| ; X86-SSE-NEXT: pushl %ebp |
| ; X86-SSE-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-SSE-NEXT: .cfi_offset %ebp, -8 |
| ; X86-SSE-NEXT: movl %esp, %ebp |
| ; X86-SSE-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-SSE-NEXT: andl $-8, %esp |
| ; X86-SSE-NEXT: subl $8, %esp |
| ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero |
| ; X86-SSE-NEXT: movapd %xmm0, %xmm2 |
| ; X86-SSE-NEXT: cmpunordsd %xmm0, %xmm2 |
| ; X86-SSE-NEXT: movapd %xmm2, %xmm3 |
| ; X86-SSE-NEXT: andpd %xmm1, %xmm3 |
| ; X86-SSE-NEXT: maxsd %xmm0, %xmm1 |
| ; X86-SSE-NEXT: andnpd %xmm1, %xmm2 |
| ; X86-SSE-NEXT: orpd %xmm3, %xmm2 |
| ; X86-SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 |
| ; X86-SSE-NEXT: movsd %xmm2, (%esp) |
| ; X86-SSE-NEXT: fldl (%esp) |
| ; X86-SSE-NEXT: movl %ebp, %esp |
| ; X86-SSE-NEXT: popl %ebp |
| ; X86-SSE-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canonicalize_fp64: |
| ; X86-AVX: # %bb.0: # %start |
| ; X86-AVX-NEXT: pushl %ebp |
| ; X86-AVX-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-AVX-NEXT: .cfi_offset %ebp, -8 |
| ; X86-AVX-NEXT: movl %esp, %ebp |
| ; X86-AVX-NEXT: .cfi_def_cfa_register %ebp |
| ; X86-AVX-NEXT: andl $-8, %esp |
| ; X86-AVX-NEXT: subl $8, %esp |
| ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero |
| ; X86-AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 |
| ; X86-AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 |
| ; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) |
| ; X86-AVX-NEXT: fldl (%esp) |
| ; X86-AVX-NEXT: movl %ebp, %esp |
| ; X86-AVX-NEXT: popl %ebp |
| ; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canonicalize_fp64: |
| ; SSE: # %bb.0: # %start |
| ; SSE-NEXT: movapd %xmm0, %xmm2 |
| ; SSE-NEXT: cmpunordsd %xmm0, %xmm2 |
| ; SSE-NEXT: movapd %xmm2, %xmm3 |
| ; SSE-NEXT: andpd %xmm1, %xmm3 |
| ; SSE-NEXT: maxsd %xmm0, %xmm1 |
| ; SSE-NEXT: andnpd %xmm1, %xmm2 |
| ; SSE-NEXT: orpd %xmm3, %xmm2 |
| ; SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 |
| ; SSE-NEXT: movapd %xmm2, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX1-LABEL: canonicalize_fp64: |
| ; AVX1: # %bb.0: # %start |
| ; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 |
| ; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 |
| ; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 |
| ; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: canonicalize_fp64: |
| ; AVX2: # %bb.0: # %start |
| ; AVX2-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 |
| ; AVX2-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 |
| ; AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: canonicalize_fp64: |
| ; AVX512F: # %bb.0: # %start |
| ; AVX512F-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 |
| ; AVX512F-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 |
| ; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} |
| ; AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 |
| ; AVX512F-NEXT: retq |
| start: |
| %c = fcmp olt double %a, %b |
| %d = fcmp uno double %a, 0.000000e+00 |
| %or.cond.i.i = or i1 %d, %c |
| %e = select i1 %or.cond.i.i, double %b, double %a |
| %f = tail call double @llvm.canonicalize.f64(double %e) #2 |
| ret double %f |
| } |
| |
| ; f32 twin of canonicalize_fp64 above: maxss/blend for the select, then mulss |
| ; by a constant-pool 1.0 for the canonicalize. |
| define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { |
| ; X87-LABEL: canonicalize_fp32: |
| ; X87: # %bb.0: # %start |
| ; X87-NEXT: flds {{[0-9]+}}(%esp) |
| ; X87-NEXT: flds {{[0-9]+}}(%esp) |
| ; X87-NEXT: fucom %st(1) |
| ; X87-NEXT: fnstsw %ax |
| ; X87-NEXT: # kill: def $ah killed $ah killed $ax |
| ; X87-NEXT: sahf |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: fucom %st(0) |
| ; X87-NEXT: fnstsw %ax |
| ; X87-NEXT: fld %st(1) |
| ; X87-NEXT: ja .LBB4_2 |
| ; X87-NEXT: # %bb.1: # %start |
| ; X87-NEXT: fstp %st(0) |
| ; X87-NEXT: fldz |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: .LBB4_2: # %start |
| ; X87-NEXT: fstp %st(1) |
| ; X87-NEXT: # kill: def $ah killed $ah killed $ax |
| ; X87-NEXT: sahf |
| ; X87-NEXT: jp .LBB4_4 |
| ; X87-NEXT: # %bb.3: # %start |
| ; X87-NEXT: fstp %st(1) |
| ; X87-NEXT: fldz |
| ; X87-NEXT: .LBB4_4: # %start |
| ; X87-NEXT: fstp %st(0) |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmulp %st, %st(1) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: canonicalize_fp32: |
| ; X86-SSE: # %bb.0: # %start |
| ; X86-SSE-NEXT: pushl %eax |
| ; X86-SSE-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-SSE-NEXT: movaps %xmm0, %xmm2 |
| ; X86-SSE-NEXT: cmpunordss %xmm0, %xmm2 |
| ; X86-SSE-NEXT: movaps %xmm2, %xmm3 |
| ; X86-SSE-NEXT: andps %xmm1, %xmm3 |
| ; X86-SSE-NEXT: maxss %xmm0, %xmm1 |
| ; X86-SSE-NEXT: andnps %xmm1, %xmm2 |
| ; X86-SSE-NEXT: orps %xmm3, %xmm2 |
| ; X86-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 |
| ; X86-SSE-NEXT: movss %xmm2, (%esp) |
| ; X86-SSE-NEXT: flds (%esp) |
| ; X86-SSE-NEXT: popl %eax |
| ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canonicalize_fp32: |
| ; X86-AVX: # %bb.0: # %start |
| ; X86-AVX-NEXT: pushl %eax |
| ; X86-AVX-NEXT: .cfi_def_cfa_offset 8 |
| ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 |
| ; X86-AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 |
| ; X86-AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vmovss %xmm0, (%esp) |
| ; X86-AVX-NEXT: flds (%esp) |
| ; X86-AVX-NEXT: popl %eax |
| ; X86-AVX-NEXT: .cfi_def_cfa_offset 4 |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canonicalize_fp32: |
| ; SSE: # %bb.0: # %start |
| ; SSE-NEXT: movaps %xmm0, %xmm2 |
| ; SSE-NEXT: cmpunordss %xmm0, %xmm2 |
| ; SSE-NEXT: movaps %xmm2, %xmm3 |
| ; SSE-NEXT: andps %xmm1, %xmm3 |
| ; SSE-NEXT: maxss %xmm0, %xmm1 |
| ; SSE-NEXT: andnps %xmm1, %xmm2 |
| ; SSE-NEXT: orps %xmm3, %xmm2 |
| ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 |
| ; SSE-NEXT: movaps %xmm2, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX1-LABEL: canonicalize_fp32: |
| ; AVX1: # %bb.0: # %start |
| ; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2 |
| ; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 |
| ; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 |
| ; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: canonicalize_fp32: |
| ; AVX2: # %bb.0: # %start |
| ; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2 |
| ; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 |
| ; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: canonicalize_fp32: |
| ; AVX512F: # %bb.0: # %start |
| ; AVX512F-NEXT: vmaxss %xmm0, %xmm1, %xmm2 |
| ; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1 |
| ; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} |
| ; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 |
| ; AVX512F-NEXT: retq |
| start: |
| %cc = fcmp olt float %aa, %bb |
| %dd = fcmp uno float %aa, 0.000000e+00 |
| %or.cond.i.i.x = or i1 %dd, %cc |
| %ee = select i1 %or.cond.i.i.x, float %bb, float %aa |
| %ff = tail call float @llvm.canonicalize.f32(float %ee) #2 |
| ret float %ff |
| } |
| |
| ; Load/canonicalize/store round-trip for f32 through a pointer argument. |
| define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { |
| ; X87-LABEL: v_test_canonicalize_var_f32: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmuls (%eax) |
| ; X87-NEXT: fstps (%eax) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: v_test_canonicalize_var_f32: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 |
| ; X86-SSE-NEXT: movss %xmm0, (%eax) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: v_test_canonicalize_var_f32: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vmovss %xmm0, (%eax) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: v_test_canonicalize_var_f32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE-NEXT: movss %xmm0, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: v_test_canonicalize_var_f32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX-NEXT: vmovss %xmm0, (%rdi) |
| ; AVX-NEXT: retq |
| %val = load float, float addrspace(1)* %out |
| %canonicalized = call float @llvm.canonicalize.f32(float %val) |
| store float %canonicalized, float addrspace(1)* %out |
| ret void |
| } |
| |
| ; Load/canonicalize/store round-trip for x86_fp80; always x87 fld1+fmulp. |
| define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { |
| ; X87-LABEL: v_test_canonicalize_x86_fp80: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fldt (%eax) |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmulp %st, %st(1) |
| ; X87-NEXT: fstpt (%eax) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: v_test_canonicalize_x86_fp80: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE-NEXT: fldt (%eax) |
| ; X86-SSE-NEXT: fld1 |
| ; X86-SSE-NEXT: fmulp %st, %st(1) |
| ; X86-SSE-NEXT: fstpt (%eax) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: v_test_canonicalize_x86_fp80: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-AVX-NEXT: fldt (%eax) |
| ; X86-AVX-NEXT: fld1 |
| ; X86-AVX-NEXT: fmulp %st, %st(1) |
| ; X86-AVX-NEXT: fstpt (%eax) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: v_test_canonicalize_x86_fp80: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: fldt (%rdi) |
| ; SSE-NEXT: fld1 |
| ; SSE-NEXT: fmulp %st, %st(1) |
| ; SSE-NEXT: fstpt (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: v_test_canonicalize_x86_fp80: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: fldt (%rdi) |
| ; AVX-NEXT: fld1 |
| ; AVX-NEXT: fmulp %st, %st(1) |
| ; AVX-NEXT: fstpt (%rdi) |
| ; AVX-NEXT: retq |
| %val = load x86_fp80, x86_fp80 addrspace(1)* %out |
| %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val) |
| store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out |
| ret void |
| } |
| |
| ; Load/canonicalize/store round-trip for f64 through a pointer argument. |
| define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { |
| ; X87-LABEL: v_test_canonicalize_var_f64: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmull (%eax) |
| ; X87-NEXT: fstpl (%eax) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: v_test_canonicalize_var_f64: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; X86-SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 |
| ; X86-SSE-NEXT: movsd %xmm0, (%eax) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: v_test_canonicalize_var_f64: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vmovsd %xmm0, (%eax) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: v_test_canonicalize_var_f64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero |
| ; SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE-NEXT: movsd %xmm0, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: v_test_canonicalize_var_f64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero |
| ; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX-NEXT: vmovsd %xmm0, (%rdi) |
| ; AVX-NEXT: retq |
| %val = load double, double addrspace(1)* %out |
| %canonicalized = call double @llvm.canonicalize.f64(double %val) |
| store double %canonicalized, double addrspace(1)* %out |
| ret void |
| } |
| |
| ; canonicalize(undef) folds to a quiet NaN constant store (0x7FF8000000000000). |
| define void @canonicalize_undef(double addrspace(1)* %out) { |
| ; X87-LABEL: canonicalize_undef: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000 |
| ; X87-NEXT: movl $0, (%eax) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: canonicalize_undef: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000 |
| ; X86-SSE-NEXT: movl $0, (%eax) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canonicalize_undef: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-AVX-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000 |
| ; X86-AVX-NEXT: movl $0, (%eax) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canonicalize_undef: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 |
| ; SSE-NEXT: movq %rax, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: canonicalize_undef: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 |
| ; AVX-NEXT: movq %rax, (%rdi) |
| ; AVX-NEXT: retq |
| %canonicalized = call double @llvm.canonicalize.f64(double undef) |
| store double %canonicalized, double addrspace(1)* %out |
| ret void |
| } |
| |
| ; Vector v4f32 canonicalize: mulps by a 1.0 splat (broadcast on AVX2/AVX512). |
| define <4 x float> @canon_fp32_varargsv4f32(<4 x float> %a) { |
| ; X87-LABEL: canon_fp32_varargsv4f32: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fld %st(0) |
| ; X87-NEXT: fmuls {{[0-9]+}}(%esp) |
| ; X87-NEXT: fld %st(1) |
| ; X87-NEXT: fmuls {{[0-9]+}}(%esp) |
| ; X87-NEXT: fld %st(2) |
| ; X87-NEXT: fmuls {{[0-9]+}}(%esp) |
| ; X87-NEXT: fxch %st(3) |
| ; X87-NEXT: fmuls {{[0-9]+}}(%esp) |
| ; X87-NEXT: fstps 12(%eax) |
| ; X87-NEXT: fxch %st(2) |
| ; X87-NEXT: fstps 8(%eax) |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: fstps 4(%eax) |
| ; X87-NEXT: fstps (%eax) |
| ; X87-NEXT: retl $4 |
| ; |
| ; X86-SSE-LABEL: canon_fp32_varargsv4f32: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canon_fp32_varargsv4f32: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canon_fp32_varargsv4f32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX1-LABEL: canon_fp32_varargsv4f32: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: canon_fp32_varargsv4f32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: canon_fp32_varargsv4f32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX512F-NEXT: vmulps %xmm1, %xmm0, %xmm0 |
| ; AVX512F-NEXT: retq |
| %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a) |
| ret <4 x float> %canonicalized |
| } |
| |
| ; Vector v4f64 canonicalize: mulpd by a 1.0 splat (pair of xmm multiplies on |
| ; SSE, one ymm multiply on AVX). |
| define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) { |
| ; X87-LABEL: canon_fp64_varargsv4f64: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fld %st(0) |
| ; X87-NEXT: fmull {{[0-9]+}}(%esp) |
| ; X87-NEXT: fld %st(1) |
| ; X87-NEXT: fmull {{[0-9]+}}(%esp) |
| ; X87-NEXT: fld %st(2) |
| ; X87-NEXT: fmull {{[0-9]+}}(%esp) |
| ; X87-NEXT: fxch %st(3) |
| ; X87-NEXT: fmull {{[0-9]+}}(%esp) |
| ; X87-NEXT: fstpl 24(%eax) |
| ; X87-NEXT: fxch %st(2) |
| ; X87-NEXT: fstpl 16(%eax) |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: fstpl 8(%eax) |
| ; X87-NEXT: fstpl (%eax) |
| ; X87-NEXT: retl $4 |
| ; |
| ; X86-SSE-LABEL: canon_fp64_varargsv4f64: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0] |
| ; X86-SSE-NEXT: mulpd %xmm2, %xmm0 |
| ; X86-SSE-NEXT: mulpd %xmm2, %xmm1 |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canon_fp64_varargsv4f64: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canon_fp64_varargsv4f64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0] |
| ; SSE-NEXT: mulpd %xmm2, %xmm0 |
| ; SSE-NEXT: mulpd %xmm2, %xmm1 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX1-LABEL: canon_fp64_varargsv4f64: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: canon_fp64_varargsv4f64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: canon_fp64_varargsv4f64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX512F-NEXT: vmulpd %ymm1, %ymm0, %ymm0 |
| ; AVX512F-NEXT: retq |
| ; Fixed intrinsic mangling: the <4 x double> overload is .v4f64, not .v4f32. |
| %canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %a) |
| ret <4 x double> %canonicalized |
| } |
| |
| ; <2 x x86_fp80> canonicalize is scalarized onto the x87 stack: one shared fld1 |
| ; multiplied into each element. |
| define <2 x x86_fp80> @canon_fp80_varargsv2fp80(<2 x x86_fp80> %a) { |
| ; X87-LABEL: canon_fp80_varargsv2fp80: |
| ; X87: # %bb.0: |
| ; X87-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X87-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmul %st, %st(1) |
| ; X87-NEXT: fmulp %st, %st(2) |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: canon_fp80_varargsv2fp80: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-SSE-NEXT: fld1 |
| ; X86-SSE-NEXT: fmul %st, %st(1) |
| ; X86-SSE-NEXT: fmulp %st, %st(2) |
| ; X86-SSE-NEXT: fxch %st(1) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: canon_fp80_varargsv2fp80: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-AVX-NEXT: fldt {{[0-9]+}}(%esp) |
| ; X86-AVX-NEXT: fld1 |
| ; X86-AVX-NEXT: fmul %st, %st(1) |
| ; X86-AVX-NEXT: fmulp %st, %st(2) |
| ; X86-AVX-NEXT: fxch %st(1) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: canon_fp80_varargsv2fp80: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; SSE-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; SSE-NEXT: fld1 |
| ; SSE-NEXT: fmul %st, %st(1) |
| ; SSE-NEXT: fmulp %st, %st(2) |
| ; SSE-NEXT: fxch %st(1) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: canon_fp80_varargsv2fp80: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; AVX-NEXT: fldt {{[0-9]+}}(%rsp) |
| ; AVX-NEXT: fld1 |
| ; AVX-NEXT: fmul %st, %st(1) |
| ; AVX-NEXT: fmulp %st, %st(2) |
| ; AVX-NEXT: fxch %st(1) |
| ; AVX-NEXT: retq |
| %canonicalized = call <2 x x86_fp80> @llvm.canonicalize.v2f80(<2 x x86_fp80> %a) |
| ret <2 x x86_fp80> %canonicalized |
| } |
| |
| ; In-memory v4f32 canonicalize: load, mulps by 1.0 splat, store back. |
| define void @vec_canonicalize_var_v4f32(<4 x float> addrspace(1)* %out) #1 { |
| ; X87-LABEL: vec_canonicalize_var_v4f32: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fld %st(0) |
| ; X87-NEXT: fmuls (%eax) |
| ; X87-NEXT: fld %st(1) |
| ; X87-NEXT: fmuls 4(%eax) |
| ; X87-NEXT: fld %st(2) |
| ; X87-NEXT: fmuls 8(%eax) |
| ; X87-NEXT: fxch %st(3) |
| ; X87-NEXT: fmuls 12(%eax) |
| ; X87-NEXT: fstps 12(%eax) |
| ; X87-NEXT: fxch %st(2) |
| ; X87-NEXT: fstps 8(%eax) |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: fstps 4(%eax) |
| ; X87-NEXT: fstps (%eax) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: vec_canonicalize_var_v4f32: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE-NEXT: movaps (%eax), %xmm0 |
| ; X86-SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 |
| ; X86-SSE-NEXT: movaps %xmm0, (%eax) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: vec_canonicalize_var_v4f32: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-AVX-NEXT: vmovaps (%eax), %xmm0 |
| ; X86-AVX-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 |
| ; X86-AVX-NEXT: vmovaps %xmm0, (%eax) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: vec_canonicalize_var_v4f32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE-NEXT: movaps %xmm0, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX1-LABEL: vec_canonicalize_var_v4f32: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmovaps (%rdi), %xmm0 |
| ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX1-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: vec_canonicalize_var_v4f32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX2-NEXT: vmulps (%rdi), %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: vec_canonicalize_var_v4f32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX512F-NEXT: vmulps (%rdi), %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX512F-NEXT: retq |
| %val = load <4 x float>, <4 x float> addrspace(1)* %out |
| %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %val) |
| store <4 x float> %canonicalized, <4 x float> addrspace(1)* %out |
| ret void |
| } |
| |
| ; In-memory v4f64 canonicalize: load, mulpd by 1.0 splat, store back. |
| define void @vec_canonicalize_var_v4f64(<4 x double> addrspace(1)* %out) #1 { |
| ; X87-LABEL: vec_canonicalize_var_v4f64: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fld %st(0) |
| ; X87-NEXT: fmull (%eax) |
| ; X87-NEXT: fld %st(1) |
| ; X87-NEXT: fmull 8(%eax) |
| ; X87-NEXT: fld %st(2) |
| ; X87-NEXT: fmull 16(%eax) |
| ; X87-NEXT: fxch %st(3) |
| ; X87-NEXT: fmull 24(%eax) |
| ; X87-NEXT: fstpl 24(%eax) |
| ; X87-NEXT: fxch %st(2) |
| ; X87-NEXT: fstpl 16(%eax) |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: fstpl 8(%eax) |
| ; X87-NEXT: fstpl (%eax) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: vec_canonicalize_var_v4f64: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0] |
| ; X86-SSE-NEXT: movapd 16(%eax), %xmm1 |
| ; X86-SSE-NEXT: mulpd %xmm0, %xmm1 |
| ; X86-SSE-NEXT: mulpd (%eax), %xmm0 |
| ; X86-SSE-NEXT: movapd %xmm0, (%eax) |
| ; X86-SSE-NEXT: movapd %xmm1, 16(%eax) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: vec_canonicalize_var_v4f64: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-AVX-NEXT: vmovapd (%eax), %ymm0 |
| ; X86-AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 |
| ; X86-AVX-NEXT: vmovapd %ymm0, (%eax) |
| ; X86-AVX-NEXT: vzeroupper |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: vec_canonicalize_var_v4f64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0] |
| ; SSE-NEXT: movapd 16(%rdi), %xmm1 |
| ; SSE-NEXT: mulpd %xmm0, %xmm1 |
| ; SSE-NEXT: mulpd (%rdi), %xmm0 |
| ; SSE-NEXT: movapd %xmm0, (%rdi) |
| ; SSE-NEXT: movapd %xmm1, 16(%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX1-LABEL: vec_canonicalize_var_v4f64: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmovapd (%rdi), %ymm0 |
| ; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX1-NEXT: vmovapd %ymm0, (%rdi) |
| ; AVX1-NEXT: vzeroupper |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: vec_canonicalize_var_v4f64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 |
| ; AVX2-NEXT: vmovapd %ymm0, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: vec_canonicalize_var_v4f64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] |
| ; AVX512F-NEXT: vmulpd (%rdi), %ymm0, %ymm0 |
| ; AVX512F-NEXT: vmovapd %ymm0, (%rdi) |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| %val = load <4 x double>, <4 x double> addrspace(1)* %out |
| ; Fixed intrinsic mangling: the <4 x double> overload is .v4f64, not .v4f32. |
| %canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %val) |
| store <4 x double> %canonicalized, <4 x double> addrspace(1)* %out |
| ret void |
| } |
| |
| ; In-memory <4 x x86_fp80> canonicalize: scalarized to four x87 multiplies |
| ; sharing a single fld1 (element stride is 10 bytes). |
| define void @vec_canonicalize_x86_fp80(<4 x x86_fp80> addrspace(1)* %out) #1 { |
| ; X87-LABEL: vec_canonicalize_x86_fp80: |
| ; X87: # %bb.0: |
| ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X87-NEXT: fldt 30(%eax) |
| ; X87-NEXT: fldt 20(%eax) |
| ; X87-NEXT: fldt 10(%eax) |
| ; X87-NEXT: fldt (%eax) |
| ; X87-NEXT: fld1 |
| ; X87-NEXT: fmul %st, %st(1) |
| ; X87-NEXT: fmul %st, %st(2) |
| ; X87-NEXT: fmul %st, %st(3) |
| ; X87-NEXT: fmulp %st, %st(4) |
| ; X87-NEXT: fxch %st(3) |
| ; X87-NEXT: fstpt 30(%eax) |
| ; X87-NEXT: fxch %st(1) |
| ; X87-NEXT: fstpt 20(%eax) |
| ; X87-NEXT: fstpt 10(%eax) |
| ; X87-NEXT: fstpt (%eax) |
| ; X87-NEXT: retl |
| ; |
| ; X86-SSE-LABEL: vec_canonicalize_x86_fp80: |
| ; X86-SSE: # %bb.0: |
| ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE-NEXT: fldt 30(%eax) |
| ; X86-SSE-NEXT: fldt 20(%eax) |
| ; X86-SSE-NEXT: fldt 10(%eax) |
| ; X86-SSE-NEXT: fldt (%eax) |
| ; X86-SSE-NEXT: fld1 |
| ; X86-SSE-NEXT: fmul %st, %st(1) |
| ; X86-SSE-NEXT: fmul %st, %st(2) |
| ; X86-SSE-NEXT: fmul %st, %st(3) |
| ; X86-SSE-NEXT: fmulp %st, %st(4) |
| ; X86-SSE-NEXT: fxch %st(3) |
| ; X86-SSE-NEXT: fstpt 30(%eax) |
| ; X86-SSE-NEXT: fxch %st(1) |
| ; X86-SSE-NEXT: fstpt 20(%eax) |
| ; X86-SSE-NEXT: fstpt 10(%eax) |
| ; X86-SSE-NEXT: fstpt (%eax) |
| ; X86-SSE-NEXT: retl |
| ; |
| ; X86-AVX-LABEL: vec_canonicalize_x86_fp80: |
| ; X86-AVX: # %bb.0: |
| ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-AVX-NEXT: fldt 30(%eax) |
| ; X86-AVX-NEXT: fldt 20(%eax) |
| ; X86-AVX-NEXT: fldt 10(%eax) |
| ; X86-AVX-NEXT: fldt (%eax) |
| ; X86-AVX-NEXT: fld1 |
| ; X86-AVX-NEXT: fmul %st, %st(1) |
| ; X86-AVX-NEXT: fmul %st, %st(2) |
| ; X86-AVX-NEXT: fmul %st, %st(3) |
| ; X86-AVX-NEXT: fmulp %st, %st(4) |
| ; X86-AVX-NEXT: fxch %st(3) |
| ; X86-AVX-NEXT: fstpt 30(%eax) |
| ; X86-AVX-NEXT: fxch %st(1) |
| ; X86-AVX-NEXT: fstpt 20(%eax) |
| ; X86-AVX-NEXT: fstpt 10(%eax) |
| ; X86-AVX-NEXT: fstpt (%eax) |
| ; X86-AVX-NEXT: retl |
| ; |
| ; SSE-LABEL: vec_canonicalize_x86_fp80: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: fldt 30(%rdi) |
| ; SSE-NEXT: fldt 20(%rdi) |
| ; SSE-NEXT: fldt 10(%rdi) |
| ; SSE-NEXT: fldt (%rdi) |
| ; SSE-NEXT: fld1 |
| ; SSE-NEXT: fmul %st, %st(1) |
| ; SSE-NEXT: fmul %st, %st(2) |
| ; SSE-NEXT: fmul %st, %st(3) |
| ; SSE-NEXT: fmulp %st, %st(4) |
| ; SSE-NEXT: fxch %st(3) |
| ; SSE-NEXT: fstpt 30(%rdi) |
| ; SSE-NEXT: fxch %st(1) |
| ; SSE-NEXT: fstpt 20(%rdi) |
| ; SSE-NEXT: fstpt 10(%rdi) |
| ; SSE-NEXT: fstpt (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: vec_canonicalize_x86_fp80: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: fldt 30(%rdi) |
| ; AVX-NEXT: fldt 20(%rdi) |
| ; AVX-NEXT: fldt 10(%rdi) |
| ; AVX-NEXT: fldt (%rdi) |
| ; AVX-NEXT: fld1 |
| ; AVX-NEXT: fmul %st, %st(1) |
| ; AVX-NEXT: fmul %st, %st(2) |
| ; AVX-NEXT: fmul %st, %st(3) |
| ; AVX-NEXT: fmulp %st, %st(4) |
| ; AVX-NEXT: fxch %st(3) |
| ; AVX-NEXT: fstpt 30(%rdi) |
| ; AVX-NEXT: fxch %st(1) |
| ; AVX-NEXT: fstpt 20(%rdi) |
| ; AVX-NEXT: fstpt 10(%rdi) |
| ; AVX-NEXT: fstpt (%rdi) |
| ; AVX-NEXT: retq |
| %val = load <4 x x86_fp80>, <4 x x86_fp80> addrspace(1)* %out |
| ; Fixed intrinsic mangling: the <4 x x86_fp80> overload is .v4f80, not the |
| ; scalar .f80 name. |
| %canonicalized = call <4 x x86_fp80> @llvm.canonicalize.v4f80(<4 x x86_fp80> %val) |
| store <4 x x86_fp80> %canonicalized, <4 x x86_fp80> addrspace(1)* %out |
| ret void |
| } |