| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512,AVX512-VL |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 -mattr=-avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512-NOVL |
| |
| ; |
| ; fptosi -> sitofp |
| ; |
| |
| ; Round-trips a double through a signed i32 (fptosi, then sitofp) and returns |
| ; the converted value. The SSE and AVX expectations both stay in xmm0, pairing |
| ; the packed truncating convert (cvttpd2dq) with the packed convert back |
| ; (cvtdq2pd), so no general-purpose register shows up in the expected output. |
| define double @scvtf64_i32(double %a0) { |
| ; SSE-LABEL: scvtf64_i32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 |
| ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: scvtf64_i32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 |
| ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %ii = fptosi double %a0 to i32 |
| %ff = sitofp i32 %ii to double |
| ret double %ff |
| } |
| |
| ; Round-trips a double through a signed i64 (fptosi, then sitofp). Unlike the |
| ; i32 variant, the expected code goes through rax with the scalar converts |
| ; cvttsd2si and cvtsi2sd. The SSE sequence zeroes xmm0 with xorps before |
| ; writing it (presumably to break the dependency on the previous xmm0 value), |
| ; while the AVX sequence passes xmm15 as the extra source operand instead. |
| define double @scvtf64_i64(double %a0) { |
| ; SSE-LABEL: scvtf64_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttsd2si %xmm0, %rax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: cvtsi2sd %rax, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: scvtf64_i64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vcvttsd2si %xmm0, %rax |
| ; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 |
| ; AVX-NEXT: retq |
| %ii = fptosi double %a0 to i64 |
| %ff = sitofp i64 %ii to double |
| ret double %ff |
| } |
| |
| ; Round-trips a float through a signed i32 (fptosi, then sitofp). Both the SSE |
| ; and AVX expectations keep the value in xmm0 and use the packed pair |
| ; cvttps2dq / cvtdq2ps rather than a scalar convert through a general-purpose |
| ; register. |
| define float @scvtf32_i32(float %a0) { |
| ; SSE-LABEL: scvtf32_i32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 |
| ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: scvtf32_i32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 |
| ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %ii = fptosi float %a0 to i32 |
| %ff = sitofp i32 %ii to float |
| ret float %ff |
| } |
| |
| ; Round-trips a float through a signed i64 (fptosi, then sitofp). The expected |
| ; code uses the scalar converts cvttss2si and cvtsi2ss through rax. The SSE |
| ; sequence clears xmm0 with xorps before the convert back; the AVX sequence |
| ; supplies xmm15 as the extra source operand and needs no separate clear. |
| define float @scvtf32_i64(float %a0) { |
| ; SSE-LABEL: scvtf32_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttss2si %xmm0, %rax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: cvtsi2ss %rax, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: scvtf32_i64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vcvttss2si %xmm0, %rax |
| ; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 |
| ; AVX-NEXT: retq |
| %ii = fptosi float %a0 to i64 |
| %ff = sitofp i64 %ii to float |
| ret float %ff |
| } |
| |
| ; |
| ; fptoui -> uitofp |
| ; |
| |
| ; Round-trips a double through an unsigned i32 (fptoui, then uitofp). |
| ; The SSE and AVX2 expectations have no unsigned convert, so they do a signed |
| ; 64-bit truncating convert into rax, keep only the low 32 bits with |
| ; movl %eax, %eax (a 32-bit register write clears the upper half), and then |
| ; convert the 64-bit value back with the signed cvtsi2sd. |
| ; The AVX512 expectation uses the dedicated unsigned pair vcvttsd2usi / |
| ; vcvtusi2sd directly on eax. |
| define double @ucvtf64_i32(double %a0) { |
| ; SSE-LABEL: ucvtf64_i32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttsd2si %xmm0, %rax |
| ; SSE-NEXT: movl %eax, %eax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: cvtsi2sd %rax, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ucvtf64_i32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vcvttsd2si %xmm0, %rax |
| ; AVX2-NEXT: movl %eax, %eax |
| ; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: ucvtf64_i32: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vcvttsd2usi %xmm0, %eax |
| ; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0 |
| ; AVX512-NEXT: retq |
| %ii = fptoui double %a0 to i32 |
| %ff = uitofp i32 %ii to double |
| ret double %ff |
| } |
| |
| ; Round-trips a double through an unsigned i64 (fptoui, then uitofp). |
| ; The SSE and AVX2 expectations emulate both directions with signed and |
| ; packed instructions. |
| ;   - fptoui half. The input is converted twice with the signed truncating |
| ;     convert, once as is and once after subtracting a constant-pool value. |
| ;     The sign mask of the first result (sarq $63) gates the second result |
| ;     (andq), and the two are merged with orq. |
| ;   - uitofp half. The integer is moved into a vector register, interleaved |
| ;     with a constant from memory (punpckldq), adjusted with subpd, and the |
| ;     two lanes are then summed with a scalar add. |
| ; The constant-pool operands are matched by regex, so their values are not |
| ; visible in this file. |
| ; The AVX512 expectation is just the unsigned pair vcvttsd2usi / vcvtusi2sd |
| ; on rax. |
| define double @ucvtf64_i64(double %a0) { |
| ; SSE-LABEL: ucvtf64_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttsd2si %xmm0, %rax |
| ; SSE-NEXT: movq %rax, %rcx |
| ; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE-NEXT: cvttsd2si %xmm0, %rdx |
| ; SSE-NEXT: sarq $63, %rcx |
| ; SSE-NEXT: andq %rcx, %rdx |
| ; SSE-NEXT: orq %rax, %rdx |
| ; SSE-NEXT: movq %rdx, %xmm1 |
| ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] |
| ; SSE-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; SSE-NEXT: movapd %xmm1, %xmm0 |
| ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] |
| ; SSE-NEXT: addsd %xmm1, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ucvtf64_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vcvttsd2si %xmm0, %rax |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: sarq $63, %rcx |
| ; AVX2-NEXT: vcvttsd2si %xmm0, %rdx |
| ; AVX2-NEXT: andq %rcx, %rdx |
| ; AVX2-NEXT: orq %rax, %rdx |
| ; AVX2-NEXT: vmovq %rdx, %xmm0 |
| ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] |
| ; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] |
| ; AVX2-NEXT: vaddsd %xmm0, %xmm1, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: ucvtf64_i64: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vcvttsd2usi %xmm0, %rax |
| ; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0 |
| ; AVX512-NEXT: retq |
| %ii = fptoui double %a0 to i64 |
| %ff = uitofp i64 %ii to double |
| ret double %ff |
| } |
| |
| ; Round-trips a float through an unsigned i32 (fptoui, then uitofp). |
| ; The SSE and AVX2 expectations mirror the double variant of this test. They |
| ; do a signed 64-bit truncating convert into rax, keep only the low 32 bits |
| ; with movl %eax, %eax, and convert the 64-bit value back with the signed |
| ; cvtsi2ss. |
| ; The AVX512 expectation uses the unsigned pair vcvttss2usi / vcvtusi2ss |
| ; directly on eax. |
| define float @ucvtf32_i32(float %a0) { |
| ; SSE-LABEL: ucvtf32_i32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttss2si %xmm0, %rax |
| ; SSE-NEXT: movl %eax, %eax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: cvtsi2ss %rax, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ucvtf32_i32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vcvttss2si %xmm0, %rax |
| ; AVX2-NEXT: movl %eax, %eax |
| ; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: ucvtf32_i32: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vcvttss2usi %xmm0, %eax |
| ; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0 |
| ; AVX512-NEXT: retq |
| %ii = fptoui float %a0 to i32 |
| %ff = uitofp i32 %ii to float |
| ret float %ff |
| } |
| |
| ; Round-trips a float through an unsigned i64 (fptoui, then uitofp). |
| ; The SSE and AVX2 expectations emulate both directions and are the only |
| ; ones in this file that branch. |
| ;   - fptoui half. The input is converted twice with the signed truncating |
| ;     convert, once as is and once after subtracting a constant-pool value. |
| ;     The sign mask of the first result (sarq $63) gates the second (andq) |
| ;     and the two are merged with orq. |
| ;   - uitofp half. The js consumes the sign flag left by that orq. When the |
| ;     merged value has its top bit clear, it is converted directly with the |
| ;     signed cvtsi2ss. Otherwise the value is shifted right by one with its |
| ;     low bit OR-ed back in, converted, and then doubled with an add of the |
| ;     result to itself. |
| ; The AVX512 expectation is just the unsigned pair vcvttss2usi / vcvtusi2ss |
| ; on rax. |
| ; NOTE(review) the .LBB7_1 label appears to encode this function's position |
| ; in the file, so inserting a function above it would likely require the |
| ; assertions to be regenerated - confirm before reordering. |
| define float @ucvtf32_i64(float %a0) { |
| ; SSE-LABEL: ucvtf32_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: cvttss2si %xmm0, %rcx |
| ; SSE-NEXT: movq %rcx, %rdx |
| ; SSE-NEXT: sarq $63, %rdx |
| ; SSE-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE-NEXT: cvttss2si %xmm0, %rax |
| ; SSE-NEXT: andq %rdx, %rax |
| ; SSE-NEXT: orq %rcx, %rax |
| ; SSE-NEXT: js .LBB7_1 |
| ; SSE-NEXT: # %bb.2: |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: cvtsi2ss %rax, %xmm0 |
| ; SSE-NEXT: retq |
| ; SSE-NEXT: .LBB7_1: |
| ; SSE-NEXT: movq %rax, %rcx |
| ; SSE-NEXT: shrq %rcx |
| ; SSE-NEXT: andl $1, %eax |
| ; SSE-NEXT: orq %rcx, %rax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: cvtsi2ss %rax, %xmm0 |
| ; SSE-NEXT: addss %xmm0, %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ucvtf32_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vcvttss2si %xmm0, %rcx |
| ; AVX2-NEXT: movq %rcx, %rdx |
| ; AVX2-NEXT: sarq $63, %rdx |
| ; AVX2-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vcvttss2si %xmm0, %rax |
| ; AVX2-NEXT: andq %rdx, %rax |
| ; AVX2-NEXT: orq %rcx, %rax |
| ; AVX2-NEXT: js .LBB7_1 |
| ; AVX2-NEXT: # %bb.2: |
| ; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 |
| ; AVX2-NEXT: retq |
| ; AVX2-NEXT: .LBB7_1: |
| ; AVX2-NEXT: movq %rax, %rcx |
| ; AVX2-NEXT: shrq %rcx |
| ; AVX2-NEXT: andl $1, %eax |
| ; AVX2-NEXT: orq %rcx, %rax |
| ; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 |
| ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: ucvtf32_i64: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vcvttss2usi %xmm0, %rax |
| ; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0 |
| ; AVX512-NEXT: retq |
| %ii = fptoui float %a0 to i64 |
| %ff = uitofp i64 %ii to float |
| ret float %ff |
| } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; AVX512-NOVL: {{.*}} |
| ; AVX512-VL: {{.*}} |