| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64 |
| |
| ; 256-bit two-source variable permute intrinsic exercised by the tests below. |
| ; Both callers pass the operands as (first source, index vector, second source). |
| ; NOTE(review): attribute group #1 has no definition in the visible part of this |
| ; file - confirm that the dangling reference is intentional. |
| declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>) #1 |
| |
| ; Positive test: the two permute sources are loaded from adjacent 32-byte halves |
| ; of one buffer (%p0 and %p0 + 32) and passed in memory order (low half first). |
| ; The expected code for both the i686 and x86_64 runs replaces the two 256-bit |
| ; loads plus the two-source permute with a single-source 512-bit vpermpd that |
| ; reads its data directly from memory at %p0; the index %m arrives in ymm0 and |
| ; is used through its zmm0 super-register, and only ymm0 of the result is returned. |
| ; Do not edit the check lines by hand - regenerate them with |
| ; utils/update_llc_test_checks.py. |
| define <4 x double> @concat_vpermv3_ops_vpermv_v4f64(ptr %p0, <4 x i64> %m) { |
| ; X86-LABEL: concat_vpermv3_ops_vpermv_v4f64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: vpermpd (%eax), %zmm0, %zmm0 |
| ; X86-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: concat_vpermv3_ops_vpermv_v4f64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X64-NEXT: vpermpd (%rdi), %zmm0, %zmm0 |
| ; X64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; X64-NEXT: retq |
| ; %p1 addresses the second 32-byte half; %lo comes from %p0 and %hi from %p1. |
| %p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32 |
| %lo = load <4 x double>, ptr %p0, align 32 |
| %hi = load <4 x double>, ptr %p1, align 32 |
| %res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi) |
| ret <4 x double> %res |
| } |
| |
| ; Negative counterpart of concat_vpermv3_ops_vpermv_v4f64: the same two adjacent |
| ; 32-byte halves are loaded, but they are passed to the permute in swapped order |
| ; (first source from %p0 + 32, second source from %p0). |
| ; The expected code keeps the 256-bit two-source vpermi2pd: the upper half is |
| ; loaded into ymm1 and the lower half is used as a memory operand. Presumably the |
| ; 512-bit single-source form is not used because the sources are not in memory |
| ; order - confirm against the combine that produces the positive case. |
| ; Do not edit the check lines by hand - regenerate them with |
| ; utils/update_llc_test_checks.py. |
| define <4 x double> @concat_vpermv3_ops_vpermv_swap_v4f64(ptr %p0, <4 x i64> %m) { |
| ; X86-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: vmovapd 32(%eax), %ymm1 |
| ; X86-NEXT: vpermi2pd (%eax), %ymm1, %ymm0 |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vmovapd 32(%rdi), %ymm1 |
| ; X64-NEXT: vpermi2pd (%rdi), %ymm1, %ymm0 |
| ; X64-NEXT: retq |
| ; Note the deliberate swap: %lo is loaded from %p1 (offset 32) and %hi from %p0. |
| %p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32 |
| %lo = load <4 x double>, ptr %p1, align 32 |
| %hi = load <4 x double>, ptr %p0, align 32 |
| %res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi) |
| ret <4 x double> %res |
| } |