; blob: 1f4228b1fdec9f0193e4f418a4ea57feda0a37e3 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64
declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>) #1
; The two shuffle sources are 32-byte loads from %p0 and %p0+32, passed to
; vpermi2var.pd.256 in that order with %m as the index vector. The assertions
; below expect a single 512-bit vpermpd whose memory operand starts at %p0.
define <4 x double> @concat_vpermv3_ops_vpermv_v4f64(ptr %p0, <4 x i64> %m) {
; X86-LABEL: concat_vpermv3_ops_vpermv_v4f64:
; X86: # %bb.0:
; X86-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpermpd (%eax), %zmm0, %zmm0
; X86-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X86-NEXT: retl
;
; X64-LABEL: concat_vpermv3_ops_vpermv_v4f64:
; X64: # %bb.0:
; X64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT: vpermpd (%rdi), %zmm0, %zmm0
; X64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X64-NEXT: retq
  %p0.plus32 = getelementptr inbounds nuw i8, ptr %p0, i64 32
  %src.at0 = load <4 x double>, ptr %p0, align 32
  %src.at32 = load <4 x double>, ptr %p0.plus32, align 32
  %perm = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %src.at0, <4 x i64> %m, <4 x double> %src.at32)
  ret <4 x double> %perm
}
; Swapped-operand variant: the first vpermi2var.pd.256 source is loaded from
; %p0+32 and the second from %p0. The assertions below expect the 256-bit form
; to remain: a vmovapd load of %p0+32 followed by vpermi2pd with %p0 as the
; memory operand.
define <4 x double> @concat_vpermv3_ops_vpermv_swap_v4f64(ptr %p0, <4 x i64> %m) {
; X86-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovapd 32(%eax), %ymm1
; X86-NEXT: vpermi2pd (%eax), %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
; X64: # %bb.0:
; X64-NEXT: vmovapd 32(%rdi), %ymm1
; X64-NEXT: vpermi2pd (%rdi), %ymm1, %ymm0
; X64-NEXT: retq
  %p0.plus32 = getelementptr inbounds nuw i8, ptr %p0, i64 32
  %src.at32 = load <4 x double>, ptr %p0.plus32, align 32
  %src.at0 = load <4 x double>, ptr %p0, align 32
  %perm = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %src.at32, <4 x i64> %m, <4 x double> %src.at0)
  ret <4 x double> %perm
}