blob: 7c5b87466eccda3175ceb798c92f3f536c801304 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s
define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompresspd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_compress_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompressps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_compress_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
define <8 x double> @test_expand_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
ret <8 x double> %res
}
define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandpd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
ret <16 x float> %res
}
define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrcp14ps %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_rcp_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrcp14pd %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_rndscale_sd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundsd $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 11, i32 4)
ret <2 x double>%res
}
define <2 x double> @test_rndscale_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_rndscale_sd_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vrndscalesd $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
ret <2 x double>%res
}
define <2 x double> @test_rndscale_sd_mask_load(<2 x double> %a, <2 x double>* %bptr, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_rndscale_sd_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vrndscalesd $11, (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
%b = load <2 x double>, <2 x double>* %bptr
%res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
ret <2 x double>%res
}
define <2 x double> @test_rndscale_sd_maskz(<2 x double> %a, <2 x double> %b, i8 %mask) {
; CHECK-LABEL: test_rndscale_sd_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vrndscalesd $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask, i32 11, i32 4)
ret <2 x double>%res
}
declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_rndscale_ss:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundss $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
ret <4 x float>%res
}
define <4 x float> @test_rndscale_ss_load(<4 x float> %a, <4 x float>* %bptr) {
; CHECK-LABEL: test_rndscale_ss_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%b = load <4 x float>, <4 x float>* %bptr
%res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
ret <4 x float>%res
}
define <4 x float> @test_rndscale_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: test_rndscale_ss_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vrndscaless $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 11, i32 4)
ret <4 x float>%res
}
define <4 x float> @test_rndscale_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) {
; CHECK-LABEL: test_rndscale_ss_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vrndscaless $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask, i32 11, i32 4)
ret <4 x float>%res
}
declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
define <8 x double> @test7(<8 x double> %a) {
; CHECK-LABEL: test7:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
ret <8 x double>%res
}
declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
define <16 x float> @test8(<16 x float> %a) {
; CHECK-LABEL: test8:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
ret <16 x float>%res
}
define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rsqrt_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
ret <8 x double> %1
}
define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_sqrt_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtpd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
ret <8 x double> %3
}
define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_maskz_sqrt_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_round_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
ret <8 x double> %1
}
define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
ret <8 x double> %3
}
define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}
declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) nounwind readnone
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
ret <16 x float> %1
}
define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_sqrt_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}
define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_round_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
ret <16 x float> %1
}
define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}
define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, i32) nounwind readnone
define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vgetexppd %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
ret <8 x double> %res
}
define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_round_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vgetexpps %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_round_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_ss:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm2
; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
%res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
%res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
%res.1 = fadd <4 x float> %res0, %res1
%res.2 = fadd <4 x float> %res2, %res3
%res = fadd <4 x float> %res.1, %res.2
ret <4 x float> %res
}
declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm3
; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm2
; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
%res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
%res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
%res.1 = fadd <2 x double> %res0, %res1
%res.2 = fadd <2 x double> %res2, %res3
%res = fadd <2 x double> %res.1, %res.2
ret <2 x double> %res
}
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2si %xmm0, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttsd2si %xmm0, %rcx
; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx
; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
%res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttsd2si %xmm0, %ecx
; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
%res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx
; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2si %xmm0, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
ret i64 %res
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse_cvtsi642ss:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
%res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvttss2si_load(<4 x float>* %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2si (%rdi), %eax
; CHECK-NEXT: retq
%a1 = load <4 x float>, <4 x float>* %a0
%res = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a1, i32 4) ;
ret i32 %res
}
define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2si %xmm0, %rcx
; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx
; CHECK-NEXT: vcvttss2usi %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
%res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2usi %xmm0, %rcx
; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2si %xmm0, %rax
; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2usi %xmm0, %rax
; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2si %xmm0, %rax
; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2usi %xmm0, %eax
; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2si %xmm0, %eax
; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2usi %xmm0, %eax
; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2si %xmm0, %eax
; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, <16 x i16> * %dst) {
; CHECK-LABEL: test_x86_vcvtps2ph_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1
; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi)
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
%res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 %mask)
%res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> %src, i16 %mask)
store <16 x i16> %res1, <16 x i16> * %dst
%res = add <16 x i16> %res2, %res3
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
%1 = bitcast <16 x i1> %res to i16
ret i16 %1
}
declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: test_cmppd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
%1 = bitcast <8 x i1> %res to i8
ret i8 %1
}
declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
; Function Attrs: nounwind readnone
; fp min - max
define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_vmaxpd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %1
}
declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32)
define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_vminpd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %1
}
declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32)
define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
; CHECK-LABEL: test_mask_store_ss:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
%1 = and i8 %mask, 1
%2 = bitcast i8* %ptr to <4 x float>*
%3 = bitcast i8 %1 to <8 x i1>
%extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %data, <4 x float>* %2, i32 1, <4 x i1> %extract)
ret void
}
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) #1
declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}
define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}
define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}
define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}
define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}
define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}
define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}
define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}
;; mask float
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}
define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}
define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}
define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}
;; mask double
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}
define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 1)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}
define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 2)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}
define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 3)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}
define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}
define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %1
}
declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}
define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}
define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %1
}
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}
define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}
define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %1
}
declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_min_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_min_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_min_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
ret <16 x float> %1
}
define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %1
}
declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32)
define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_max_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
ret <16 x float> %1
}
define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %1
}
declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32)
declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
}
define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_ss_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
ret <4 x float> %res
}
define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_add_ss_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_current_memfold:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%a1.val = load float, float* %a1
%a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
%a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
%a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
%a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
}
define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, float* %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_ss_current_memfold:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%a1.val = load float, float* %a1
%a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
%a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
%a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
%a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
ret <4 x float> %res
}
declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
define