| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s |
| |
| |
| ; AVX-512 masked-compress intrinsic tests (512-bit pd/ps/q/d element types). |
| ; Each type is exercised in three variants: merge-masked (result merged into |
| ; %passthru, so codegen compresses into a copy of the passthru register), |
| ; zero-masked (zeroinitializer passthru selects the {z} encoding), and an |
| ; all-ones mask, where the operation is the identity and folds to just retq. |
| define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) { |
| ; CHECK-LABEL: test_mask_compress_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vcompresspd %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) |
| ret <8 x double> %res |
| } |
| |
| define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_compress_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) |
| ret <8 x double> %res |
| } |
| |
| ; All-ones mask: compressing every element is a no-op, so only retq remains. |
| define <8 x double> @test_compress_pd_512(<8 x double> %data) { |
| ; CHECK-LABEL: test_compress_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) |
| ret <8 x double> %res |
| } |
| |
| declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) |
| |
| define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) { |
| ; CHECK-LABEL: test_mask_compress_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vcompressps %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) { |
| ; CHECK-LABEL: test_maskz_compress_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_compress_ps_512(<16 x float> %data) { |
| ; CHECK-LABEL: test_compress_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) |
| ret <16 x float> %res |
| } |
| |
| declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) |
| |
| define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) { |
| ; CHECK-LABEL: test_mask_compress_q_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpcompressq %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) |
| ret <8 x i64> %res |
| } |
| |
| define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_compress_q_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) |
| ret <8 x i64> %res |
| } |
| |
| define <8 x i64> @test_compress_q_512(<8 x i64> %data) { |
| ; CHECK-LABEL: test_compress_q_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) |
| ret <8 x i64> %res |
| } |
| |
| declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) |
| |
| define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) { |
| ; CHECK-LABEL: test_mask_compress_d_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpcompressd %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) |
| ret <16 x i32> %res |
| } |
| |
| define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) { |
| ; CHECK-LABEL: test_maskz_compress_d_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) |
| ret <16 x i32> %res |
| } |
| |
| define <16 x i32> @test_compress_d_512(<16 x i32> %data) { |
| ; CHECK-LABEL: test_compress_d_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) |
| ret <16 x i32> %res |
| } |
| |
| declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) |
| |
| ; AVX-512 masked-expand intrinsic tests (512-bit pd/ps/q/d element types). |
| ; Mirrors the compress tests above: all-ones mask folds to retq, a real mask |
| ; expands into a copy of %passthru, and a zero passthru selects {z} encoding. |
| define <8 x double> @test_expand_pd_512(<8 x double> %data) { |
| ; CHECK-LABEL: test_expand_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) |
| ret <8 x double> %res |
| } |
| |
| define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) { |
| ; CHECK-LABEL: test_mask_expand_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vexpandpd %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) |
| ret <8 x double> %res |
| } |
| |
| define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_expand_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) |
| ret <8 x double> %res |
| } |
| |
| declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) |
| |
| define <16 x float> @test_expand_ps_512(<16 x float> %data) { |
| ; CHECK-LABEL: test_expand_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) { |
| ; CHECK-LABEL: test_mask_expand_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vexpandps %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) { |
| ; CHECK-LABEL: test_maskz_expand_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask) |
| ret <16 x float> %res |
| } |
| |
| declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) |
| |
| define <8 x i64> @test_expand_q_512(<8 x i64> %data) { |
| ; CHECK-LABEL: test_expand_q_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) |
| ret <8 x i64> %res |
| } |
| |
| define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) { |
| ; CHECK-LABEL: test_mask_expand_q_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpexpandq %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) |
| ret <8 x i64> %res |
| } |
| |
| define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_expand_q_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) |
| ret <8 x i64> %res |
| } |
| |
| declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) |
| |
| define <16 x i32> @test_expand_d_512(<16 x i32> %data) { |
| ; CHECK-LABEL: test_expand_d_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: retq |
| %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) |
| ret <16 x i32> %res |
| } |
| |
| define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) { |
| ; CHECK-LABEL: test_mask_expand_d_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpexpandd %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) |
| ret <16 x i32> %res |
| } |
| |
| define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) { |
| ; CHECK-LABEL: test_maskz_expand_d_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) |
| ret <16 x i32> %res |
| } |
| |
| declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) |
| |
| ; rcp14 (approximate reciprocal) tests: all-ones mask with a zero passthru |
| ; should select the plain, unmasked vrcp14ps/vrcp14pd encoding. |
| define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { |
| ; CHECK-LABEL: test_rcp_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vrcp14ps %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] |
| ret <16 x float> %res |
| } |
| declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone |
| |
| define <8 x double> @test_rcp_pd_512(<8 x double> %a0) { |
| ; CHECK-LABEL: test_rcp_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vrcp14pd %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1] |
| ret <8 x double> %res |
| } |
| declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone |
| |
| ; Scalar rndscale (sd/ss) tests, all with imm 11 and rounding arg i32 4. |
| ; Unmasked all-ones-mask calls lower to the SSE4.1-style vroundsd/vroundss; |
| ; masked and zero-masked calls use vrndscalesd/vrndscaless. The *_load |
| ; variants check that the scalar memory operand is folded into the |
| ; instruction (the RUN line passes -disable-peephole). |
| declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32) |
| |
| define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b) { |
| ; CHECK-LABEL: test_rndscale_sd: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vroundsd $11, %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 11, i32 4) |
| ret <2 x double>%res |
| } |
| |
| define <2 x double> @test_rndscale_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { |
| ; CHECK-LABEL: test_rndscale_sd_mask: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vrndscalesd $11, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovapd %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4) |
| ret <2 x double>%res |
| } |
| |
| define <2 x double> @test_rndscale_sd_mask_load(<2 x double> %a, <2 x double>* %bptr, <2 x double> %c, i8 %mask) { |
| ; CHECK-LABEL: test_rndscale_sd_mask_load: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %esi, %k1 |
| ; CHECK-NEXT: vrndscalesd $11, (%rdi), %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovapd %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %b = load <2 x double>, <2 x double>* %bptr |
| %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4) |
| ret <2 x double>%res |
| } |
| |
| define <2 x double> @test_rndscale_sd_maskz(<2 x double> %a, <2 x double> %b, i8 %mask) { |
| ; CHECK-LABEL: test_rndscale_sd_maskz: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vrndscalesd $11, %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask, i32 11, i32 4) |
| ret <2 x double>%res |
| } |
| |
| declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32) |
| |
| define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) { |
| ; CHECK-LABEL: test_rndscale_ss: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vroundss $11, %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4) |
| ret <4 x float>%res |
| } |
| |
| define <4 x float> @test_rndscale_ss_load(<4 x float> %a, <4 x float>* %bptr) { |
| ; CHECK-LABEL: test_rndscale_ss_load: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %b = load <4 x float>, <4 x float>* %bptr |
| %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4) |
| ret <4 x float>%res |
| } |
| |
| define <4 x float> @test_rndscale_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { |
| ; CHECK-LABEL: test_rndscale_ss_mask: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vrndscaless $11, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 11, i32 4) |
| ret <4 x float>%res |
| } |
| |
| define <4 x float> @test_rndscale_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) { |
| ; CHECK-LABEL: test_rndscale_ss_maskz: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vrndscaless $11, %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask, i32 11, i32 4) |
| ret <4 x float>%res |
| } |
| |
| ; Packed rndscale tests: unmasked (all-ones mask, passthru == source) calls |
| ; should lower to a single vrndscalepd/vrndscaleps with immediate 11. |
| declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) |
| |
| define <8 x double> @test7(<8 x double> %a) { |
| ; CHECK-LABEL: test7: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4) |
| ret <8 x double>%res |
| } |
| |
| declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) |
| |
| define <16 x float> @test8(<16 x float> %a) { |
| ; CHECK-LABEL: test8: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4) |
| ret <16 x float>%res |
| } |
| |
| ; rsqrt14 (approximate reciprocal sqrt): all-ones mask selects the plain |
| ; unmasked vrsqrt14ps encoding. |
| define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { |
| ; CHECK-LABEL: test_rsqrt_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] |
| ret <16 x float> %res |
| } |
| declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone |
| |
| ; Packed sqrt tests. The plain/masked/zero-masked variants use the generic |
| ; llvm.sqrt intrinsic with masking expressed as bitcast-to-<N x i1> + select, |
| ; and should fold into a single (masked) vsqrtpd/vsqrtps. The *_round_* |
| ; variants use the x86-specific llvm.x86.avx512.sqrt intrinsic with rounding |
| ; argument i32 11, which selects the {rz-sae} encoding. |
| define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { |
| ; CHECK-LABEL: test_sqrt_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0) |
| ret <8 x double> %1 |
| } |
| |
| define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { |
| ; CHECK-LABEL: test_mask_sqrt_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtpd %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovapd %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0) |
| %2 = bitcast i8 %mask to <8 x i1> |
| %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru |
| ret <8 x double> %3 |
| } |
| |
| define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_sqrt_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0) |
| %2 = bitcast i8 %mask to <8 x i1> |
| %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer |
| ret <8 x double> %3 |
| } |
| declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) |
| |
| define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) { |
| ; CHECK-LABEL: test_sqrt_round_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11) |
| ret <8 x double> %1 |
| } |
| |
| define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { |
| ; CHECK-LABEL: test_mask_sqrt_round_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovapd %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11) |
| %2 = bitcast i8 %mask to <8 x i1> |
| %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru |
| ret <8 x double> %3 |
| } |
| |
| define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_sqrt_round_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11) |
| %2 = bitcast i8 %mask to <8 x i1> |
| %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer |
| ret <8 x double> %3 |
| } |
| declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) nounwind readnone |
| |
| define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { |
| ; CHECK-LABEL: test_sqrt_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsqrtps %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { |
| ; CHECK-LABEL: test_mask_sqrt_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtps %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) { |
| ; CHECK-LABEL: test_maskz_sqrt_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) |
| |
| define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) { |
| ; CHECK-LABEL: test_sqrt_round_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { |
| ; CHECK-LABEL: test_mask_sqrt_round_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) { |
| ; CHECK-LABEL: test_maskz_sqrt_round_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, i32) nounwind readnone |
| |
| ; getexp tests: rounding arg i32 4 selects the plain encoding, i32 8 selects |
| ; the {sae} (suppress-all-exceptions) encoding. All-ones mask -> unmasked. |
| define <8 x double> @test_getexp_pd_512(<8 x double> %a0) { |
| ; CHECK-LABEL: test_getexp_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vgetexppd %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) |
| ret <8 x double> %res |
| } |
| define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) { |
| ; CHECK-LABEL: test_getexp_round_pd_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) |
| ret <8 x double> %res |
| } |
| declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone |
| |
| define <16 x float> @test_getexp_ps_512(<16 x float> %a0) { |
| ; CHECK-LABEL: test_getexp_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vgetexpps %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) { |
| ; CHECK-LABEL: test_getexp_round_ps_512: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) |
| ret <16 x float> %res |
| } |
| declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone |
| |
| ; Scalar sqrt (ss/sd) tests: one function exercises four rounding-arg values |
| ; (4 = plain, 1 = {rd-sae}, 2 = {ru-sae}, 3 = {rz-sae}) combined with |
| ; merge-masked, zero-masked, and unmasked (all-ones) forms; the results are |
| ; summed so every call stays live. |
| declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone |
| |
| define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_sqrt_ss: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm3 |
| ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1} |
| ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm2 |
| ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z} |
| ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 |
| ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) |
| %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1) |
| %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2) |
| %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3) |
| |
| %res.1 = fadd <4 x float> %res0, %res1 |
| %res.2 = fadd <4 x float> %res2, %res3 |
| %res = fadd <4 x float> %res.1, %res.2 |
| ret <4 x float> %res |
| } |
| |
| declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone |
| |
| define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_sqrt_sd: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vmovapd %xmm2, %xmm3 |
| ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1} |
| ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm2 |
| ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z} |
| ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 |
| ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) |
| %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1) |
| %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2) |
| %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3) |
| |
| %res.1 = fadd <2 x double> %res0, %res1 |
| %res.2 = fadd <2 x double> %res2, %res3 |
| %res = fadd <2 x double> %res.1, %res.2 |
| ret <2 x double> %res |
| } |
| |
| define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { |
| ; CHECK-LABEL: test_x86_sse2_cvtsd2si64: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvtsd2si %xmm0, %rax |
| ; CHECK-NEXT: retq |
| %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] |
| ret i64 %res |
| } |
| declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone |
| |
| define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { |
| ; CHECK-LABEL: test_x86_sse2_cvtsi642sd: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] |
| ret <2 x double> %res |
| } |
| declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone |
| |
| define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) { |
| ; CHECK-LABEL: test_x86_avx512_cvttsd2si64: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvttsd2si %xmm0, %rcx |
| ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax |
| ; CHECK-NEXT: addq %rcx, %rax |
| ; CHECK-NEXT: retq |
| %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ; |
| %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ; |
| %res2 = add i64 %res0, %res1 |
| ret i64 %res2 |
| } |
| declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone |
| |
| define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) { |
| ; CHECK-LABEL: test_x86_avx512_cvttsd2usi: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx |
| ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax |
| ; CHECK-NEXT: addl %ecx, %eax |
| ; CHECK-NEXT: retq |
| %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ; |
| %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ; |
| %res2 = add i32 %res0, %res1 |
| ret i32 %res2 |
| } |
| declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone |
| |
| define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) { |
| ; CHECK-LABEL: test_x86_avx512_cvttsd2si: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvttsd2si %xmm0, %ecx |
| ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax |
| ; CHECK-NEXT: addl %ecx, %eax |
| ; CHECK-NEXT: retq |
| %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ; |
| %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ; |
| %res2 = add i32 %res0, %res1 |
| ret i32 %res2 |
| } |
| declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone |
| |
| |
| |
| define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) { |
| ; CHECK-LABEL: test_x86_avx512_cvttsd2usi64: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx |
| ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax |
| ; CHECK-NEXT: addq %rcx, %rax |
| ; CHECK-NEXT: retq |
| %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ; |
| %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ; |
| %res2 = add i64 %res0, %res1 |
| ret i64 %res2 |
| } |
| declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone |
| |
| define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { |
| ; CHECK-LABEL: test_x86_sse_cvtss2si64: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvtss2si %xmm0, %rax |
| ; CHECK-NEXT: retq |
| %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] |
| ret i64 %res |
| } |
| declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone |
| |
| |
| define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { |
| ; CHECK-LABEL: test_x86_sse_cvtsi642ss: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] |
| ret <4 x float> %res |
| } |
| declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone |
| |
| |
; Truncating f32 -> signed int conversions. The trailing i32 operand selects
; rounding: 4 = CUR_DIRECTION (plain vcvttss2si), 8 = {sae} (suppress-all-exceptions).
define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
%res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone

; With -disable-peephole the load still folds into the conversion's memory operand.
define i32 @test_x86_avx512_cvttss2si_load(<4 x float>* %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2si (%rdi), %eax
; CHECK-NEXT: retq
%a1 = load <4 x float>, <4 x float>* %a0
%res = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a1, i32 4) ;
ret i32 %res
}

; Same as above but producing a 64-bit result.
define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2si %xmm0, %rcx
; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
| |
; Truncating f32 -> unsigned int conversions; i32 4 = current rounding,
; i32 8 = {sae}, mirroring the signed cvttss2si tests above.
define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx
; CHECK-NEXT: vcvttss2usi %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
%res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone

; 64-bit unsigned variant.
define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttss2usi %xmm0, %rcx
; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
| |
; Rounding f64 -> i64/u64 conversions with embedded rounding control:
; i32 4 = current direction (no suffix), 3 = {rz-sae}, 1 = {rd-sae}.
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq

%res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone

; Signed counterpart of the test above.
define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2si %xmm0, %rax
; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq

%res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
| |
; Rounding f32 -> i64/u64 conversions; immediates as above
; (4 = current, 3 = {rz-sae}, 1 = {rd-sae}).
define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2usi %xmm0, %rax
; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq

%res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone

; Signed counterpart of the test above.
define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2si %xmm0, %rax
; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq

%res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
%res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3)
%res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1)
%res3 = add i64 %res, %res1
%res4 = add i64 %res3, %res2
ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone
| |
; Rounding f64 -> i32/u32 conversions; immediates as above
; (4 = current, 3 = {rz-sae}, 1 = {rd-sae}).
define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2usi %xmm0, %eax
; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq

%res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone

; Signed counterpart of the test above.
define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtsd2si %xmm0, %eax
; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq

%res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
| |
; Rounding f32 -> i32/u32 conversions; immediates as above
; (4 = current, 3 = {rz-sae}, 1 = {rd-sae}).
define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2usi %xmm0, %eax
; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq

%res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone

; Signed counterpart of the test above.
define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtss2si %xmm0, %eax
; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq

%res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
%res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3)
%res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1)
%res3 = add i32 %res, %res1
%res4 = add i32 %res3, %res2
ret i32 %res4
}
declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
| |
; Half -> single precision conversion (vcvtph2ps), 512-bit. The masked
; intrinsic carries passthru (<16 x float>), mask (i16) and rounding (i32:
; 4 = current, 8 = {sae}) operands; mask -1 with a zero passthru folds to the
; unmasked form.
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

; Unmasked with {sae}.
define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

; Merge-masking: unselected lanes keep %a1 ({%k1} without {z}).
define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
ret <16 x float> %res
}

; Zero-masking with {sae} ({%k1} {z}).
define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

; Zero-masking with current rounding.
define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
| |
; Single -> half precision (vcvtps2ph, imm 2 = round control). Exercises
; zero-masked, merge-masked, and unmasked store-to-memory forms in one
; function; the unmasked result (%res1) is stored directly to %dst.
define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, <16 x i16> * %dst) {
; CHECK-LABEL: test_x86_vcvtps2ph_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1
; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi)
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
%res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 %mask)
%res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> %src, i16 %mask)
store <16 x i16> %res1, <16 x i16> * %dst
%res = add <16 x i16> %res2, %res3
ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
| |
; Vector FP compares into a mask register. The first i32 is the comparison
; predicate, the second the sae/rounding operand (8 = {sae}, 4 = none).
define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
%1 = bitcast <16 x i1> %res to i16
ret i16 %1
}
declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)

; Double-precision variant: predicate 4 prints as vcmpneqpd.
define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: test_cmppd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
%1 = bitcast <8 x i1> %res to i8
ret i8 %1
}
declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
| |
; Function Attrs: nounwind readnone

; fp min - max (rounding operand 4 = current direction, so no suffix is printed)
define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_vmaxpd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %1
}
declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32)

define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_vminpd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %1
}
declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32)
| |
; Generic masked store of a <4 x float> with only mask bit 0 live (and i8 1);
; selects the scalar masked move, vmovss ... {%k1}.
define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
; CHECK-LABEL: test_mask_store_ss:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
%1 = and i8 %mask, 1
%2 = bitcast i8* %ptr to <4 x float>*
%3 = bitcast i8 %1 to <8 x i1>
%extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %data, <4 x float>* %2, i32 1, <4 x i1> %extract)
ret void
}
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) #1
| |
| |
declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)

; Embedded-rounding arithmetic. Rounding operand: 0 = {rn-sae}, 1 = {rd-sae},
; 2 = {ru-sae}, 3 = {rz-sae} (4 = current, no suffix).
define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}
| |
; Unmasked vmulps with each of the four static rounding modes (0..3).
define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}
| |
;; mask float
;; select-with-zero after the rounded multiply folds into zero-masking ({%k1} {z}).
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
| |
;; With Passthru value
;; select-with-%passthru folds into merge-masking ({%k1}, destination preloaded).
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_vmulps_mask_passthru_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
ret <16 x float> %3
}
| |
;; mask double
;; Same zero-masking pattern as the float tests, using i8 masks for 8 lanes.
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 1)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 2)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_vmulpd_mask_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 3)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
ret <8 x double> %3
}
| |
; Zero-masked vaddps across all rounding modes (0..3) plus current (4, no suffix).
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
ret <16 x float> %3
}
| |
; Merge-masked vaddps (select with %src) across all rounding modes plus current.
define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
| |
; Unmasked vaddps across all rounding modes plus current (the i16 %mask
; parameter is intentionally unused in these variants).
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
ret <16 x float> %1
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %1
}
declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
| |
; Merge-masked vsubps across the four static rounding modes (0..3); the
; current-rounding variant follows the same pattern.
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
ret <16 x float> %3
}
| |
| define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_sub_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| ; Unmasked llvm.x86.avx512.sub.ps.512 for each rounding immediate; each must |
| ; lower to a single vsubps whose rounding-mode suffix matches the immediate |
| ; (4 = current direction, no suffix). The i16 %mask parameter is unused. |
| define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_sub_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| ret <16 x float> %1 |
| } |
| |
| ; Zero-masked llvm.x86.avx512.div.ps.512: selecting against zeroinitializer |
| ; must fold into vdivps with a {%k1} {z} zeroing mask on the destination, |
| ; for each rounding immediate (0-3 = rn/rd/ru/rz-sae, 4 = current). |
| define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_div_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| ; Merge-masked llvm.x86.avx512.div.ps.512: select against %src must fold into |
| ; vdivps with a {%k1} merge mask writing into the passthru register, for each |
| ; rounding immediate (0-3 = rn/rd/ru/rz-sae, 4 = current). |
| define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_div_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| ; Unmasked llvm.x86.avx512.div.ps.512 for each rounding immediate; each must |
| ; lower to a single vdivps whose rounding suffix matches the immediate |
| ; (4 = current direction, no suffix). The i16 %mask parameter is unused. |
| define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 1) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 2) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 3) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_div_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| ret <16 x float> %1 |
| } |
| declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32) |
| |
| ; llvm.x86.avx512.min.ps.512 in zero-masked, merge-masked, and unmasked |
| ; forms. min/max have no rounding step, so only immediate 8 ({sae}, suppress |
| ; all exceptions) and 4 (current, no operand) are exercised. |
| define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_min_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_min_round_ps_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_min_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_min_round_ps_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_min_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| ret <16 x float> %1 |
| } |
| declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32) |
| |
| ; llvm.x86.avx512.max.ps.512, mirroring the min.ps tests above: zero-masked, |
| ; merge-masked, and unmasked forms with immediate 8 ({sae}) and 4 (current). |
| define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_maskz_max_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_max_round_ps_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_mask_max_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %zmm2, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src |
| ret <16 x float> %3 |
| } |
| |
| define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_max_round_ps_sae: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_mm512_max_round_ps_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) |
| ret <16 x float> %1 |
| } |
| declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32) |
| |
| ; Scalar-single add via the masked intrinsic llvm.x86.avx512.mask.add.ss.round |
| ; (args: a, b, passthru, i8 mask, i32 rounding). The merge-masked form must |
| ; lower to vaddss into the passthru register with a {%k1} mask; rounding |
| ; imm 0-3 selects {rn,rd,ru,rz}-sae and 4 emits no rounding operand. |
| declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone |
| |
| define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_mask_add_ss_rn: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_mask_add_ss_rd: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_mask_add_ss_ru: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_mask_add_ss_rz: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_mask_add_ss_current: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) |
| ret <4 x float> %res |
| } |
| |
| ; Remaining add.ss.round cases: a zeroing-masked form ({%k1} {z} when the |
| ; passthru is zeroinitializer), an unmasked form (mask = -1, no mask on the |
| ; instruction), and two memory-fold tests where a scalar load built up via |
| ; insertelement (upper lanes zero) must fold into vaddss's memory operand. |
| define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_add_ss_rn: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %edi, %k1 |
| ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) { |
| ; CHECK-LABEL: test_add_ss_rn: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) { |
| ; CHECK-LABEL: test_mask_add_ss_current_memfold: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %esi, %k1 |
| ; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %a1.val = load float, float* %a1 |
| %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 |
| %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 |
| %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 |
| %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, float* %a1, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_add_ss_current_memfold: |
| ; CHECK: ## %bb.0: |
| ; CHECK-NEXT: kmovw %esi, %k1 |
| ; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %a1.val = load float, float* %a1 |
| %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 |
| %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 |
| %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 |
| %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 |
| %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4) |
| ret <4 x float> %res |
| } |
| |
| declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone |
| |
| define |