blob: cd455bf6d541fdedd47bdf818de7f113adcfe0bb [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 | FileCheck %s
; Signed <32 x i16> -> <32 x half> conversion intrinsic. The trailing i32 is
; the rounding-control operand; the tests in this file pass the constants 4
; and 10.
declare <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16>, i32)
; Merge-masked signed word -> half conversion: the sitofp.round intrinsic
; (rounding operand 4) feeds a select on the bitcast mask, and lanes whose
; mask bit is clear keep %arg1. The expected assembly converts into the
; passthrough register under %k1 and then copies it to %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Same merge-masked signed word -> half conversion, but written with the
; generic sitofp instruction instead of the target intrinsic. The expected
; assembly is a masked vcvtw2ph into the passthrough register plus a copy.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = sitofp <32 x i16> %arg0 to <32 x half>
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Broadcast form of the merge-masked signed word -> half conversion: one i16
; is loaded, splatted to all 32 lanes, and converted through the sitofp.round
; intrinsic (rounding operand 4). The expected assembly folds the splat into
; a {1to32} memory operand and merges directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x i16> poison, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> poison, <32 x i32> zeroinitializer
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}
; Broadcast form of the merge-masked signed word -> half conversion, written
; with the generic sitofp instruction: one i16 is loaded and splatted to all
; 32 lanes before converting. The expected assembly folds the splat into a
; {1to32} memory operand and merges directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x i16> poison, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> poison, <32 x i32> zeroinitializer
  %res0 = sitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}
; Merge-masked signed word -> half conversion with an explicit rounding
; operand of 10, which the expected assembly prints as {ru-sae}. Lanes whose
; mask bit is clear keep %arg1.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Unmasked signed word -> half conversion through the sitofp.round intrinsic
; (rounding operand 4). The expected assembly is a single in-place vcvtw2ph
; with no mask register.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  ret <32 x half> %cvt
}
; Unmasked signed word -> half conversion written with the generic sitofp
; instruction. The expected assembly is a single in-place vcvtw2ph with no
; mask register.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = sitofp <32 x i16> %arg0 to <32 x half>
  ret <32 x half> %cvt
}
; Zero-masking form of the signed word -> half conversion: the intrinsic
; result (rounding operand 4) is selected against zeroinitializer, and the
; expected assembly carries the {z} modifier.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %zeroed = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> zeroinitializer
  ret <32 x half> %zeroed
}
; Zero-masking form of the signed word -> half conversion, written with the
; generic sitofp instruction and a select against zeroinitializer. The
; expected assembly carries the {z} modifier.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = sitofp <32 x i16> %arg0 to <32 x half>
  %zeroed = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> zeroinitializer
  ret <32 x half> %zeroed
}
; Merge-masked signed word -> half conversion whose source is a full-width
; vector load. The expected assembly uses (%rdi) directly as the memory
; operand of vcvtw2ph and merges into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %src = load <32 x i16>, ptr %arg0
  %cvt = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %src, i32 4)
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Merge-masked signed word -> half conversion of a full-width vector load,
; written with the generic sitofp instruction. The expected assembly uses
; (%rdi) directly as the memory operand of vcvtw2ph.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %src = load <32 x i16>, ptr %arg0
  %cvt = sitofp <32 x i16> %src to <32 x half>
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Masked <32 x half> -> <32 x i16> conversion intrinsic (vcvtph2w). As used by
; the tests in this file the operands are: source vector, passthrough vector,
; i32 lane mask, i32 rounding-control constant.
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half>, <32 x i16>, i32, i32)
; Merge-masked half -> word conversion: %arg1 is the passthrough, %mask the
; lane mask, and the rounding operand is 4. The expected assembly converts
; into the passthrough register under %k1 and then copies it to %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Broadcast form of the merge-masked half -> word conversion: one half is
; loaded, splatted to all 32 lanes, and passed to the vcvtph2w intrinsic
; (rounding operand 4). The expected assembly folds the splat into a {1to32}
; memory operand and merges directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x half> poison, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> poison, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}
; Merge-masked half -> word conversion with an explicit rounding operand of
; 9, which the expected assembly prints as {rd-sae}. %arg1 is the
; passthrough for lanes whose mask bit is clear.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w {rd-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
  ret <32 x i16> %cvt
}
; Half -> word conversion with an all-ones mask (-1) and rounding operand 4.
; The expected assembly is an unmasked in-place vcvtph2w, and the
; passthrough %arg1 does not appear in it.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2w %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %cvt
}
; Zero-masking form of the half -> word conversion: the passthrough operand
; is zeroinitializer (rounding operand 4), and the expected assembly carries
; the {z} modifier.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Merge-masked half -> word conversion whose source is a full-width vector
; load (rounding operand 4). The expected assembly uses (%rdi) directly as
; the memory operand of vcvtph2w and merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %src = load <32 x half>, ptr %arg0
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %src, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Unsigned <32 x i16> -> <32 x half> conversion intrinsic. The trailing i32 is
; the rounding-control operand; the tests in this file pass the constants 4
; and 10.
declare <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16>, i32)
; Merge-masked unsigned word -> half conversion: the uitofp.round intrinsic
; (rounding operand 4) feeds a select on the bitcast mask, and lanes whose
; mask bit is clear keep %arg1. The expected assembly converts into the
; passthrough register under %k1 and then copies it to %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Same merge-masked unsigned word -> half conversion, but written with the
; generic uitofp instruction instead of the target intrinsic. The expected
; assembly is a masked vcvtuw2ph into the passthrough register plus a copy.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = uitofp <32 x i16> %arg0 to <32 x half>
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Broadcast form of the merge-masked unsigned word -> half conversion: one
; i16 is loaded, splatted to all 32 lanes, and converted through the
; uitofp.round intrinsic (rounding operand 4). The expected assembly folds
; the splat into a {1to32} memory operand and merges directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x i16> poison, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> poison, <32 x i32> zeroinitializer
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}
; Broadcast form of the merge-masked unsigned word -> half conversion,
; written with the generic uitofp instruction: one i16 is loaded and splatted
; to all 32 lanes before converting. The expected assembly folds the splat
; into a {1to32} memory operand and merges directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x i16> poison, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> poison, <32 x i32> zeroinitializer
  %res0 = uitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}
; Merge-masked unsigned word -> half conversion with an explicit rounding
; operand of 10, which the expected assembly prints as {ru-sae}. Lanes whose
; mask bit is clear keep %arg1.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Unmasked unsigned word -> half conversion through the uitofp.round
; intrinsic (rounding operand 4). The expected assembly is a single in-place
; vcvtuw2ph with no mask register.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  ret <32 x half> %cvt
}
; Unmasked unsigned word -> half conversion written with the generic uitofp
; instruction. The expected assembly is a single in-place vcvtuw2ph with no
; mask register.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = uitofp <32 x i16> %arg0 to <32 x half>
  ret <32 x half> %cvt
}
; Zero-masking form of the unsigned word -> half conversion: the intrinsic
; result (rounding operand 4) is selected against zeroinitializer, and the
; expected assembly carries the {z} modifier.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %zeroed = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> zeroinitializer
  ret <32 x half> %zeroed
}
; Zero-masking form of the unsigned word -> half conversion, written with
; the generic uitofp instruction and a select against zeroinitializer. The
; expected assembly carries the {z} modifier.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %cvt = uitofp <32 x i16> %arg0 to <32 x half>
  %zeroed = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> zeroinitializer
  ret <32 x half> %zeroed
}
; Merge-masked unsigned word -> half conversion whose source is a full-width
; vector load. The expected assembly uses (%rdi) directly as the memory
; operand of vcvtuw2ph and merges into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %src = load <32 x i16>, ptr %arg0
  %cvt = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %src, i32 4)
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Merge-masked unsigned word -> half conversion of a full-width vector load,
; written with the generic uitofp instruction. The expected assembly uses
; (%rdi) directly as the memory operand of vcvtuw2ph.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  ; One predicate bit per result lane.
  %lanes = bitcast i32 %mask to <32 x i1>
  %src = load <32 x i16>, ptr %arg0
  %cvt = uitofp <32 x i16> %src to <32 x half>
  %merged = select <32 x i1> %lanes, <32 x half> %cvt, <32 x half> %arg1
  ret <32 x half> %merged
}
; Masked <32 x half> -> <32 x i16> conversion intrinsic (vcvtph2uw). As used
; by the tests in this file the operands are: source vector, passthrough
; vector, i32 lane mask, i32 rounding-control constant.
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half>, <32 x i16>, i32, i32)
; Merge-masked half -> unsigned word conversion: %arg1 is the passthrough,
; %mask the lane mask, and the rounding operand is 4. The expected assembly
; converts into the passthrough register under %k1 and then copies it to
; %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Broadcast form of the merge-masked half -> unsigned word conversion: one
; half is loaded, splatted to all 32 lanes, and passed to the vcvtph2uw
; intrinsic (rounding operand 4). The expected assembly folds the splat into
; a {1to32} memory operand and merges directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x half> poison, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> poison, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}
; Merge-masked half -> unsigned word conversion with an explicit rounding
; operand of 9, which the expected assembly prints as {rd-sae}. %arg1 is the
; passthrough for lanes whose mask bit is clear.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw {rd-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
  ret <32 x i16> %cvt
}
; Half -> unsigned word conversion with an all-ones mask (-1) and rounding
; operand 4. The expected assembly is an unmasked in-place vcvtph2uw, and
; the passthrough %arg1 does not appear in it.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %cvt
}
; Zero-masking form of the half -> unsigned word conversion: the passthrough
; operand is zeroinitializer (rounding operand 4), and the expected assembly
; carries the {z} modifier.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Merge-masked half -> unsigned word conversion whose source is a full-width
; vector load (rounding operand 4). The expected assembly uses (%rdi)
; directly as the memory operand of vcvtph2uw and merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %src = load <32 x half>, ptr %arg0
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %src, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Masked <32 x half> -> <32 x i16> conversion intrinsic (vcvttph2w). As used
; by the tests in this file the operands are: source vector, passthrough
; vector, i32 lane mask, and a trailing i32 constant (4, or 8 which the
; expected assembly prints as {sae}).
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half>, <32 x i16>, i32, i32)
; Merge-masked vcvttph2w: %arg1 is the passthrough, %mask the lane mask, and
; the trailing operand is 4. The expected assembly converts into the
; passthrough register under %k1 and then copies it to %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Broadcast form of the merge-masked vcvttph2w: one half is loaded, splatted
; to all 32 lanes, and passed to the intrinsic (trailing operand 4). The
; expected assembly folds the splat into a {1to32} memory operand and merges
; directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x half> poison, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> poison, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}
; Merge-masked vcvttph2w with a trailing operand of 8, which the expected
; assembly prints as {sae}. %arg1 is the passthrough for lanes whose mask
; bit is clear.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_sae:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
  ret <32 x i16> %cvt
}
; vcvttph2w with an all-ones mask (-1) and trailing operand 4. The expected
; assembly is an unmasked in-place conversion, and the passthrough %arg1
; does not appear in it.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %cvt
}
; Zero-masking form of vcvttph2w: the passthrough operand is zeroinitializer
; (trailing operand 4), and the expected assembly carries the {z} modifier.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Merge-masked vcvttph2w whose source is a full-width vector load (trailing
; operand 4). The expected assembly uses (%rdi) directly as the memory
; operand and merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %src = load <32 x half>, ptr %arg0
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %src, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Masked <32 x half> -> <32 x i16> conversion intrinsic (vcvttph2uw). As used
; by the tests in this file the operands are: source vector, passthrough
; vector, i32 lane mask, and a trailing i32 constant (4, or 8 which the
; expected assembly prints as {sae}).
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half>, <32 x i16>, i32, i32)
; Merge-masked vcvttph2uw: %arg1 is the passthrough, %mask the lane mask,
; and the trailing operand is 4. The expected assembly converts into the
; passthrough register under %k1 and then copies it to %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Broadcast form of the merge-masked vcvttph2uw: one half is loaded,
; splatted to all 32 lanes, and passed to the intrinsic (trailing operand
; 4). The expected assembly folds the splat into a {1to32} memory operand
; and merges directly into %zmm0.
;
; The splat placeholders are poison rather than the deprecated undef. The
; all-zero shuffle mask reads only lane 0 of the first operand, so the
; placeholder lanes never reach the result.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, ptr %arg0
  ; Canonical splat: insert into lane 0, then shuffle lane 0 everywhere.
  %scalar_in_vector = insertelement <32 x half> poison, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> poison, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}
; Merge-masked vcvttph2uw with a trailing operand of 8, which the expected
; assembly prints as {sae}. %arg1 is the passthrough for lanes whose mask
; bit is clear.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_sae:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
  ret <32 x i16> %cvt
}
; vcvttph2uw with an all-ones mask (-1) and trailing operand 4. The expected
; assembly is an unmasked in-place conversion, and the passthrough %arg1
; does not appear in it.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %cvt
}
; Zero-masking form of vcvttph2uw: the passthrough operand is
; zeroinitializer (trailing operand 4), and the expected assembly carries
; the {z} modifier.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}
; Merge-masked vcvttph2uw whose source is a full-width vector load (trailing
; operand 4). The expected assembly uses (%rdi) directly as the memory
; operand and merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %src = load <32 x half>, ptr %arg0
  %cvt = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %src, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %cvt
}