| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s |
| |
| declare i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half>, <8 x half>, i32, i32) |
| |
define i32 @test_x86_avx512fp16_ucomi_sh_lt(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_x86_avx512fp16_ucomi_sh_lt:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpngesh %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
; Predicate 9 (NGE, unordered "<") with rounding arg 4 (current direction,
; no embedded rounding) lowers to a single vcmpngesh into a mask register.
%res = call i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %a0, <8 x half> %a1, i32 9, i32 4)
ret i32 %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half>, i32) nounwind readnone |
| |
define <32 x half> @test_sqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtph %zmm0, %zmm0
; CHECK-NEXT: retq
; Generic llvm.sqrt on <32 x half> selects the full-width vsqrtph.
%1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
ret <32 x half> %1
}
| |
define <32 x half> @test_sqrt_ph_512_fast(<32 x half> %a0, <32 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_512_fast:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm0
; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
; With fast-math flags, a1 / sqrt(a0) is turned into a1 * rsqrt(a0)
; using the vrsqrtph reciprocal-sqrt estimate (no refinement step emitted).
%1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = fdiv fast <32 x half> %a1, %1
ret <32 x half> %2
}
| |
define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrt" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm0
; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
; Same a1/sqrt(a0) pattern as above, but with the estimate explicitly
; requested via the "reciprocal-estimates"="vec-sqrt" function attribute.
%1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = fdiv fast <32 x half> %a1, %1
ret <32 x half> %2
}
| |
define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrth:1" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm2
; CHECK-NEXT: vmulph %zmm2, %zmm0, %zmm0
; CHECK-NEXT: vfmadd213ph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm0
; CHECK-NEXT: vmulph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm2
; CHECK-NEXT: vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vmulph %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
; "vec-sqrth:1" requests the estimate plus one Newton-Raphson refinement
; step, visible as the vfmadd213ph/vmulph sequence around vrsqrtph.
%1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = fdiv fast <32 x half> %a1, %1
ret <32 x half> %2
}
| |
define <32 x half> @test_mask_sqrt_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; The select between sqrt result and %passthru folds into a
; merge-masked vsqrtph writing over the passthru register.
%1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
ret <32 x half> %3
}
| |
define <32 x half> @test_maskz_sqrt_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; Selecting against zeroinitializer folds into the zero-masked {z} form.
%1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
ret <32 x half> %3
}
| |
| declare <32 x half> @llvm.sqrt.v32f16(<32 x half>) |
| |
define <32 x half> @test_sqrt_round_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_round_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtph {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
; Rounding arg 11 encodes embedded round-toward-zero + SAE ({rz-sae}).
%1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
ret <32 x half> %1
}
| |
define <32 x half> @test_mask_sqrt_round_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; Embedded {rz-sae} rounding (arg 11) combined with merge masking via select.
%1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
ret <32 x half> %3
}
| |
define <32 x half> @test_maskz_sqrt_round_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; Embedded {rz-sae} rounding combined with zero masking via select-to-zero.
%1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
ret <32 x half> %3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32) nounwind readnone |
| |
define <8 x half> @test_sqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Scalar masked sqrt: %a2 is the merge-masked passthru destination.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 4)
ret <8 x half> %res
}
| |
define half @test_sqrt_sh2(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh2:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmulsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
; Scalar fast a1/sqrt(a0) becomes a1 * rsqrt(a0) via the vrsqrtsh estimate.
%1 = call fast half @llvm.sqrt.f16(half %a0)
%2 = fdiv fast half %a1, %1
ret half %2
}
| |
define half @test_sqrt_sh3(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
; A lone fast sqrt (no following divide) still uses the exact vsqrtsh.
%1 = call fast half @llvm.sqrt.f16(half %a0)
ret half %1
}
| |
| declare half @llvm.sqrt.f16(half) |
| |
define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Rounding arg 10 encodes embedded round-up + SAE ({ru-sae}) on the
; merge-masked scalar sqrt.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 10)
ret <8 x half> %res
}
| |
define <8 x half> @test_sqrt_sh_nomask(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_sqrt_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; All-ones mask (-1): the passthru is dead and no k-register is used.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 -1, i32 4)
ret <8 x half> %res
}
| |
define <8 x half> @test_sqrt_sh_z(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
; Zero passthru selects the zero-masked {z} form with {ru-sae} rounding (10).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 %mask, i32 10)
ret <8 x half> %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half>, <32 x half>, i32) |
| declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8) |
| |
define <32 x half> @test_rsqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm0
; CHECK-NEXT: retq
; Unmasked (mask = -1) full-width reciprocal-sqrt estimate.
%res = call <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half> %a0, <32 x half> zeroinitializer, i32 -1)
ret <32 x half> %res
}
| |
define <8 x half> @test_rsqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_rsqrt_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
; Unmasked scalar rsqrt; %a0 supplies both the source and the upper elements.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> %a2, i8 -1)
ret <8 x half> %res
}
| |
define <8 x half> @test_rsqrt_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rsqrt_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
; The load of the second operand folds into vrsqrtsh's memory operand.
%a1 = load <8 x half>, ptr %a1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> undef, i8 -1)
ret <8 x half> %res
}
| |
define <8 x half> @test_rsqrt_sh_maskz(<8 x half> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
; Zero passthru produces the zero-masked {z} form of vrsqrtsh.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 %mask)
ret <8 x half> %res
}
| |
define <8 x half> @test_rsqrt_sh_mask(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Merge-masked scalar rsqrt with %c0 as the passthru destination.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask)
ret <8 x half> %res
}
| |
| declare <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half>, i32) |
| |
define i32 @test_int_x86_avx512_fpclass_ph_512(<32 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassph $2, %zmm0, %k1
; CHECK-NEXT: vfpclassph $4, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; The `and` of the two fpclass results folds into a masked second
; vfpclassph: the imm-2 test produces the mask for the imm-4 test.
%res = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 4)
%res1 = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 2)
%1 = and <32 x i1> %res1, %res
%2 = bitcast <32 x i1> %1 to i32
ret i32 %2
}
| |
| declare i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half>, i32, i8) |
| |
define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasssh $4, %xmm0, %k1
; CHECK-NEXT: vfpclasssh $2, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
; The first (unmasked, imm 2) result is fed as the mask of the second
; (imm 4) call, chaining the two scalar fpclass tests through k1.
%res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 %res)
ret i8 %res1
}
| |
define i8 @test_int_x86_avx512_mask_fpclass_sh_load(ptr %x0ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasssh $4, (%rdi), %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
; The vector load folds into vfpclasssh's memory operand.
%x0 = load <8 x half>, ptr %x0ptr
%res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 -1)
ret i8 %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half>, <32 x half>, i32) |
| |
define <32 x half> @test_rcp_ph_512(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
; CHECK-LABEL: test_rcp_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrcpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
; Merge-masked full-width reciprocal estimate; %a1 is the passthru.
%res = call <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half> %a0, <32 x half> %a1, i32 %mask)
ret <32 x half> %res
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half>, <8 x half>, <8 x half>, i8) |
| |
define <8 x half> @test_rcp_sh(<8 x half> %a0) {
; CHECK-LABEL: test_rcp_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vrcpsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
; Unmasked scalar reciprocal estimate (mask -1, zero passthru ignored).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
ret <8 x half> %res
}
| |
define <8 x half> @test_rcp_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rcp_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vrcpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
; The load of the second operand folds into vrcpsh's memory operand.
%a1 = load <8 x half>, ptr %a1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 -1)
ret <8 x half> %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half>, i32, <32 x half>, i32, i32) |
| |
define <32 x half>@test_int_x86_avx512_mask_reduce_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vreduceph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vreduceph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; Masked (imm 8, default rounding arg 4) and unmasked SAE (imm 4,
; rounding arg 8 = {sae}) forms, summed so both results stay live.
%res = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
%res2 = fadd <32 x half> %res, %res1
ret <32 x half> %res2
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half>, <8 x half>,<8 x half>, i8, i32, i32) |
| |
define <8 x half>@test_int_x86_avx512_mask_reduce_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vreducesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Merge-masked scalar reduce, imm 4, no embedded rounding (arg 4).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_reduce_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vreducesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; Unmasked (mask -1) scalar reduce with SAE (rounding arg 8).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
ret <8 x half> %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half>, i32, <32 x half>, i32, i32) |
| |
define <32 x half>@test_int_x86_avx512_mask_rndscale_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrndscaleph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vrndscaleph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
; Masked (imm 8) and unmasked-SAE (imm 4, rounding arg 8) rndscale,
; summed so both instruction forms are exercised.
%res = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
%res2 = fadd <32 x half> %res, %res1
ret <32 x half> %res2
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half>, <8 x half>,<8 x half>, i8, i32, i32) |
| |
define <8 x half>@test_int_x86_avx512_mask_rndscale_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrndscalesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Merge-masked scalar rndscale, imm 4, no embedded rounding (arg 4).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_rndscale_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscalesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; Unmasked (mask -1) scalar rndscale with SAE (rounding arg 8).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
ret <8 x half> %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half>, <32 x half>, i32, i32) |
| |
define <32 x half>@test_int_x86_avx512_mask_getexp_ph_512(<32 x half> %x0, <32 x half> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vgetexpph {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
; Masked (default rounding) and unmasked SAE (arg 8) getexp, summed.
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> %x1, i32 %x2, i32 4)
%res2 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> zeroinitializer, i32 -1, i32 8)
%res3 = fadd <32 x half> %res1, %res2
ret <32 x half> %res3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half>, <8 x half>,<8 x half>, i8, i32) |
| |
define <8 x half>@test_int_x86_avx512_mask_getexp_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Merge-masked scalar getexp; %x3 is the passthru destination.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_getexp_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexpsh {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; Unmasked (mask -1) scalar getexp with SAE (rounding arg 8).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_getexp_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
; The load of the second operand folds into vgetexpsh's memory operand.
%x1 = load <8 x half>, ptr %x1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
ret <8 x half> %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half>, i32, <32 x half>, i32, i32) |
| |
define <32 x half>@test_int_x86_avx512_mask_getmant_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vgetmantph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
; Masked (imm 8) and unmasked SAE (imm 4, rounding arg 8) getmant, summed.
%res = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
%res2 = fadd <32 x half> %res, %res1
ret <32 x half> %res2
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half>, <8 x half>, i32, <8 x half>, i8, i32) |
| |
define <8 x half>@test_int_x86_avx512_mask_getmant_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantsh $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Merge-masked scalar getmant, imm 11, default rounding (arg 4).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 %x4, i32 4)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_getmant_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetmantsh $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; Unmasked (mask -1) scalar getmant, imm 11; passthru is dead.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 -1, i32 4)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_getmant_sh_z(<8 x half> %x0, <8 x half> %x1, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantsh $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
; Zero passthru selects the zero-masked {z} form of vgetmantsh.
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> zeroinitializer, i8 %x4, i32 4)
ret <8 x half> %res
}
| |
| declare <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half>, <32 x half>, <32 x half>, i32, i32) |
| |
define <32 x half>@test_int_x86_avx512_mask_scalef_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefph {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vscalefph {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
; Masked {rz-sae} (arg 11) and unmasked {rn-sae} (arg 8) scalef, summed.
; %mask is an unused pre-bitcast of %x3 kept in the IR.
%mask = bitcast i32 %x3 to <32 x i1>
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3, i32 11)
%res2 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> zeroinitializer, i32 -1, i32 8)
%res3 = fadd <32 x half> %res1, %res2
ret <32 x half> %res3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half>, <8 x half>,<8 x half>, i8, i32) |
| |
define <8 x half>@test_int_x86_avx512_mask_scalef_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Merge-masked scalar scalef with default rounding (arg 4).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_scalef_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefsh {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; Unmasked scalar scalef with embedded {rn-sae} rounding (arg 8).
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
ret <8 x half> %res
}
| |
define <8 x half>@test_int_x86_avx512_mask_scalef_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
; The load of the second operand folds into vscalefsh's memory operand.
%x1 = load <8 x half>, ptr %x1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
ret <8 x half> %res
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32) |
| |
define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vaddsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Exercises all four vaddsh forms in one chain: unmasked, merge-masked,
; zero-masked, and merge-masked with a folded scalar memory operand.
%val.half = load half,ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
%res3 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src , i8 %mask, i32 4)
ret <8 x half> %res3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32) |
| |
define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsubsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vsubsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Exercises all four vsubsh forms in one chain: unmasked, merge-masked,
; zero-masked, and merge-masked with a folded scalar memory operand.
%val.half = load half,ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
%res3 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src , i8 %mask, i32 4)
ret <8 x half> %res3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32) |
| |
define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vmulsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vmulsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vmulsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmulsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Exercises all four vmulsh forms in one chain: unmasked, merge-masked,
; zero-masked, and merge-masked with a folded scalar memory operand.
%val.half = load half,ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
%res3 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src , i8 %mask, i32 4)
ret <8 x half> %res3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32) |
| |
define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vdivsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vdivsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Exercises all four vdivsh forms in one chain: unmasked, merge-masked,
; zero-masked, and merge-masked with a folded scalar memory operand.
%val.half = load half,ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
%res3 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src , i8 %mask, i32 4)
ret <8 x half> %res3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32) |
| |
define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_min_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vminsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vminsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Exercises all four vminsh forms in one chain: unmasked, merge-masked,
; zero-masked, and merge-masked with a folded scalar memory operand.
%val.half = load half,ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
%res3 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src , i8 %mask, i32 4)
ret <8 x half> %res3
}
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32) |
| |
define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_max_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vmaxsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmaxsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
; Exercises all four vmaxsh forms in one chain: unmasked, merge-masked,
; zero-masked, and merge-masked with a folded scalar memory operand.
%val.half = load half,ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
%res3 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src , i8 %mask, i32 4)
ret <8 x half> %res3
}
| |
| declare i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half>, <8 x half>, i32, i8, i32) |
| |
define i8 @test_int_x86_avx512_mask_cmp_sh(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
; Predicate 3 (UNORD) with mask %x3 lowers to a masked vcmpunordsh.
%res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 %x3, i32 4)
ret i8 %res2
}
| |
| |
; Covers four vcmpsh predicate/mask/rounding combinations in one function:
; imm 2 (le) unmasked, imm 3 (unord) unmasked with {sae} (i32 8), imm 4 (neq)
; masked, imm 5 (nlt) masked with {sae}; the four i8 results are ANDed so all
; four compares stay live through codegen.
define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh_all:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcmplesh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %ecx
; CHECK-NEXT:    vcmpunordsh {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %edx
; CHECK-NEXT:    vcmpneqsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %esi
; CHECK-NEXT:    vcmpnltsh {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    andb %dl, %al
; CHECK-NEXT:    andb %cl, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res1 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 5, i8 %x3, i32 8)

  %res11 = and i8 %res1, %res2
  %res12 = and i8 %res3, %res4
  %res13 = and i8 %res11, %res12
  ret i8 %res13
}
| |
declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)

; Merge-masked signed i32 -> half conversion: the rounded sitofp intrinsic with
; current rounding (i32 4) plus an explicit select on the i16 mask folds into a
; single vcvtdq2ph %zmm0, %ymm1 {%k1} merging into the passthru %x1.
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}
| |
; Same merge-masked conversion but with static rounding i32 10, which must be
; emitted as the {ru-sae} (round-up) embedded-rounding form.
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}
| |
; Unmasked form: no select, so the conversion lowers to plain vcvtdq2ph.
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}
| |
; Zero-masking form: select against zeroinitializer folds into {%k1} {z}.
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}
| |
; Generic IR sitofp (no intrinsic) must select the same vcvtdq2ph instruction.
define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = sitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}
| |
declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)

; Unsigned counterpart of the dq2ph tests: merge-masked vcvtudq2ph with static
; rounding i32 10 emitted as the {ru-sae} embedded-rounding form.
define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}
| |
; Unmasked unsigned conversion: plain vcvtudq2ph, no mask register involved.
define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}
| |
; Zero-masking form of the unsigned conversion: select-with-zero folds to {z}.
define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}
| |
; Generic IR uitofp must select the same vcvtudq2ph instruction.
define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = uitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}
| |
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)

; half -> signed i32: one masked call with rounding 10 ({ru-sae}) and one
; unmasked call with rounding 8 ({rn-sae}); adding the results keeps both
; conversions live.
define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2dq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
| |
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)

; half -> unsigned i32: masked {ru-sae} (rounding 10) plus unmasked {rn-sae}
; (rounding 8), results summed.
define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2udq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
| |
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)

; Truncating half -> signed i32: masked call with current rounding (i32 4, no
; suffix) and unmasked call with i32 8, which for the truncating form is the
; {sae} suppress-all-exceptions encoding.
define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2dq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
| |
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)

; Truncating half -> unsigned i32: masked/current-rounding plus unmasked/{sae},
; mirroring the signed vcvttph2dq test above it in structure.
define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2udq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
| |
declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)

; Merge-masked signed i64 -> half: zmm source narrows to an xmm result, so the
; epilogue also needs vzeroupper before returning.
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}
| |
; Same merge-masked i64 -> half conversion with static rounding i32 10, which
; must select the {ru-sae} embedded-rounding form of vcvtqq2ph.
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}
| |
; Unmasked i64 -> half conversion: plain vcvtqq2ph plus vzeroupper.
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}
| |
; Zero-masking form of the i64 -> half conversion: select-with-zero folds
; into the {%k1} {z} encoding.
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}
| |
declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)

; Merge-masked unsigned i64 -> half: same shape as the signed qq2ph test but
; selecting vcvtuqq2ph.
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}
| |
; Merge-masked unsigned i64 -> half with static rounding i32 10 ({ru-sae}).
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}
| |
; Unmasked unsigned i64 -> half conversion: plain vcvtuqq2ph plus vzeroupper.
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}
| |
; Zero-masking form of the unsigned i64 -> half conversion.
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}
| |
declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)

; half -> signed i64: masked call with rounding 10 ({ru-sae}) and unmasked
; call with rounding 8 ({rn-sae}); summing keeps both live.
define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2qq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
| |
declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

; half -> unsigned i64: masked {ru-sae} plus unmasked {rn-sae}, results summed.
define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2uqq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
| |
declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

; Truncating half -> unsigned i64: here the masked call takes {sae} (i32 8)
; and the unmasked one the default environment (i32 4) — the reverse pairing
; of the dq-width truncating tests above.
define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2uqq %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
| |
declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)

; Scalar half -> i32: current rounding (i32 4, no suffix) and i32 11, the
; {rz-sae} (round-toward-zero) embedded-rounding form.
define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2si {rz-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
  %res = add i32 %res1, %res2
  ret i32 %res
}
| |
declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)

; Scalar half -> i64: current rounding and i32 10, the {ru-sae} form.
define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2si {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}
| |
declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)

; Truncating scalar half -> i32: default environment and i32 8, which for the
; truncating form is the {sae} encoding.
define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}
| |
declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)

; Truncating scalar half -> i64: default environment and the {sae} form (i32 8).
define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}
| |
| |
declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)

; Scalar half -> unsigned i32: current rounding and i32 9, the {rd-sae}
; (round-down) embedded-rounding form.
define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
  %res = add i32 %res1, %res2
  ret i32 %res
}
| |
| |
declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)

; Scalar half -> unsigned i64: current rounding and the {ru-sae} form (i32 10).
define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2usi {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}
| |
declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)

; Truncating scalar half -> unsigned i32: default environment and {sae} (i32 8).
define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}
| |
declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)

; Truncating scalar half -> unsigned i64: default environment and {sae} (i32 8).
define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}
| |
declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)

; Signed i32 -> scalar half inserted into the low element: current rounding
; and i32 9, emitted as vcvtsi2sh with the {rd-sae} operand between the GPR
; source and the vector operands.
define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}
| |
declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)

; Signed i64 -> scalar half (64-bit GPR source %rdi): current rounding and
; i32 8, the {rn-sae} form.
define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}
| |
declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)

; Unsigned i32 -> scalar half: current rounding and the {rd-sae} form (i32 9).
define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}
| |
declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)

; Unsigned i64 -> scalar half: current rounding and the {rd-sae} form (i32 9).
define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}
| |
| |
; Widening cast of a 128-bit ph vector to 256 bits where the upper half comes
; from frozen poison: codegen may fill the upper lane with anything, and here
; reuses %xmm0 via vinsertf128 rather than introducing an extra value.
define <16 x half> @test_mm256_castph128_ph256_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm256_castph128_ph256_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x half> %res
}
| |
| |
; Widening cast of a 128-bit ph vector to 512 bits with the three upper 128-bit
; lanes taken from frozen poison (the shuffle repeats indices 8-15 for them);
; lowering builds the zmm from %xmm0 with vinsertf128/vinsertf64x4.
define <32 x half> @test_mm512_castph128_ph512_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph128_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <32 x half> %res
}
| |
| |
; Widening cast of a 256-bit ph vector to 512 bits with frozen-poison upper
; half: a single vinsertf64x4 reusing %ymm0 suffices.
define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph256_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a1 = freeze <16 x half> poison
  %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x half> %res
}