| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
| |
| define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = fadd <16 x half> %x1, %x2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm3 |
| ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm3 {%k1} |
| ; CHECK-NEXT: vaddph (%rsi), %ymm0, %ymm2 {%k1} |
| ; CHECK-NEXT: vaddph %ymm2, %ymm3, %ymm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %val = load <16 x half>, <16 x half>* %ptr |
| %res0 = fadd <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src |
| %t3 = fadd <16 x half> %x1, %val |
| %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src |
| %res = fadd <16 x half> %res1 , %res2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %res0 = fadd <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer |
| ret <16 x half> %res1 |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = fadd <8 x half> %x1, %x2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm3 |
| ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm3 {%k1} |
| ; CHECK-NEXT: vaddph (%rsi), %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vaddph %xmm2, %xmm3, %xmm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %val = load <8 x half>, <8 x half>* %ptr |
| %res0 = fadd <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src |
| %t3 = fadd <8 x half> %x1, %val |
| %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src |
| %res = fadd <8 x half> %res1 , %res2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %res0 = fadd <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer |
| ret <8 x half> %res1 |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = fsub <16 x half> %x1, %x2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm3 |
| ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm3 {%k1} |
| ; CHECK-NEXT: vsubph (%rsi), %ymm0, %ymm2 {%k1} |
| ; CHECK-NEXT: vsubph %ymm2, %ymm3, %ymm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %val = load <16 x half>, <16 x half>* %ptr |
| %res0 = fsub <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src |
| %t3 = fsub <16 x half> %x1, %val |
| %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src |
| %res = fsub <16 x half> %res1 , %res2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %res0 = fsub <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer |
| ret <16 x half> %res1 |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = fsub <8 x half> %x1, %x2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm3 |
| ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm3 {%k1} |
| ; CHECK-NEXT: vsubph (%rsi), %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vsubph %xmm2, %xmm3, %xmm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %val = load <8 x half>, <8 x half>* %ptr |
| %res0 = fsub <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src |
| %t3 = fsub <8 x half> %x1, %val |
| %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src |
| %res = fsub <8 x half> %res1 , %res2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %res0 = fsub <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer |
| ret <8 x half> %res1 |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = fmul <16 x half> %x1, %x2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm3 |
| ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm3 {%k1} |
| ; CHECK-NEXT: vmulph (%rsi), %ymm0, %ymm2 {%k1} |
| ; CHECK-NEXT: vmulph %ymm2, %ymm3, %ymm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %val = load <16 x half>, <16 x half>* %ptr |
| %res0 = fmul <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src |
| %t3 = fmul <16 x half> %x1, %val |
| %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src |
| %res = fmul <16 x half> %res1 , %res2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %res0 = fmul <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer |
| ret <16 x half> %res1 |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = fmul <8 x half> %x1, %x2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm3 |
| ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm3 {%k1} |
| ; CHECK-NEXT: vmulph (%rsi), %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmulph %xmm2, %xmm3, %xmm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %val = load <8 x half>, <8 x half>* %ptr |
| %res0 = fmul <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src |
| %t3 = fmul <8 x half> %x1, %val |
| %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src |
| %res = fmul <8 x half> %res1 , %res2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %res0 = fmul <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer |
| ret <8 x half> %res1 |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = fdiv <16 x half> %x1, %x2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256_fast: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vrcpph %ymm1, %ymm1 |
| ; CHECK-NEXT: vmulph %ymm0, %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = fdiv fast <16 x half> %x1, %x2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm3 |
| ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm3 {%k1} |
| ; CHECK-NEXT: vdivph (%rsi), %ymm0, %ymm2 {%k1} |
| ; CHECK-NEXT: vdivph %ymm2, %ymm3, %ymm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %val = load <16 x half>, <16 x half>* %ptr |
| %res0 = fdiv <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src |
| %t3 = fdiv <16 x half> %x1, %val |
| %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src |
| %res = fdiv <16 x half> %res1 , %res2 |
| ret <16 x half> %res |
| } |
| |
| define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i16 %mask to <16 x i1> |
| %res0 = fdiv <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer |
| ret <16 x half> %res1 |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = fdiv <8 x half> %x1, %x2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_div_ph_128_fast(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128_fast: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vrcpph %xmm1, %xmm1 |
| ; CHECK-NEXT: vmulph %xmm0, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = fdiv fast <8 x half> %x1, %x2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm3 |
| ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm3 {%k1} |
| ; CHECK-NEXT: vdivph (%rsi), %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vdivph %xmm2, %xmm3, %xmm0 |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %val = load <8 x half>, <8 x half>* %ptr |
| %res0 = fdiv <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src |
| %t3 = fdiv <8 x half> %x1, %val |
| %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src |
| %res = fdiv <8 x half> %res1 , %res2 |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) { |
| ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %msk = bitcast i8 %mask to <8 x i1> |
| %res0 = fdiv <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer |
| ret <8 x half> %res1 |
| } |
| |
| define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_min_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res0 = fcmp olt <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2 |
| ret <16 x half> %res1 |
| } |
| |
| define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_max_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res0 = fcmp ogt <16 x half> %x1, %x2 |
| %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2 |
| ret <16 x half> %res1 |
| } |
| |
| define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_min_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res0 = fcmp olt <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2 |
| ret <8 x half> %res1 |
| } |
| |
| define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_max_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res0 = fcmp ogt <8 x half> %x1, %x2 |
| %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2 |
| ret <8 x half> %res1 |
| } |
| |
| declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>) |
| declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>) |
| |
| define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_max_ph_128_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2) |
| ret <8 x half> %res0 |
| } |
| |
| define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_max_ph_256_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2) |
| ret <16 x half> %res0 |
| } |
| |
| declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>) |
| declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>) |
| |
| define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) { |
| ; CHECK-LABEL: test_min_ph_128_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2) |
| ret <8 x half> %res0 |
| } |
| |
| define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) { |
| ; CHECK-LABEL: test_min_ph_256_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2) |
| ret <16 x half> %res0 |
| } |
| |
| declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8) |
| |
| define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2) |
| ret <4 x double> %res |
| } |
| |
| define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1) |
| ret <4 x double> %res |
| } |
| |
| declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8) |
| |
| define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2) |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1) |
| ret <2 x double> %res |
| } |
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8) |
| |
| define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: vzeroupper |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2) |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %x0 = load <4 x double>, <4 x double>* %px0, align 32 |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2) |
| ret <8 x half> %res |
| } |
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8) |
| |
| define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2) |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %esi, %k1 |
| ; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1} |
| ; CHECK-NEXT: retq |
| %x0 = load <2 x double>, <2 x double>* %px0, align 16 |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2) |
| ret <8 x half> %res |
| } |
| |
| declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8) |
| |
| define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) |
| ret <4 x i32> %res |
| } |
| |
| define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) |
| ret <4 x i32> %res |
| } |
| |
| define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) |
| ret <4 x i32> %res |
| } |
| |
| declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8) |
| |
| define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) |
| ret <8 x i32> %res |
| } |
| |
| define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) |
| ret <8 x i32> %res |
| } |
| |
| define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) |
| ret <8 x i32> %res |
| } |
| |
| declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8) |
| |
| define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) |
| ret <4 x i32> %res |
| } |
| |
| define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) |
| ret <4 x i32> %res |
| } |
| |
| define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) |
| ret <4 x i32> %res |
| } |
| |
| declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8) |
| |
| define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) |
| ret <8 x i32> %res |
| } |
| |
| define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) |
| ret <8 x i32> %res |
| } |
| |
| define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) |
| ret <8 x i32> %res |
| } |
| |
| declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8) |
| |
| define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) |
| ret <4 x i32> %res |
| } |
| |
| define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) |
| ret <4 x i32> %res |
| } |
| |
| define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) |
| ret <4 x i32> %res |
| } |
| |
| declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8) |
| |
| define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) |
| ret <8 x i32> %res |
| } |
| |
| define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) |
| ret <8 x i32> %res |
| } |
| |
| define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) |
| ret <8 x i32> %res |
| } |
| |
| declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8) |
| |
| define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2) |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2) |
| ret <4 x float> %res |
| } |
| |
| declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8) |
| |
| define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1) |
| ret <8 x float> %res |
| } |
| |
| define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2) |
| ret <8 x float> %res |
| } |
| |
| define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2) |
| ret <8 x float> %res |
| } |
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8) |
| |
| define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0 |
| ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2) |
| %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1) |
| %res2 = fadd <8 x half> %res, %res1 |
| ret <8 x half> %res2 |
| } |
| |
| declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8) |
| |
| define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0 |
| ; CHECK-NEXT: vzeroupper |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1) |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: vzeroupper |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2) |
| ret <8 x half> %res |
| } |
| |
| define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: vzeroupper |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2) |
| ret <8 x half> %res |
| } |