| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s |
| |
; Scalar i16 <-> XMM moves: both directions should select a single
; AVX512-FP16 "vmovw" instruction (plus a sub-register copy on extract).
define signext i16 @test_mm_cvtsi128_si16(<2 x i64> %A) local_unnamed_addr #0 {
; CHECK-LABEL: test_mm_cvtsi128_si16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovw %xmm0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:
%0 = bitcast <2 x i64> %A to <8 x i16>
%vecext.i = extractelement <8 x i16> %0, i32 0
ret i16 %vecext.i
}

; Insertion of an i16 into element 0 of an otherwise-zero vector lowers to
; a single vmovw from the GPR argument.
define <2 x i64> @test_mm_cvtsi16_si128(i16 signext %A) local_unnamed_addr #0 {
; CHECK-LABEL: test_mm_cvtsi16_si128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovw %edi, %xmm0
; CHECK-NEXT: retq
entry:
%vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %A, i32 0
%0 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
ret <2 x i64> %0
}
| |
; Signed <8 x i32> -> <8 x half> (256-bit source): sitofp + select on an i8
; mask should fold into masked/zero-masked vcvtdq2ph; plain sitofp selects
; the unmasked form. vzeroupper is expected because a YMM source was live.
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = sitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}

; Zero-masking variant: select against zeroinitializer -> {z} form.
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = sitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
ret <8 x half> %res
}

; Unmasked lowering of generic IR sitofp.
define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: sint_to_fp_8i32_to_8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = sitofp <8 x i32> %x to <8 x half>
ret <8 x half> %res
}
| |
; 128-bit signed i32 -> f16 via the masked intrinsic (merge, nomask, and
; zero-mask forms), plus generic sitofp on 4- and 2-element vectors, which
; should all select the XMM form of vcvtdq2ph.
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

; All-ones mask (-1) should drop the mask entirely.
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}

; Zero passthrough operand -> {z} zero-masking form.
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}

; Sub-128-bit element counts are widened and still use the XMM instruction.
define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: sint_to_fp_4i32_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = sitofp <4 x i32> %x to <4 x half>
ret <4 x half> %res
}

define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: sint_to_fp_2i32_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = sitofp <2 x i32> %x to <2 x half>
ret <2 x half> %res
}
| |
; Generic fptosi from narrow half vectors: i32 results use truncating
; vcvttph2dq, i16 results use vcvttph2w.
define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <4 x half> %x to <4 x i32>
ret <4 x i32> %res
}

define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <2 x half> %x to <2 x i32>
ret <2 x i32> %res
}

define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <2 x half> %x to <2 x i16>
ret <2 x i16> %res
}
| |
; Unsigned <8 x i32> -> <8 x half> (256-bit source): uitofp + mask select
; folds into vcvtudq2ph (merge and {z} forms); plain uitofp is unmasked.
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = uitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = uitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
ret <8 x half> %res
}

define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: uint_to_fp_8i32_to_8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = uitofp <8 x i32> %x to <8 x half>
ret <8 x half> %res
}

; Reverse direction: fptoui of 8 halves widens the result to a YMM via
; truncating vcvttph2udq.
define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) {
; CHECK-LABEL: fp_to_uint_8f16_to_8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = fptoui <8 x half> %x to <8 x i32>
ret <8 x i32> %res
}
| |
; 128-bit unsigned i32 -> f16 via the masked intrinsic (merge, nomask and
; zero-mask forms), plus generic uitofp on narrow vectors.
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

; All-ones mask (-1) should drop the mask entirely.
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}

; Zero passthrough operand -> {z} zero-masking form.
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}

define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: uint_to_fp_4i32_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <4 x i32> %x to <4 x half>
ret <4 x half> %res
}

define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: uint_to_fp_2i32_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <2 x i32> %x to <2 x half>
ret <2 x half> %res
}
| |
; Generic fptoui from narrow half vectors: i32 results use truncating
; vcvttph2udq, i16 results use vcvttph2uw.
define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <4 x half> %x to <4 x i32>
ret <4 x i32> %res
}

define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <2 x half> %x to <2 x i32>
ret <2 x i32> %res
}

define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <2 x half> %x to <2 x i16>
ret <2 x i16> %res
}
| |
; Masked f16 -> signed i32 (rounding) intrinsic, 128-bit: unmasked
; (undef passthrough + all-ones mask), merge-masked, and zero-masked forms.
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
| |
; Masked f16 -> signed i32 intrinsic, 256-bit result (XMM source, YMM dest):
; unmasked, merge-masked, and zero-masked forms.
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
| |
; Masked f16 -> unsigned i32 (rounding) intrinsic, 128-bit: unmasked,
; merge-masked, and zero-masked forms.
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
| |
; Masked f16 -> unsigned i32 intrinsic, 256-bit result: unmasked,
; merge-masked, and zero-masked forms.
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
| |
; Masked f16 -> signed i32 with truncation intrinsic, 128-bit: unmasked,
; merge-masked, and zero-masked forms.
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
| |
; Masked f16 -> signed i32 with truncation intrinsic, 256-bit result:
; unmasked, merge-masked, and zero-masked forms.
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
| |
; Masked f16 -> unsigned i32 with truncation intrinsic, 128-bit: unmasked,
; merge-masked, and zero-masked forms.
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
| |
; Masked f16 -> unsigned i32 with truncation intrinsic, 256-bit result:
; unmasked, merge-masked, and zero-masked forms.
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
| |
; Masked f16 -> f64 widening (vcvtph2pd), 256-bit then 128-bit variants,
; each with merge-masked and nomask (-1) forms.
declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
ret <2 x double> %res
}
| |
; Masked f64 -> f16 narrowing (vcvtpd2ph), 256-bit and 128-bit sources.
; The *_load variants check memory-operand folding, which must use the
; size-disambiguated mnemonics vcvtpd2phy (256-bit load) / vcvtpd2phx
; (128-bit load) since the destination is XMM in both cases.
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <4 x double>, <4 x double>* %px0, align 32
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <2 x double>, <2 x double>* %px0, align 16
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
| |
; Signed i64 -> f16 (vcvtqq2ph), 256-bit source: masked intrinsic forms
; plus generic sitofp <4 x i64>, and the reverse fptosi via vcvttph2qq.
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}

define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: sint_to_fp_4i64_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = sitofp <4 x i64> %x to <4 x half>
ret <4 x half> %res
}

define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = fptosi <4 x half> %x to <4 x i64>
ret <4 x i64> %res
}
| |
; Signed i64 -> f16 (vcvtqq2ph), 128-bit source: masked intrinsic forms
; plus generic sitofp <2 x i64>, and the reverse fptosi via vcvttph2qq.
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}

define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: sint_to_fp_2i64_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = sitofp <2 x i64> %x to <2 x half>
ret <2 x half> %res
}

define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <2 x half> %x to <2 x i64>
ret <2 x i64> %res
}
| |
; Unsigned i64 -> f16 (vcvtuqq2ph), 256-bit source: masked intrinsic forms
; plus generic uitofp <4 x i64>, and the reverse fptoui via vcvttph2uqq.
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}

define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: uint_to_fp_4i64_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = uitofp <4 x i64> %x to <4 x half>
ret <4 x half> %res
}

define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = fptoui <4 x half> %x to <4 x i64>
ret <4 x i64> %res
}
| |
; Unsigned i64 -> f16 (vcvtuqq2ph), 128-bit source: masked intrinsic forms
; plus generic uitofp <2 x i64>, and the reverse fptoui via vcvttph2uqq.
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}

define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: uint_to_fp_2i64_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <2 x i64> %x to <2 x half>
ret <2 x half> %res
}

define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <2 x half> %x to <2 x i64>
ret <2 x i64> %res
}
| |
| ; Masked truncating f16 -> i64 intrinsic, 128-bit: unmasked (undef passthru, |
| ; mask = -1), merge-masked, and zero-masked forms. |
| declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8) |
| |
| define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1) |
| ret <2 x i64> %res |
| } |
| |
| define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) |
| ret <2 x i64> %res |
| } |
| |
| define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2) |
| ret <2 x i64> %res |
| } |
| |
| ; Same three masking variants for the 256-bit truncating f16 -> i64 |
| ; conversion (xmm source, ymm destination). |
| declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8) |
| |
| define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1) |
| ret <4 x i64> %res |
| } |
| |
| define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) |
| ret <4 x i64> %res |
| } |
| |
| define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2) |
| ret <4 x i64> %res |
| } |
| |
| ; Unsigned variant (vcvttph2uqq), 128-bit: unmasked, merge-masked, and |
| ; zero-masked forms. |
| declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8) |
| |
| define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1) |
| ret <2 x i64> %res |
| } |
| |
| define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) |
| ret <2 x i64> %res |
| } |
| |
| define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2) |
| ret <2 x i64> %res |
| } |
| |
| ; Unsigned variant, 256-bit (xmm source, ymm destination): unmasked, |
| ; merge-masked, and zero-masked forms. |
| declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8) |
| |
| define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1) |
| ret <4 x i64> %res |
| } |
| |
| define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) |
| ret <4 x i64> %res |
| } |
| |
| define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2) |
| ret <4 x i64> %res |
| } |
| |
| ; Generic llvm.sqrt on v8f16: plain lowering to vsqrtph, the fast-math |
| ; a1/sqrt(a0) pattern (expected to use the vrsqrtph estimate + vmulph), and |
| ; select-based merge/zero masking folded into the instruction's mask operand. |
| declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) |
| declare <16 x half> @llvm.sqrt.v16f16(<16 x half>) |
| |
| define <8 x half> @test_sqrt_ph_128(<8 x half> %a0) { |
| ; CHECK-LABEL: test_sqrt_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsqrtph %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) |
| ret <8 x half> %1 |
| } |
| |
| define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) { |
| ; CHECK-LABEL: test_sqrt_ph_128_fast: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vrsqrtph %xmm0, %xmm0 |
| ; CHECK-NEXT: vmulph %xmm0, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) |
| %2 = fdiv fast <8 x half> %a1, %1 |
| ret <8 x half> %2 |
| } |
| |
| define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) { |
| ; CHECK-LABEL: test_mask_sqrt_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vsqrtph %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) |
| %2 = bitcast i8 %mask to <8 x i1> |
| %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> %passthru |
| ret <8 x half> %3 |
| } |
| |
| define <8 x half> @test_maskz_sqrt_ph_128(<8 x half> %a0, i8 %mask) { |
| ; CHECK-LABEL: test_maskz_sqrt_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vsqrtph %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) |
| %2 = bitcast i8 %mask to <8 x i1> |
| %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> zeroinitializer |
| ret <8 x half> %3 |
| } |
| |
| ; Same llvm.sqrt lowering checks for the 256-bit (v16f16 / ymm) width. |
| define <16 x half> @test_sqrt_ph_256(<16 x half> %a0) { |
| ; CHECK-LABEL: test_sqrt_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsqrtph %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) |
| ret <16 x half> %1 |
| } |
| |
| define <16 x half> @test_sqrt_ph_256_fast(<16 x half> %a0, <16 x half> %a1) { |
| ; CHECK-LABEL: test_sqrt_ph_256_fast: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vrsqrtph %ymm0, %ymm0 |
| ; CHECK-NEXT: vmulph %ymm0, %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = call fast <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) |
| %2 = fdiv fast <16 x half> %a1, %1 |
| ret <16 x half> %2 |
| } |
| |
| define <16 x half> @test_mask_sqrt_ph_256(<16 x half> %a0, <16 x half> %passthru, i16 %mask) { |
| ; CHECK-LABEL: test_mask_sqrt_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vsqrtph %ymm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> %passthru |
| ret <16 x half> %3 |
| } |
| |
| define <16 x half> @test_maskz_sqrt_ph_256(<16 x half> %a0, i16 %mask) { |
| ; CHECK-LABEL: test_maskz_sqrt_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vsqrtph %ymm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) |
| %2 = bitcast i16 %mask to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> zeroinitializer |
| ret <16 x half> %3 |
| } |
| |
| ; Reciprocal-sqrt estimate intrinsics; with an all-ones mask the plain |
| ; vrsqrtph is emitted for both widths. |
| declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half>, <8 x half>, i8) |
| declare <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half>, <16 x half>, i16) |
| |
| define <8 x half> @test_rsqrt_ph_128(<8 x half> %a0) { |
| ; CHECK-LABEL: test_rsqrt_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vrsqrtph %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half> %a0, <8 x half> zeroinitializer, i8 -1) |
| ret <8 x half> %res |
| } |
| |
| define <16 x half> @test_rsqrt_ph_256(<16 x half> %a0) { |
| ; CHECK-LABEL: test_rsqrt_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vrsqrtph %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half> %a0, <16 x half> zeroinitializer, i16 -1) |
| ret <16 x half> %res |
| } |
| |
| ; Reciprocal estimate intrinsics, merge-masked forms for both widths. |
| declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half>, <8 x half>, i8) |
| declare <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half>, <16 x half>, i16) |
| |
| define <8 x half> @test_rcp_ph_128(<8 x half> %a0, <8 x half> %a1, i8 %mask) { |
| ; CHECK-LABEL: test_rcp_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vrcpph %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half> %a0, <8 x half> %a1, i8 %mask) |
| ret <8 x half> %res |
| } |
| |
| define <16 x half> @test_rcp_ph_256(<16 x half> %a0, <16 x half> %a1, i16 %mask) { |
| ; CHECK-LABEL: test_rcp_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vrcpph %ymm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half> %a0, <16 x half> %a1, i16 %mask) |
| ret <16 x half> %res |
| } |
| |
| ; vreduceph with an immediate control: each test issues a masked (imm 8) and |
| ; an unmasked (imm 4) call and adds the results, checking both encodings. |
| declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half>, i32, <8 x half>, i8) |
| declare <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half>, i32, <16 x half>, i16) |
| |
| define <8 x half>@test_int_x86_avx512_mask_reduce_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vreduceph $8, %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vreduceph $4, %xmm0, %xmm0 |
| ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3) |
| %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1) |
| %res2 = fadd <8 x half> %res, %res1 |
| ret <8 x half> %res2 |
| } |
| |
| define <16 x half>@test_int_x86_avx512_mask_reduce_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vreduceph $8, %ymm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vreduceph $4, %ymm0, %ymm0 |
| ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3) |
| %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1) |
| %res2 = fadd <16 x half> %res, %res1 |
| ret <16 x half> %res2 |
| } |
| |
| ; vfpclassph: two fpclass tests (imm 2 and imm 4) whose i1-vector results |
| ; are ANDed; the AND is expected to fold into using the first result (k1) |
| ; as the write-mask of the second vfpclassph. |
| declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32) |
| declare <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half>, i32) |
| |
| define i8 @test_int_x86_avx512_fpclass_ph_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfpclassph $2, %xmm0, %k1 |
| ; CHECK-NEXT: vfpclassph $4, %xmm0, %k0 {%k1} |
| ; CHECK-NEXT: kmovd %k0, %eax |
| ; CHECK-NEXT: # kill: def $al killed $al killed $eax |
| ; CHECK-NEXT: retq |
| %res = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 4) |
| %res1 = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 2) |
| %1 = and <8 x i1> %res1, %res |
| %2 = bitcast <8 x i1> %1 to i8 |
| ret i8 %2 |
| } |
| |
| define i16 @test_int_x86_avx512_fpclass_ph_256(<16 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vfpclassph $2, %ymm0, %k1 |
| ; CHECK-NEXT: vfpclassph $4, %ymm0, %k0 {%k1} |
| ; CHECK-NEXT: kmovd %k0, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: vzeroupper |
| ; CHECK-NEXT: retq |
| %res = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 4) |
| %res1 = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 2) |
| %1 = and <16 x i1> %res1, %res |
| %2 = bitcast <16 x i1> %1 to i16 |
| ret i16 %2 |
| } |
| |
| ; vgetexpph: unmasked, merge-masked, and zero-masked forms at both widths. |
| declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half>, <8 x half>, i8) |
| declare <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half>, <16 x half>, i16) |
| |
| define <8 x half>@test_int_x86_avx512_getexp_ph_128(<8 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_getexp_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vgetexpph %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 -1) |
| ret <8 x half> %res |
| } |
| |
| define <8 x half>@test_int_x86_avx512_mask_getexp_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vgetexpph %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> %x1, i8 %x2) |
| ret <8 x half> %res |
| } |
| |
| define <8 x half>@test_int_x86_avx512_maskz_getexp_ph_128(<8 x half> %x0, i8 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vgetexpph %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 %x2) |
| ret <8 x half> %res |
| } |
| |
| define <16 x half>@test_int_x86_avx512_getexp_ph_256(<16 x half> %x0) { |
| ; CHECK-LABEL: test_int_x86_avx512_getexp_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vgetexpph %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 -1) |
| ret <16 x half> %res |
| } |
| |
| define <16 x half>@test_int_x86_avx512_mask_getexp_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vgetexpph %ymm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> %x1, i16 %x2) |
| ret <16 x half> %res |
| } |
| |
| define <16 x half>@test_int_x86_avx512_maskz_getexp_ph_256(<16 x half> %x0, i16 %x2) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vgetexpph %ymm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 %x2) |
| ret <16 x half> %res |
| } |
| |
| ; vgetmantph with an immediate control: masked (imm 8) + unmasked (imm 4) |
| ; calls added together, same pattern as the reduce tests above. |
| declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half>, i32, <8 x half>, i8) |
| declare <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half>, i32, <16 x half>, i16) |
| |
| define <8 x half>@test_int_x86_avx512_mask_getmant_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vgetmantph $8, %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vgetmantph $4, %xmm0, %xmm0 |
| ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3) |
| %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1) |
| %res2 = fadd <8 x half> %res, %res1 |
| ret <8 x half> %res2 |
| } |
| |
| define <16 x half>@test_int_x86_avx512_mask_getmant_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vgetmantph $8, %ymm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vgetmantph $4, %ymm0, %ymm0 |
| ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3) |
| %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1) |
| %res2 = fadd <16 x half> %res, %res1 |
| ret <16 x half> %res2 |
| } |
| |
| ; vrndscaleph with an immediate control: masked (imm 8) + unmasked (imm 4) |
| ; calls added together, same pattern as the reduce/getmant tests. |
| declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half>, i32, <8 x half>, i8) |
| declare <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half>, i32, <16 x half>, i16) |
| |
| define <8 x half>@test_int_x86_avx512_mask_rndscale_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vrndscaleph $8, %xmm0, %xmm1 {%k1} |
| ; CHECK-NEXT: vrndscaleph $4, %xmm0, %xmm0 |
| ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3) |
| %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1) |
| %res2 = fadd <8 x half> %res, %res1 |
| ret <8 x half> %res2 |
| } |
| |
| define <16 x half>@test_int_x86_avx512_mask_rndscale_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vrndscaleph $8, %ymm0, %ymm1 {%k1} |
| ; CHECK-NEXT: vrndscaleph $4, %ymm0, %ymm0 |
| ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3) |
| %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1) |
| %res2 = fadd <16 x half> %res, %res1 |
| ret <16 x half> %res2 |
| } |
| |
| ; vscalefph (x0 scaled by x1): unmasked, merge-masked, and zero-masked |
| ; forms at both widths; unmasked 128-bit form below. |
| declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half>, <8 x half>, <8 x half>, i8) |
| declare <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half>, <16 x half>, <16 x half>, i16) |
| |
| define <8 x half>@test_int_x86_avx512_scalef_ph_128(<8 x half> %x0, <8 x half> %x1) { |
| ; CHECK-LABEL: test_int_x86_avx512_scalef_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 -1) |
| ret <8 x half> %res |
| } |
| |
| ; Merge-masked vscalefph, 128-bit. The intrinsic consumes the i8 mask |
| ; directly; the previous dead "%mask = bitcast i8 %x3 to <8 x i1>" had no |
| ; users and no effect on codegen, so it has been removed. |
| define <8 x half>@test_int_x86_avx512_mask_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm2 {%k1} |
| ; CHECK-NEXT: vmovaps %xmm2, %xmm0 |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) |
| ret <8 x half> %res |
| } |
| |
| ; Zero-masked vscalefph, 128-bit. Dead "%mask" bitcast removed (unused; the |
| ; intrinsic takes the i8 mask directly, so codegen is unchanged). |
| define <8 x half>@test_int_x86_avx512_maskz_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_128: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 %x3) |
| ret <8 x half> %res |
| } |
| |
| ; Unmasked (mask = -1) vscalefph, 256-bit. |
| define <16 x half>@test_int_x86_avx512_scalef_ph_256(<16 x half> %x0, <16 x half> %x1) { |
| ; CHECK-LABEL: test_int_x86_avx512_scalef_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 -1) |
| ret <16 x half> %res |
| } |
| |
| ; Merge-masked vscalefph, 256-bit. Dead "%mask" bitcast removed (unused; the |
| ; intrinsic takes the i16 mask directly, so codegen is unchanged). |
| define <16 x half>@test_int_x86_avx512_mask_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm2 {%k1} |
| ; CHECK-NEXT: vmovaps %ymm2, %ymm0 |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) |
| ret <16 x half> %res |
| } |
| |
| ; Zero-masked vscalefph, 256-bit. Dead "%mask" bitcast removed (unused; the |
| ; intrinsic takes the i16 mask directly, so codegen is unchanged). |
| define <16 x half>@test_int_x86_avx512_maskz_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x3) { |
| ; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_256: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: kmovd %edi, %k1 |
| ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0 {%k1} {z} |
| ; CHECK-NEXT: retq |
| %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 %x3) |
| ret <16 x half> %res |
| } |