| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK-NO_FP16 |
| ; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK-WITH_FP16 |
| |
| ; Note: We could check more configurations, but anything with software |
| ; emulation of fp16 generates a ton of assembly code and is not particularly |
| ; interesting. |
| |
| ;---------------------------------------- |
| ; i8 input |
| ;---------------------------------------- |
| |
| ; uint8_t to float. |
| ; - Go from i8 to i32: zext |
| ; - Convert i32 to float |
| ; |
| ; The result type is float, so fp16 support plays no role: both RUN |
| ; configurations are expected to emit the same sequence. |
define float @uint8ToFloat(i8 %int8) { |
| ; CHECK-NO_FP16-LABEL: uint8ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movzbl %dil, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: uint8ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = uitofp i8 %int8 to float |
| ret float %fp32 |
| } |
| |
| ; vector uint8_t to float. |
| ; Same as @uint8ToFloat but with vector types. |
| ; - Go from <16 x i8> to <16 x i32>: zext (vpmovzxbd) |
| ; - Convert <16 x i32> to <16 x float> (vcvtdq2ps) |
| ; |
| ; As for the scalar version, both RUN configurations are expected to emit |
| ; the same sequence. |
| define <16 x float> @vector_uint8ToFloat(<16 x i8> %int8) { |
| ; CHECK-NO_FP16-LABEL: vector_uint8ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_uint8ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero |
| ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = uitofp <16 x i8> %int8 to <16 x float> |
| ret <16 x float> %fp32 |
| } |
| |
| |
| ; uint8_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i8 to i32: zext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Go from i8 to i32: zext |
| ; - Convert i32 to half |
| ; |
| ; In both cases the scalar conversion uses i32 as the intermediate integer |
| ; type. |
| define half @uint8ToHalf(i8 %int8) { |
| ; CHECK-NO_FP16-LABEL: uint8ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movzbl %dil, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax |
| ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: uint8ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = uitofp i8 %int8 to half |
| ret half %fp16 |
| } |
| |
| ; vector uint8_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i8 to i32: zext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Go from i8 to i16: zext |
| ; - Convert i16 to half |
| ; |
| ; The difference with the scalar version (uint8ToHalf) is that we use i16 |
| ; for the intermediate type when we have half support. |
| define <16 x half> @vector_uint8ToHalf(<16 x i8> %int8) { |
| ; CHECK-NO_FP16-LABEL: vector_uint8ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_uint8ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero |
| ; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = uitofp <16 x i8> %int8 to <16 x half> |
| ret <16 x half> %fp16 |
| } |
| |
| ; Same as the uint8_t tests above, but with the signed variant (sitofp). |
| ; I.e., use sext instead of zext. |
| ; int8_t to float. |
| ; - Go from i8 to i32: sext |
| ; - Convert i32 to float |
| ; |
| ; Both RUN configurations are expected to emit the same sequence. |
| define float @sint8ToFloat(i8 %int8) { |
| ; CHECK-NO_FP16-LABEL: sint8ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movsbl %dil, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: sint8ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = sitofp i8 %int8 to float |
| ret float %fp32 |
| } |
| |
| ; vector int8_t to float. |
| ; - Go from <16 x i8> to <16 x i32>: sext (vpmovsxbd) |
| ; - Convert <16 x i32> to <16 x float> (vcvtdq2ps) |
| ; |
| ; Both RUN configurations are expected to emit the same sequence. |
| define <16 x float> @vector_sint8ToFloat(<16 x i8> %int8) { |
| ; CHECK-NO_FP16-LABEL: vector_sint8ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_sint8ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = sitofp <16 x i8> %int8 to <16 x float> |
| ret <16 x float> %fp32 |
| } |
| |
| ; int8_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i8 to i32: sext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Go from i8 to i32: sext |
| ; - Convert i32 to half |
| define half @sint8ToHalf(i8 %int8) { |
| ; CHECK-NO_FP16-LABEL: sint8ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movsbl %dil, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax |
| ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: sint8ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = sitofp i8 %int8 to half |
| ret half %fp16 |
| } |
| |
| ; vector int8_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i8 to i32: sext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Go from i8 to i16: sext |
| ; - Convert i16 to half |
| define <16 x half> @vector_sint8ToHalf(<16 x i8> %int8) { |
| ; CHECK-NO_FP16-LABEL: vector_sint8ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_sint8ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vpmovsxbw %xmm0, %ymm0 |
| ; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = sitofp <16 x i8> %int8 to <16 x half> |
| ret <16 x half> %fp16 |
| } |
| |
| |
| ;---------------------------------------- |
| ; i16 input |
| ;---------------------------------------- |
| |
| ; Similar lowering to i8, but with i16 as the input type. |
| ; With half support, the vector i16 to half conversions need no extension |
| ; at all. |
| |
| ; uint16_t to float. |
| ; - Go from i16 to i32: zext |
| ; - Convert i32 to float |
| ; |
| ; Both RUN configurations are expected to emit the same sequence. |
| define float @uint16ToFloat(i16 %int16) { |
| ; CHECK-NO_FP16-LABEL: uint16ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movzwl %di, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: uint16ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movzwl %di, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = uitofp i16 %int16 to float |
| ret float %fp32 |
| } |
| |
| ; vector uint16_t to float. |
| ; - Go from <16 x i16> to <16 x i32>: zext (vpmovzxwd) |
| ; - Convert <16 x i32> to <16 x float> (vcvtdq2ps) |
| ; |
| ; Both RUN configurations are expected to emit the same sequence. |
| define <16 x float> @vector_uint16ToFloat(<16 x i16> %int16) { |
| ; CHECK-NO_FP16-LABEL: vector_uint16ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_uint16ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero |
| ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = uitofp <16 x i16> %int16 to <16 x float> |
| ret <16 x float> %fp32 |
| } |
| |
| ; uint16_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i16 to i32: zext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Go from i16 to i32: zext |
| ; - Convert i32 to half |
| define half @uint16ToHalf(i16 %int16) { |
| ; CHECK-NO_FP16-LABEL: uint16ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movzwl %di, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax |
| ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: uint16ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movzwl %di, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = uitofp i16 %int16 to half |
| ret half %fp16 |
| } |
| |
| ; vector uint16_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i16 to i32: zext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Convert i16 to half directly (vcvtuw2ph), no extension needed |
| define <16 x half> @vector_uint16ToHalf(<16 x i16> %int16) { |
| ; CHECK-NO_FP16-LABEL: vector_uint16ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_uint16ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vcvtuw2ph %ymm0, %ymm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = uitofp <16 x i16> %int16 to <16 x half> |
| ret <16 x half> %fp16 |
| } |
| |
| ; int16_t to float. |
| ; - Go from i16 to i32: sext |
| ; - Convert i32 to float |
| ; |
| ; Both RUN configurations are expected to emit the same sequence. |
| define float @sint16ToFloat(i16 %int16) { |
| ; CHECK-NO_FP16-LABEL: sint16ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movswl %di, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: sint16ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movswl %di, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = sitofp i16 %int16 to float |
| ret float %fp32 |
| } |
| |
| ; vector int16_t to float. |
| ; - Go from <16 x i16> to <16 x i32>: sext (vpmovsxwd) |
| ; - Convert <16 x i32> to <16 x float> (vcvtdq2ps) |
| ; |
| ; Both RUN configurations are expected to emit the same sequence. |
| define <16 x float> @vector_sint16ToFloat(<16 x i16> %int16) { |
| ; CHECK-NO_FP16-LABEL: vector_sint16ToFloat: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovsxwd %ymm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_sint16ToFloat: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vpmovsxwd %ymm0, %zmm0 |
| ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp32 = sitofp <16 x i16> %int16 to <16 x float> |
| ret <16 x float> %fp32 |
| } |
| |
| ; int16_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i16 to i32: sext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Go from i16 to i32: sext |
| ; - Convert i32 to half |
| define half @sint16ToHalf(i16 %int16) { |
| ; CHECK-NO_FP16-LABEL: sint16ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: movswl %di, %eax |
| ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax |
| ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: sint16ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: movswl %di, %eax |
| ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = sitofp i16 %int16 to half |
| ret half %fp16 |
| } |
| |
| ; vector int16_t to half. |
| ; |
| ; If no half support: |
| ; - Go from i16 to i32: sext |
| ; - Convert i32 to float |
| ; - Trunc from float to half |
| ; |
| ; Else if half support: |
| ; - Convert i16 to half directly (vcvtw2ph), no extension needed |
| define <16 x half> @vector_sint16ToHalf(<16 x i16> %int16) { |
| ; CHECK-NO_FP16-LABEL: vector_sint16ToHalf: |
| ; CHECK-NO_FP16: # %bb.0: |
| ; CHECK-NO_FP16-NEXT: vpmovsxwd %ymm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0 |
| ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0 |
| ; CHECK-NO_FP16-NEXT: retq |
| ; |
| ; CHECK-WITH_FP16-LABEL: vector_sint16ToHalf: |
| ; CHECK-WITH_FP16: # %bb.0: |
| ; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0 |
| ; CHECK-WITH_FP16-NEXT: retq |
| %fp16 = sitofp <16 x i16> %int16 to <16 x half> |
| ret <16 x half> %fp16 |
| } |