| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1150 < %s | FileCheck %s --check-prefixes=CHECK,SDAG |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1150 < %s | FileCheck %s --check-prefixes=CHECK,GISEL-GFX11 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=CHECK,SDAG |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=CHECK,GISEL-GFX12 |
| |
| ; Signed i32 -> float conversion of an inreg argument (read from s0 in the |
| ; expected output). All four run lines share one expectation: a single scalar |
| ; s_cvt_f32_i32, then a copy of the result into v0 for the float return. |
| define amdgpu_vs float @sitofp_i32_to_f32(i32 inreg %val) { |
| ; CHECK-LABEL: sitofp_i32_to_f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_f32_i32 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = sitofp i32 %val to float |
| ret float %res |
| } |
| |
| ; Unsigned i32 -> float conversion of an inreg argument (read from s0 in the |
| ; expected output). All run lines expect a single scalar s_cvt_f32_u32, then |
| ; a copy of the result into v0 for the float return. |
| define amdgpu_vs float @uitofp_u32_to_f32(i32 inreg %val) { |
| ; CHECK-LABEL: uitofp_u32_to_f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_f32_u32 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = uitofp i32 %val to float |
| ret float %res |
| } |
| |
| ; float -> signed i32 conversion of an inreg argument. All run lines expect |
| ; only a scalar s_cvt_i32_f32; the i32 result stays in s0, so the expected |
| ; output contains no VGPR copy before the epilog. |
| define amdgpu_vs i32 @fptosi_f32_to_i32(float inreg %val) { |
| ; CHECK-LABEL: fptosi_f32_to_i32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_i32_f32 s0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = fptosi float %val to i32 |
| ret i32 %res |
| } |
| |
| ; float -> unsigned i32 conversion of an inreg argument. All run lines expect |
| ; only a scalar s_cvt_u32_f32; the i32 result stays in s0, so the expected |
| ; output contains no VGPR copy before the epilog. |
| define amdgpu_vs i32 @fptoui_f32_to_u32(float inreg %val) { |
| ; CHECK-LABEL: fptoui_f32_to_u32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_u32_f32 s0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = fptoui float %val to i32 |
| ret i32 %res |
| } |
| |
| ; float -> signed i16 conversion of an inreg argument. The expected output is |
| ; the same single 32-bit s_cvt_i32_f32 as in the i32 test; no additional |
| ; narrowing instruction appears in the checks. |
| define amdgpu_vs i16 @fptosi_f32_to_i16(float inreg %val) { |
| ; CHECK-LABEL: fptosi_f32_to_i16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_i32_f32 s0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = fptosi float %val to i16 |
| ret i16 %res |
| } |
| |
| ; float -> unsigned i16 conversion of an inreg argument. The expected output |
| ; is the same single 32-bit s_cvt_u32_f32 as in the i32 test; no additional |
| ; narrowing instruction appears in the checks. |
| define amdgpu_vs i16 @fptoui_f32_to_u16(float inreg %val) { |
| ; CHECK-LABEL: fptoui_f32_to_u16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_u32_f32 s0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = fptoui float %val to i16 |
| ret i16 %res |
| } |
| |
| ; half -> float extension of an inreg argument. All run lines expect a single |
| ; scalar s_cvt_f32_f16, then a copy of the result into v0 for the return. |
| define amdgpu_vs float @fpext_f16_to_f32(half inreg %val) { |
| ; CHECK-LABEL: fpext_f16_to_f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_f32_f16 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = fpext half %val to float |
| ret float %res |
| } |
| |
| ; Extends element 1 of an inreg <2 x half> to float. All run lines expect the |
| ; extract + extend pair to become one s_cvt_hi_f32_f16 on s0; no separate |
| ; extract or shift instruction appears in the checks. |
| define amdgpu_vs float @fpext_hif16_to_32(<2 x half> inreg %val) { |
| ; CHECK-LABEL: fpext_hif16_to_32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_hi_f32_f16 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %hielt = extractelement <2 x half> %val, i32 1 |
| %res = fpext half %hielt to float |
| ret float %res |
| } |
| |
| ; float -> half truncation of an inreg argument. All run lines expect a single |
| ; scalar s_cvt_f16_f32, then a copy of the result into v0 for the return. |
| define amdgpu_vs half @fptrunc_f32_to_f16(float inreg %val) { |
| ; CHECK-LABEL: fptrunc_f32_to_f16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_cvt_f16_f32 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = fptrunc float %val to half |
| ret half %res |
| } |
| |
| ; llvm.ceil.f32 on an inreg float. All run lines expect a single scalar |
| ; s_ceil_f32, then a copy of the result into v0 for the return. |
| define amdgpu_vs float @fceil_f32(float inreg %val) { |
| ; CHECK-LABEL: fceil_f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_ceil_f32 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call float @llvm.ceil.f32(float %val) |
| ret float %res |
| } |
| |
| ; llvm.floor.f32 on an inreg float. All run lines expect a single scalar |
| ; s_floor_f32, then a copy of the result into v0 for the return. |
| define amdgpu_vs float @ffloor_f32(float inreg %val) { |
| ; CHECK-LABEL: ffloor_f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_floor_f32 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call float @llvm.floor.f32(float %val) |
| ret float %res |
| } |
| |
| ; llvm.trunc.f32 on an inreg float. All run lines expect a single scalar |
| ; s_trunc_f32, then a copy of the result into v0 for the return. |
| define amdgpu_vs float @ftrunc_f32(float inreg %val) { |
| ; CHECK-LABEL: ftrunc_f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_trunc_f32 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call float @llvm.trunc.f32(float %val) |
| ret float %res |
| } |
| |
| ; llvm.rint.f32 on an inreg float. All run lines expect the call to select to |
| ; a single scalar s_rndne_f32, then a copy of the result into v0 for the |
| ; return. |
| define amdgpu_vs float @frint_f32(float inreg %val) { |
| ; CHECK-LABEL: frint_f32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_rndne_f32 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call float @llvm.rint.f32(float %val) |
| ret float %res |
| } |
| |
| ; llvm.ceil.f16 on an inreg half. All run lines expect a single scalar |
| ; s_ceil_f16, then a copy of the result into v0 for the return. |
| define amdgpu_vs half @fceil_f16(half inreg %val) { |
| ; CHECK-LABEL: fceil_f16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_ceil_f16 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call half @llvm.ceil.f16(half %val) |
| ret half %res |
| } |
| |
| ; llvm.floor.f16 on an inreg half. All run lines expect a single scalar |
| ; s_floor_f16, then a copy of the result into v0 for the return. |
| define amdgpu_vs half @ffloor_f16(half inreg %val) { |
| ; CHECK-LABEL: ffloor_f16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_floor_f16 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call half @llvm.floor.f16(half %val) |
| ret half %res |
| } |
| |
| ; llvm.trunc.f16 on an inreg half. All run lines expect a single scalar |
| ; s_trunc_f16, then a copy of the result into v0 for the return. |
| define amdgpu_vs half @ftrunc_f16(half inreg %val) { |
| ; CHECK-LABEL: ftrunc_f16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_trunc_f16 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call half @llvm.trunc.f16(half %val) |
| ret half %res |
| } |
| |
| ; llvm.rint.f16 on an inreg half. All run lines expect the call to select to a |
| ; single scalar s_rndne_f16, then a copy of the result into v0 for the return. |
| define amdgpu_vs half @frint_f16(half inreg %val) { |
| ; CHECK-LABEL: frint_f16: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_rndne_f16 s0, s0 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-NEXT: ; return to shader part epilog |
| %res = call half @llvm.rint.f16(half %val) |
| ret half %res |
| } |
| |
| ; half -> signed i32 conversion of an inreg argument. Expectations differ by |
| ; selector: |
| ;   - SelectionDAG (both targets): two scalar steps, s_cvt_f32_f16 followed by |
| ;     s_cvt_i32_f32, with the result left in s0. |
| ;   - GlobalISel: the VALU pair v_cvt_f32_f16 / v_cvt_i32_f32, then |
| ;     v_readfirstlane_b32 to move the result into s0. The gfx1200 output also |
| ;     has an s_wait_alu depctr_va_sdst(0) before the epilog. |
| define amdgpu_vs i32 @fptosi_f16_to_i32(half inreg %x) { |
| ; SDAG-LABEL: fptosi_f16_to_i32: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: s_cvt_f32_f16 s0, s0 |
| ; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; SDAG-NEXT: s_cvt_i32_f32 s0, s0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX11-LABEL: fptosi_f16_to_i32: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: v_cvt_f32_f16_e32 v0, s0 |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GISEL-GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0 |
| ; GISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GISEL-GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX12-LABEL: fptosi_f16_to_i32: |
| ; GISEL-GFX12: ; %bb.0: |
| ; GISEL-GFX12-NEXT: v_cvt_f32_f16_e32 v0, s0 |
| ; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GISEL-GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0 |
| ; GISEL-GFX12-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GISEL-GFX12-NEXT: s_wait_alu depctr_va_sdst(0) |
| ; GISEL-GFX12-NEXT: ; return to shader part epilog |
| %res = fptosi half %x to i32 |
| ret i32 %res |
| } |
| |
| ; half -> unsigned i32 conversion of an inreg argument. Expectations differ by |
| ; selector: |
| ;   - SelectionDAG (both targets): two scalar steps, s_cvt_f32_f16 followed by |
| ;     s_cvt_u32_f32, with the result left in s0. |
| ;   - GlobalISel: the VALU pair v_cvt_f32_f16 / v_cvt_u32_f32, then |
| ;     v_readfirstlane_b32 to move the result into s0. The gfx1200 output also |
| ;     has an s_wait_alu depctr_va_sdst(0) before the epilog. |
| define amdgpu_vs i32 @fptoui_f16_to_i32(half inreg %x) { |
| ; SDAG-LABEL: fptoui_f16_to_i32: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: s_cvt_f32_f16 s0, s0 |
| ; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; SDAG-NEXT: s_cvt_u32_f32 s0, s0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX11-LABEL: fptoui_f16_to_i32: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: v_cvt_f32_f16_e32 v0, s0 |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GISEL-GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GISEL-GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX12-LABEL: fptoui_f16_to_i32: |
| ; GISEL-GFX12: ; %bb.0: |
| ; GISEL-GFX12-NEXT: v_cvt_f32_f16_e32 v0, s0 |
| ; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GISEL-GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| ; GISEL-GFX12-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GISEL-GFX12-NEXT: s_wait_alu depctr_va_sdst(0) |
| ; GISEL-GFX12-NEXT: ; return to shader part epilog |
| %res = fptoui half %x to i32 |
| ret i32 %res |
| } |
| |
| ; Signed i32 -> half conversion of an inreg argument. Expectations differ by |
| ; selector: |
| ;   - SelectionDAG (both targets): scalar s_cvt_f32_i32 then s_cvt_f16_f32, |
| ;     followed by a copy of s0 into v0 for the return. |
| ;   - GlobalISel: VALU v_cvt_f32_i32 then v_cvt_f16_f32 directly in v0. The |
| ;     gfx1150 output writes the destination as v0.l, the gfx1200 output as v0. |
| define amdgpu_vs half @sitofp_i32_to_f16(i32 inreg %x) { |
| ; SDAG-LABEL: sitofp_i32_to_f16: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: s_cvt_f32_i32 s0, s0 |
| ; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) |
| ; SDAG-NEXT: s_cvt_f16_f32 s0, s0 |
| ; SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX11-LABEL: sitofp_i32_to_f16: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: v_cvt_f32_i32_e32 v0, s0 |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-GFX11-NEXT: v_cvt_f16_f32_e32 v0.l, v0 |
| ; GISEL-GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX12-LABEL: sitofp_i32_to_f16: |
| ; GISEL-GFX12: ; %bb.0: |
| ; GISEL-GFX12-NEXT: v_cvt_f32_i32_e32 v0, s0 |
| ; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-GFX12-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GISEL-GFX12-NEXT: ; return to shader part epilog |
| %res = sitofp i32 %x to half |
| ret half %res |
| } |
| |
| ; Unsigned i32 -> half conversion of an inreg argument. Expectations differ by |
| ; selector: |
| ;   - SelectionDAG (both targets): scalar s_cvt_f32_u32 then s_cvt_f16_f32, |
| ;     followed by a copy of s0 into v0 for the return. |
| ;   - GlobalISel: VALU v_cvt_f32_u32 then v_cvt_f16_f32 directly in v0. The |
| ;     gfx1150 output writes the destination as v0.l, the gfx1200 output as v0. |
| define amdgpu_vs half @uitofp_i32_to_f16(i32 inreg %x) { |
| ; SDAG-LABEL: uitofp_i32_to_f16: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: s_cvt_f32_u32 s0, s0 |
| ; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) |
| ; SDAG-NEXT: s_cvt_f16_f32 s0, s0 |
| ; SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX11-LABEL: uitofp_i32_to_f16: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: v_cvt_f32_u32_e32 v0, s0 |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-GFX11-NEXT: v_cvt_f16_f32_e32 v0.l, v0 |
| ; GISEL-GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-GFX12-LABEL: uitofp_i32_to_f16: |
| ; GISEL-GFX12: ; %bb.0: |
| ; GISEL-GFX12-NEXT: v_cvt_f32_u32_e32 v0, s0 |
| ; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-GFX12-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GISEL-GFX12-NEXT: ; return to shader part epilog |
| %res = uitofp i32 %x to half |
| ret half %res |
| } |
| |
| ; Declarations of the rounding intrinsics called by the fceil/ffloor/ftrunc/ |
| ; frint tests in this file, in float and half variants. |
| declare float @llvm.ceil.f32(float) |
| declare float @llvm.floor.f32(float) |
| declare float @llvm.trunc.f32(float) |
| declare float @llvm.rint.f32(float) |
| declare half @llvm.ceil.f16(half) |
| declare half @llvm.floor.f16(half) |
| declare half @llvm.trunc.f16(half) |
| declare half @llvm.rint.f16(half) |