| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s |
| |
| ; Generic llvm.exp2.f32 on an inreg float (s0 in the checks). Besides |
| ; v_s_exp_f32 itself, the expected code adds 0x42800000 (64.0) to inputs |
| ; below 0xc2fc0000 (-126.0) and, in that case, multiplies the result by |
| ; 0x1f800000 (2^-64); otherwise the addend is 0 and the multiplier is 1.0. |
| define amdgpu_cs float @v_s_exp_f32(float inreg %src) { |
| ; GFX12-LABEL: v_s_exp_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000 |
| ; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0 |
| ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) |
| ; GFX12-NEXT: s_add_f32 s0, s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0 |
| ; GFX12-NEXT: v_s_exp_f32 s0, s0 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) |
| ; GFX12-NEXT: s_mul_f32 s0, s0, s1 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call float @llvm.exp2.f32(float %src) |
| ret float %result |
| } |
| |
| ; Generic llvm.exp2.f16 on an inreg half: a single v_s_exp_f16 is expected, |
| ; with no scaling instructions around it, followed by the copy to v0. |
| define amdgpu_cs half @v_s_exp_f16(half inreg %src) { |
| ; GFX12-LABEL: v_s_exp_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_exp_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call half @llvm.exp2.f16(half %src) |
| ret half %result |
| } |
| |
| ; Target intrinsic llvm.amdgcn.exp2.f32 on an inreg float: expected to select |
| ; a single v_s_exp_f32 with no surrounding scaling instructions. |
| define amdgpu_cs float @v_s_amdgcn_exp_f32(float inreg %src) { |
| ; GFX12-LABEL: v_s_amdgcn_exp_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_exp_f32 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call float @llvm.amdgcn.exp2.f32(float %src) |
| ret float %result |
| } |
| |
| ; Target intrinsic llvm.amdgcn.exp2.f16 on an inreg half: expected to select |
| ; a single v_s_exp_f16. |
| define amdgpu_cs half @v_s_amdgcn_exp_f16(half inreg %src) { |
| ; GFX12-LABEL: v_s_amdgcn_exp_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_exp_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call half @llvm.amdgcn.exp2.f16(half %src) |
| ret half %result |
| } |
| |
| ; Generic llvm.log2.f32 on an inreg float (s0 in the checks). Besides |
| ; v_s_log_f32 itself, the expected code multiplies inputs below 0x800000 |
| ; (2^-126) by 0x4f800000 (2^32) and, in that case, subtracts 0x42000000 |
| ; (32.0) from the result; otherwise the multiplier is 1.0 and 0 is subtracted. |
| define amdgpu_cs float @v_s_log_f32(float inreg %src) { |
| ; GFX12-LABEL: v_s_log_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000 |
| ; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 |
| ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) |
| ; GFX12-NEXT: s_mul_f32 s0, s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0 |
| ; GFX12-NEXT: v_s_log_f32 s0, s0 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) |
| ; GFX12-NEXT: s_sub_f32 s0, s0, s1 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call float @llvm.log2.f32(float %src) |
| ret float %result |
| } |
| |
| ; Generic llvm.log2.f16 on an inreg half: a single v_s_log_f16 is expected, |
| ; with no scaling instructions around it. |
| define amdgpu_cs half @v_s_log_f16(half inreg %src) { |
| ; GFX12-LABEL: v_s_log_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_log_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call half @llvm.log2.f16(half %src) |
| ret half %result |
| } |
| |
| ; Target intrinsic llvm.amdgcn.log.f32 on an inreg float: expected to select |
| ; a single v_s_log_f32 with no surrounding scaling instructions. |
| define amdgpu_cs float @v_s_amdgcn_log_f32(float inreg %src) { |
| ; GFX12-LABEL: v_s_amdgcn_log_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_log_f32 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call float @llvm.amdgcn.log.f32(float %src) |
| ret float %result |
| } |
| |
| ; Target intrinsic llvm.amdgcn.log.f16 on an inreg half: expected to select |
| ; a single v_s_log_f16. |
| define amdgpu_cs half @v_s_amdgcn_log_f16(half inreg %src) { |
| ; GFX12-LABEL: v_s_amdgcn_log_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_log_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call half @llvm.amdgcn.log.f16(half %src) |
| ret half %result |
| } |
| |
| ; llvm.amdgcn.rcp.f32 called with the `fast` flag on an inreg float: a single |
| ; v_s_rcp_f32 is expected. |
| define amdgpu_cs float @v_s_rcp_f32(float inreg %src) { |
| ; GFX12-LABEL: v_s_rcp_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_rcp_f32 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call fast float @llvm.amdgcn.rcp.f32(float %src) |
| ret float %result |
| } |
| |
| ; llvm.amdgcn.rcp.f16 called with the `fast` flag on an inreg half: a single |
| ; v_s_rcp_f16 is expected. |
| define amdgpu_cs half @v_s_rcp_f16(half inreg %src) { |
| ; GFX12-LABEL: v_s_rcp_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_rcp_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call fast half @llvm.amdgcn.rcp.f16(half %src) |
| ret half %result |
| } |
| |
| ; Reciprocal square root written as `fdiv fast 1.0, sqrt(x)` with a fast |
| ; llvm.sqrt.f32. The SelectionDAG checks expect the pair to be selected as |
| ; one v_s_rsq_f32; the GlobalISel checks currently record v_s_sqrt_f32 |
| ; followed by v_s_rcp_f32 instead, which is what the TODO below tracks. |
| ; TODO-GFX12: GlobalISel should generate v_s_rsq. |
| define amdgpu_cs float @v_s_rsq_f32(float inreg %src) { |
| ; GFX12-SDAG-LABEL: v_s_rsq_f32: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: v_s_rsq_f32 s0, s0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-GISEL-LABEL: v_s_rsq_f32: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: v_s_sqrt_f32 s0, s0 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1) |
| ; GFX12-GISEL-NEXT: v_s_rcp_f32 s0, s0 |
| ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-GISEL-NEXT: ; return to shader part epilog |
| %sqrt = call fast float @llvm.sqrt.f32(float %src) |
| %fdiv = fdiv fast float 1.0, %sqrt |
| ret float %fdiv |
| } |
| |
| ; Half-precision `fdiv fast 1.0, sqrt(x)` with a fast llvm.sqrt.f16. The |
| ; checks use the prefix shared by both selectors and expect one v_s_rsq_f16. |
| define amdgpu_cs half @v_s_rsq_f16(half inreg %src) { |
| ; GFX12-LABEL: v_s_rsq_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_rsq_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %sqrt = call fast half @llvm.sqrt.f16(half %src) |
| %result = fdiv fast half 1.0, %sqrt |
| ret half %result |
| } |
| |
| ; Generic llvm.sqrt.f32 with no fast-math flags on an inreg float. Both |
| ; selectors expand it into a longer sequence around v_s_sqrt_f32: inputs |
| ; below 0xf800000 are first multiplied by 0x4f800000, two s_fmac_f32 / |
| ; compare / s_cselect_b32 steps pick between the result and its +/-1 integer |
| ; neighbours, the scaled case is multiplied by 0x37800000, and a final |
| ; v_cmp_class_f32 (mask 0x260) selects the input itself instead. |
| ; NOTE(review): the TODO below presumably refers to the v_cmp_class_f32 (and, |
| ; for GlobalISel, v_cndmask_b32) in the current checks - confirm. |
| ; TODO-GFX12: Should not use any VALU instructions. |
| define amdgpu_cs float @v_s_sqrt_f32(float inreg %src) { |
| ; GFX12-SDAG-LABEL: v_s_sqrt_f32: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_mul_f32 s1, s0, 0x4f800000 |
| ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xf800000 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX12-SDAG-NEXT: s_cselect_b32 s1, s1, s0 |
| ; GFX12-SDAG-NEXT: v_s_sqrt_f32 s2, s1 |
| ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe |
| ; GFX12-SDAG-NEXT: s_mov_b32 s4, s1 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX12-SDAG-NEXT: s_add_co_i32 s3, s2, -1 |
| ; GFX12-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) |
| ; GFX12-SDAG-NEXT: s_fmac_f32 s4, s5, s2 |
| ; GFX12-SDAG-NEXT: s_mov_b32 s5, s1 |
| ; GFX12-SDAG-NEXT: s_cmp_le_f32 s4, 0 |
| ; GFX12-SDAG-NEXT: s_cselect_b32 s3, s3, s2 |
| ; GFX12-SDAG-NEXT: s_add_co_i32 s4, s2, 1 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX12-SDAG-NEXT: s_xor_b32 s6, s4, 0x80000000 |
| ; GFX12-SDAG-NEXT: s_fmac_f32 s5, s6, s2 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
| ; GFX12-SDAG-NEXT: s_cmp_gt_f32 s5, 0 |
| ; GFX12-SDAG-NEXT: s_cselect_b32 s2, s4, s3 |
| ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xf800000 |
| ; GFX12-SDAG-NEXT: s_mul_f32 s0, s2, 0x37800000 |
| ; GFX12-SDAG-NEXT: v_cmp_class_f32_e64 s3, s1, 0x260 |
| ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: s_cselect_b32 s0, s0, s2 |
| ; GFX12-SDAG-NEXT: s_and_b32 s2, s3, exec_lo |
| ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe |
| ; GFX12-SDAG-NEXT: s_cselect_b32 s0, s1, s0 |
| ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe |
| ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-GISEL-LABEL: v_s_sqrt_f32: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0xf800000 |
| ; GFX12-GISEL-NEXT: s_mul_f32 s2, s0, 0x4f800000 |
| ; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX12-GISEL-NEXT: s_cselect_b32 s0, s2, s0 |
| ; GFX12-GISEL-NEXT: v_s_sqrt_f32 s2, s0 |
| ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe |
| ; GFX12-GISEL-NEXT: s_mov_b32 s4, s0 |
| ; GFX12-GISEL-NEXT: s_mov_b32 s6, s0 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX12-GISEL-NEXT: s_add_co_i32 s3, s2, -1 |
| ; GFX12-GISEL-NEXT: s_xor_b32 s5, s3, 0x80000000 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) |
| ; GFX12-GISEL-NEXT: s_fmac_f32 s4, s5, s2 |
| ; GFX12-GISEL-NEXT: s_add_co_i32 s5, s2, 1 |
| ; GFX12-GISEL-NEXT: s_xor_b32 s7, s5, 0x80000000 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_2) |
| ; GFX12-GISEL-NEXT: s_cmp_le_f32 s4, 0 |
| ; GFX12-GISEL-NEXT: s_fmac_f32 s6, s7, s2 |
| ; GFX12-GISEL-NEXT: s_cselect_b32 s2, s3, s2 |
| ; GFX12-GISEL-NEXT: s_cmp_gt_f32 s6, 0 |
| ; GFX12-GISEL-NEXT: s_cselect_b32 s2, s5, s2 |
| ; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0 |
| ; GFX12-GISEL-NEXT: s_mul_f32 s3, s2, 0x37800000 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_3) | instid1(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: s_cselect_b32 s1, s3, s2 |
| ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe |
| ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX12-GISEL-NEXT: v_cmp_class_f32_e64 s1, s0, 0x260 |
| ; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1 |
| ; GFX12-GISEL-NEXT: ; return to shader part epilog |
| %result = call float @llvm.sqrt.f32(float %src) |
| ret float %result |
| } |
| |
| ; Generic llvm.sqrt.f16 on an inreg half: a single v_s_sqrt_f16 is expected, |
| ; with no expansion around it. |
| define amdgpu_cs half @v_s_sqrt_f16(half inreg %src) { |
| ; GFX12-LABEL: v_s_sqrt_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_sqrt_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call half @llvm.sqrt.f16(half %src) |
| ret half %result |
| } |
| |
| ; Target intrinsic llvm.amdgcn.sqrt.f32 on an inreg float: expected to select |
| ; a single v_s_sqrt_f32 with no expansion around it. |
| define amdgpu_cs float @v_amdgcn_sqrt_f32(float inreg %src) { |
| ; GFX12-LABEL: v_amdgcn_sqrt_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_sqrt_f32 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call float @llvm.amdgcn.sqrt.f32(float %src) |
| ret float %result |
| } |
| |
| ; Target intrinsic llvm.amdgcn.sqrt.f16 on an inreg half: expected to select |
| ; a single v_s_sqrt_f16. |
| define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src) { |
| ; GFX12-LABEL: v_amdgcn_sqrt_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_sqrt_f16 s0, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %result = call half @llvm.amdgcn.sqrt.f16(half %src) |
| ret half %result |
| } |
| |
| ; llvm.fabs.f32 feeding the generic llvm.log2.f32. In the expected output the |
| ; abs is not applied as a source modifier on v_s_log_f32: the sign bit is |
| ; cleared up front with s_bitset0_b32 s0, 31, and the compare / multiply / |
| ; subtract scaling sequence around v_s_log_f32 then works on that value. |
| define amdgpu_cs float @srcmods_abs_f32(float inreg %src) { |
| ; GFX12-LABEL: srcmods_abs_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_bitset0_b32 s0, 31 |
| ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) |
| ; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000 |
| ; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 |
| ; GFX12-NEXT: s_mul_f32 s0, s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0 |
| ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_s_log_f32 s0, s0 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_sub_f32 s0, s0, s1 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %abs = call float @llvm.fabs.f32(float %src) |
| %result = call float @llvm.log2.f32(float %abs) |
| ret float %result |
| } |
| |
| ; fneg feeding the generic llvm.log2.f32. Neither selector applies the |
| ; negation as a source modifier on v_s_log_f32; both flip the sign bit with |
| ; s_xor_b32 ..., 0x80000000. The two outputs differ in the range compare: |
| ; SelectionDAG tests the original value with s_cmp_gt_f32 against 0x80800000, |
| ; while GlobalISel tests the negated value with s_cmp_lt_f32 against 0x800000. |
| define amdgpu_cs float @srcmods_neg_f32(float inreg %src) { |
| ; GFX12-SDAG-LABEL: srcmods_neg_f32: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_xor_b32 s1, s0, 0x80000000 |
| ; GFX12-SDAG-NEXT: s_cmp_gt_f32 s0, 0x80800000 |
| ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x4f800000, 1.0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) |
| ; GFX12-SDAG-NEXT: s_mul_f32 s0, s1, s0 |
| ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0 |
| ; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0 |
| ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) |
| ; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1 |
| ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe |
| ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-GISEL-LABEL: srcmods_neg_f32: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_xor_b32 s0, s0, 0x80000000 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) |
| ; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000 |
| ; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 |
| ; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 |
| ; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) |
| ; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0 |
| ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe |
| ; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1 |
| ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) |
| ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-GISEL-NEXT: ; return to shader part epilog |
| %neg = fneg float %src |
| %result = call float @llvm.log2.f32(float %neg) |
| ret float %result |
| } |
| |
| ; llvm.fabs.f16 feeding the generic llvm.log2.f16: the abs is expected to be |
| ; applied as a source modifier on the v_s_log_f16 operand, with no separate |
| ; instruction for it. |
| define amdgpu_cs half @srcmods_abs_f16(half inreg %src) { |
| ; GFX12-LABEL: srcmods_abs_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_log_f16 s0, |s0| |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %abs = call half @llvm.fabs.f16(half %src) |
| %result = call half @llvm.log2.f16(half %abs) |
| ret half %result |
| } |
| |
| ; fneg feeding the generic llvm.log2.f16: the negation is expected to be |
| ; applied as a source modifier on the v_s_log_f16 operand, with no separate |
| ; instruction for it. |
| define amdgpu_cs half @srcmods_neg_f16(half inreg %src) { |
| ; GFX12-LABEL: srcmods_neg_f16: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_s_log_f16 s0, -s0 |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %neg = fneg half %src |
| %result = call half @llvm.log2.f16(half %neg) |
| ret half %result |
| } |
| |
| ; Declarations of the generic and llvm.amdgcn.* intrinsics called by the |
| ; test functions in this file, in f16 and f32 variants. |
| declare half @llvm.exp2.f16(half) |
| declare float @llvm.exp2.f32(float) |
| declare half @llvm.amdgcn.exp2.f16(half) |
| declare float @llvm.amdgcn.exp2.f32(float) |
| declare half @llvm.log2.f16(half) |
| declare float @llvm.log2.f32(float) |
| declare half @llvm.amdgcn.log.f16(half) |
| declare float @llvm.amdgcn.log.f32(float) |
| declare half @llvm.amdgcn.rcp.f16(half) |
| declare float @llvm.amdgcn.rcp.f32(float) |
| declare half @llvm.sqrt.f16(half) |
| declare float @llvm.sqrt.f32(float) |
| declare half @llvm.amdgcn.sqrt.f16(half) |
| declare float @llvm.amdgcn.sqrt.f32(float) |
| declare half @llvm.fabs.f16(half) |
| declare float @llvm.fabs.f32(float) |