| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s |
| ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s |
| ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s |
| |
| ; Basic f32 test: llvm.amdgcn.rsq.clamp.f32 applied directly to the argument. |
| ; The tahiti checks expect a single v_rsq_clamp_f32. The tonga checks expect |
| ; v_rsq_f32 followed by v_min_f32 / v_max_f32 against 0x7f7fffff / 0xff7fffff |
| ; (the bit patterns of +/- the largest finite f32). The gfx1200 checks expect |
| ; v_rsq_f32 followed by one v_minmax_num_f32 using the same two bounds. |
| define float @v_rsq_clamp_f32(float %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e32 v0, v0 |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_rsq_f32_e32 v0, v0 |
| ; GFX12-NEXT: v_mov_b32_e32 v1, 0xff7fffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src) |
| ret float %rsq_clamp |
| } |
| |
| ; f32 test with llvm.fabs.f32 feeding the intrinsic. On every target the |
| ; checks expect the fabs to show up as the |v0| source modifier on the rsq |
| ; instruction (e64 encoding) instead of as a separate instruction; the |
| ; clamping sequence after the rsq is otherwise the same as in the plain |
| ; f32 test. |
| define float @v_rsq_clamp_fabs_f32(float %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_fabs_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e64 v0, |v0| |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_fabs_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e64 v0, |v0| |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_fabs_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_rsq_f32_e64 v0, |v0| |
| ; GFX12-NEXT: v_mov_b32_e32 v1, 0xff7fffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %fabs.src = call float @llvm.fabs.f32(float %src) |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src) |
| ret float %rsq_clamp |
| } |
| |
| ; Basic f64 test: llvm.amdgcn.rsq.clamp.f64 applied directly to the argument. |
| ; The tahiti checks expect a single v_rsq_clamp_f64. The tonga and gfx1200 |
| ; checks expect v_rsq_f64 followed by a min and then a max (v_min_f64 / |
| ; v_max_f64 on tonga, v_min_num_f64 / v_max_num_f64 on gfx1200). Each bound is |
| ; materialized in v[2:3] with two v_mov_b32: low half -1, high half 0x7fefffff |
| ; for the upper bound and 0xffefffff for the lower bound (together the bit |
| ; patterns of +/- the largest finite f64). |
| define double @v_rsq_clamp_f64(double %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1] |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_f64: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) |
| ret double %rsq_clamp |
| } |
| |
| ; f64 test with llvm.fabs.f64 feeding the intrinsic. On every target the |
| ; checks expect the fabs to show up as the |v[0:1]| source modifier on the |
| ; rsq instruction (e64 encoding) instead of as a separate instruction; the |
| ; min/max clamping sequence after the rsq is otherwise the same as in the |
| ; plain f64 test. |
| define double @v_rsq_clamp_fabs_f64(double %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_fabs_f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e64 v[0:1], |v[0:1]| |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_fabs_f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]| |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_fabs_f64: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]| |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %fabs.src = call double @llvm.fabs.f64(double %src) |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src) |
| ret double %rsq_clamp |
| } |
| |
| ; f32 test with an undef operand (the function takes no arguments). The |
| ; checks expect the rsq to still be emitted, reading an arbitrary SGPR: s4 on |
| ; tahiti and tonga, and on gfx1200 the scalar form v_s_rsq_f32 s0, s0 whose |
| ; result is then fed to v_minmax_num_f32. The clamp bounds are the same as in |
| ; the plain f32 test. |
| define float @v_rsq_clamp_undef_f32() #0 { |
| ; SI-LABEL: v_rsq_clamp_undef_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e32 v0, s4 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_undef_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e32 v0, s4 |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_undef_f32: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_s_rsq_f32 s0, s0 |
| ; GFX12-NEXT: v_mov_b32_e32 v0, 0xff7fffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_minmax_num_f32 v0, s0, 0x7f7fffff, v0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef) |
| ret float %rsq_clamp |
| } |
| |
| ; f64 test with an undef operand (the function takes no arguments). The |
| ; checks expect the rsq to still be emitted, reading an arbitrary SGPR pair: |
| ; s[4:5] on tahiti and tonga, s[0:1] on gfx1200. The min/max clamping sequence |
| ; after the rsq is the same as in the plain f64 test. |
| define double @v_rsq_clamp_undef_f64() #0 { |
| ; SI-LABEL: v_rsq_clamp_undef_f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], s[4:5] |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_undef_f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5] |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_undef_f64: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_rsq_f64_e32 v[0:1], s[0:1] |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef) |
| ret double %rsq_clamp |
| } |
| |
| ; Same IR body as v_rsq_clamp_f32, but the function uses attribute group #2 |
| ; ("amdgpu-ieee"="false") instead of #0. The expected instruction sequences |
| ; on all three targets are identical to those of v_rsq_clamp_f32. |
| define float @v_rsq_clamp_f32_non_ieee(float %src) #2 { |
| ; SI-LABEL: v_rsq_clamp_f32_non_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_f32_non_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e32 v0, v0 |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_f32_non_ieee: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_rsq_f32_e32 v0, v0 |
| ; GFX12-NEXT: v_mov_b32_e32 v1, 0xff7fffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src) |
| ret float %rsq_clamp |
| } |
| |
| ; Same IR body as v_rsq_clamp_f64, but the function uses attribute group #2 |
| ; ("amdgpu-ieee"="false") instead of #0. The expected instruction sequences |
| ; on all three targets are identical to those of v_rsq_clamp_f64. |
| define double @v_rsq_clamp_f64_non_ieee(double %src) #2 { |
| ; SI-LABEL: v_rsq_clamp_f64_non_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1] |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rsq_clamp_f64_non_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: v_mov_b32_e32 v2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_rsq_clamp_f64_non_ieee: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0x7fefffff |
| ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: v_mov_b32_e32 v2, -1 |
| ; GFX12-NEXT: v_mov_b32_e32 v3, 0xffefffff |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) |
| ret double %rsq_clamp |
| } |
| |
| ; Intrinsics called by the test functions in this file; all four are declared |
| ; with attribute group #1. |
| declare float @llvm.fabs.f32(float) #1 |
| declare float @llvm.amdgcn.rsq.clamp.f32(float) #1 |
| declare double @llvm.fabs.f64(double) #1 |
| declare double @llvm.amdgcn.rsq.clamp.f64(double) #1 |
| |
| ; Attribute groups: #0 is used by the regular test functions, #1 by the |
| ; intrinsic declarations, and #2 by the *_non_ieee functions, which add |
| ; "amdgpu-ieee"="false" on top of nounwind. |
| attributes #0 = { nounwind } |
| attributes #1 = { nounwind readnone } |
| attributes #2 = { nounwind "amdgpu-ieee"="false" } |