| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s |
| ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s |
| |
| define float @v_rsq_clamp_f32(float %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; f32 input: tahiti selects v_rsq_clamp_f32 directly; tonga expands to v_rsq_f32 plus v_min/v_max against +/-max finite f32 (0x7f7fffff, 0xff7fffff). |
| ; VI-LABEL: v_rsq_clamp_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e32 v0, v0 |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src) |
| ret float %rsq_clamp |
| } |
| |
| define float @v_rsq_clamp_fabs_f32(float %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_fabs_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e64 v0, |v0| |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; fabs of the f32 input appears as an |v0| source modifier (e64 encoding) on both targets, with no separate instruction; tonga then clamps with v_min/v_max against 0x7f7fffff and 0xff7fffff. |
| ; VI-LABEL: v_rsq_clamp_fabs_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e64 v0, |v0| |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %fabs.src = call float @llvm.fabs.f32(float %src) |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src) |
| ret float %rsq_clamp |
| } |
| |
| define double @v_rsq_clamp_f64(double %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1] |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; f64 input: tahiti selects v_rsq_clamp_f64 directly; tonga expands to v_rsq_f64 plus v_min_f64/v_max_f64 against +/-max finite f64 held in s[4:5] (only s5 is rewritten between the two bounds). |
| ; VI-LABEL: v_rsq_clamp_f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] |
| ; VI-NEXT: s_mov_b32 s4, -1 |
| ; VI-NEXT: s_mov_b32 s5, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_mov_b32 s5, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) |
| ret double %rsq_clamp |
| } |
| |
| define double @v_rsq_clamp_fabs_f64(double %src) #0 { |
| ; SI-LABEL: v_rsq_clamp_fabs_f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e64 v[0:1], |v[0:1]| |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; fabs of the f64 input appears as an |v[0:1]| source modifier (e64 encoding) on both targets, with no separate instruction; tonga then clamps with v_min_f64/v_max_f64 against bounds built in s[4:5]. |
| ; VI-LABEL: v_rsq_clamp_fabs_f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]| |
| ; VI-NEXT: s_mov_b32 s4, -1 |
| ; VI-NEXT: s_mov_b32 s5, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_mov_b32 s5, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %fabs.src = call double @llvm.fabs.f64(double %src) |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src) |
| ret double %rsq_clamp |
| } |
| |
| define float @v_rsq_clamp_undef_f32() #0 { |
| ; SI-LABEL: v_rsq_clamp_undef_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e32 v0, s4 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; undef f32 input: both targets read s4 as the source operand with no preceding write to it; tonga still emits the v_min/v_max clamp afterwards. |
| ; VI-LABEL: v_rsq_clamp_undef_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e32 v0, s4 |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef) |
| ret float %rsq_clamp |
| } |
| |
| define double @v_rsq_clamp_undef_f64() #0 { |
| ; SI-LABEL: v_rsq_clamp_undef_f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], s[4:5] |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; undef f64 input: both targets read s[4:5] as the source operand with no preceding write; tonga then overwrites s4/s5 to hold the v_min_f64/v_max_f64 clamp bounds. |
| ; VI-LABEL: v_rsq_clamp_undef_f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5] |
| ; VI-NEXT: s_mov_b32 s4, -1 |
| ; VI-NEXT: s_mov_b32 s5, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_mov_b32 s5, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef) |
| ret double %rsq_clamp |
| } |
| |
| define float @v_rsq_clamp_f32_non_ieee(float %src) #2 { |
| ; SI-LABEL: v_rsq_clamp_f32_non_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; Uses attribute group #2 ("amdgpu-ieee"="false") instead of #0; the expected code for both targets is the same sequence as for v_rsq_clamp_f32. |
| ; VI-LABEL: v_rsq_clamp_f32_non_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f32_e32 v0, v0 |
| ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 |
| ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src) |
| ret float %rsq_clamp |
| } |
| |
| define double @v_rsq_clamp_f64_non_ieee(double %src) #2 { |
| ; SI-LABEL: v_rsq_clamp_f64_non_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1] |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; Uses attribute group #2 ("amdgpu-ieee"="false") instead of #0; the expected code for both targets is the same sequence as for v_rsq_clamp_f64. |
| ; VI-LABEL: v_rsq_clamp_f64_non_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] |
| ; VI-NEXT: s_mov_b32 s4, -1 |
| ; VI-NEXT: s_mov_b32 s5, 0x7fefffff |
| ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_mov_b32 s5, 0xffefffff |
| ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) |
| ret double %rsq_clamp |
| } |
| |
| ; Intrinsics called by the test functions above: fabs and rsq.clamp, each in f32 and f64 form. |
| declare float @llvm.fabs.f32(float) #1 |
| declare float @llvm.amdgcn.rsq.clamp.f32(float) #1 |
| declare double @llvm.fabs.f64(double) #1 |
| declare double @llvm.amdgcn.rsq.clamp.f64(double) #1 |
| |
| ; Attribute groups: #0 is on the default test functions, #1 on the intrinsic declarations, #2 on the *_non_ieee functions (adds "amdgpu-ieee"="false"). |
| attributes #0 = { nounwind } |
| attributes #1 = { nounwind readnone } |
| attributes #2 = { nounwind "amdgpu-ieee"="false" } |