| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s |
| ; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefixes=R600,EG %s |
| ; RUN: llc -mtriple=r600 -mcpu=cayman < %s | FileCheck -check-prefixes=R600,CM %s |
| |
| ; Reciprocal written as a plain `fdiv float 1.0, %x` (no fast-math flags, no |
| ; !fpmath metadata) under attribute group #3. That group is defined outside |
| ; this chunk; presumably it selects IEEE denormal handling, per the "_ieee" |
| ; suffix -- TODO confirm. |
| ; The SI and VI checks expect the full division expansion (two v_div_scale, |
| ; v_rcp, an fma refinement chain, v_div_fmas, v_div_fixup); the two targets |
| ; differ only in where v_rcp is scheduled. The R600 checks expect nothing but |
| ; CF_END followed by PAD. |
| define float @v_rcp_f32_ieee(float %x) #3 { |
| ; SI-LABEL: v_rcp_f32_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v2, v1 |
| ; SI-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 |
| ; SI-NEXT: v_fma_f32 v4, -v1, v2, 1.0 |
| ; SI-NEXT: v_fma_f32 v2, v4, v2, v2 |
| ; SI-NEXT: v_mul_f32_e32 v4, v3, v2 |
| ; SI-NEXT: v_fma_f32 v5, -v1, v4, v3 |
| ; SI-NEXT: v_fma_f32 v4, v5, v2, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v1, v4, v3 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v2, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_f32_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; VI-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v1 |
| ; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v2, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v1, v4, v2 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v1, v4, v2 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_ieee: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float 1.0, %x |
| ret float %rcp |
| } |
| |
| ; Same `fdiv float 1.0, %x` IR as the plain IEEE case, but under attribute |
| ; group #4 (defined outside this chunk; presumably it enables unsafe FP math, |
| ; per the "_unsafe" suffix -- TODO confirm). |
| ; The shared GCN checks (used by both the tahiti and tonga invocations) expect |
| ; a single v_rcp_f32 with no division expansion; the R600 checks expect only |
| ; CF_END followed by PAD. |
| define float @v_rcp_f32_ieee_unsafe(float %x) #4 { |
| ; GCN-LABEL: v_rcp_f32_ieee_unsafe: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_rcp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_ieee_unsafe: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float 1.0, %x |
| ret float %rcp |
| } |
| |
| ; `fdiv float 1.0, %x` where the argument is annotated nofpclass(sub), i.e. |
| ; the caller promises %x is never a subnormal. Uses attribute group #3 |
| ; (defined outside this chunk). |
| ; The SI and VI checks still expect the full v_div_scale / v_rcp / fma / |
| ; v_div_fmas / v_div_fixup expansion, so the nofpclass hint does not shorten |
| ; the expected sequence here. The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_f32_ieee_known_not_denormal(float nofpclass(sub) %x) #3 { |
| ; SI-LABEL: v_rcp_f32_ieee_known_not_denormal: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v2, v1 |
| ; SI-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 |
| ; SI-NEXT: v_fma_f32 v4, -v1, v2, 1.0 |
| ; SI-NEXT: v_fma_f32 v2, v4, v2, v2 |
| ; SI-NEXT: v_mul_f32_e32 v4, v3, v2 |
| ; SI-NEXT: v_fma_f32 v5, -v1, v4, v3 |
| ; SI-NEXT: v_fma_f32 v4, v5, v2, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v1, v4, v3 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v2, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_f32_ieee_known_not_denormal: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; VI-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v1 |
| ; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v2, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v1, v4, v2 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v1, v4, v2 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_ieee_known_not_denormal: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float 1.0, %x |
| ret float %rcp |
| } |
| |
| ; `fdiv nnan ninf float 1.0, %x`: the division carries the nnan and ninf |
| ; fast-math flags but no !fpmath metadata. Uses attribute group #3 (defined |
| ; outside this chunk). |
| ; The SI and VI checks still expect the full v_div_scale / v_rcp / fma / |
| ; v_div_fmas / v_div_fixup expansion, so these two flags alone do not shorten |
| ; the expected sequence. The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_f32_ieee_nnan_ninf(float %x) #3 { |
| ; SI-LABEL: v_rcp_f32_ieee_nnan_ninf: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v2, v1 |
| ; SI-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 |
| ; SI-NEXT: v_fma_f32 v4, -v1, v2, 1.0 |
| ; SI-NEXT: v_fma_f32 v2, v4, v2, v2 |
| ; SI-NEXT: v_mul_f32_e32 v4, v3, v2 |
| ; SI-NEXT: v_fma_f32 v5, -v1, v4, v3 |
| ; SI-NEXT: v_fma_f32 v4, v5, v2, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v1, v4, v3 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v2, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_f32_ieee_nnan_ninf: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; VI-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v1 |
| ; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v2, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v1, v4, v2 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v1, v4, v2 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_ieee_nnan_ninf: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv nnan ninf float 1.0, %x |
| ret float %rcp |
| } |
| |
| ; Negated reciprocal written as `fdiv float -1.0, %x`, with no fast-math flags |
| ; or !fpmath metadata, under attribute group #3 (defined outside this chunk). |
| ; The SI and VI checks expect the full division expansion with -1.0 appearing |
| ; as the numerator operand of both v_div_scale instructions and of |
| ; v_div_fixup. The R600 checks expect only CF_END and PAD. |
| define float @v_neg_rcp_f32_ieee(float %x) #3 { |
| ; SI-LABEL: v_neg_rcp_f32_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, -1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v2, v1 |
| ; SI-NEXT: v_div_scale_f32 v3, vcc, -1.0, v0, -1.0 |
| ; SI-NEXT: v_fma_f32 v4, -v1, v2, 1.0 |
| ; SI-NEXT: v_fma_f32 v2, v4, v2, v2 |
| ; SI-NEXT: v_mul_f32_e32 v4, v3, v2 |
| ; SI-NEXT: v_fma_f32 v5, -v1, v4, v3 |
| ; SI-NEXT: v_fma_f32 v4, v5, v2, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v1, v4, v3 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v2, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, v0, -1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_neg_rcp_f32_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, -1.0 |
| ; VI-NEXT: v_div_scale_f32 v2, vcc, -1.0, v0, -1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v1 |
| ; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v2, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v1, v4, v2 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v1, v4, v2 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, v0, -1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_neg_rcp_f32_ieee: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float -1.0, %x |
| ret float %rcp |
| } |
| |
| ; Plain `fdiv float 1.0, %x` under attribute group #0 (defined outside this |
| ; chunk; presumably a denormals-are-zero configuration, per the "_daz" |
| ; suffix -- TODO confirm). |
| ; The SI and VI checks expect the full division expansion, and additionally a |
| ; pair of s_setreg_imm32_b32 writes to hwreg(HW_REG_MODE, 4, 2) -- value 3 |
| ; before the fma refinement chain and value 0 after it, ahead of v_div_fmas. |
| ; NOTE(review): this looks like the FP32 denormal mode being switched on for |
| ; the refinement steps only; verify against the ISA documentation. |
| ; The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_f32_daz(float %x) #0 { |
| ; SI-LABEL: v_rcp_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v2, v1 |
| ; SI-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; SI-NEXT: v_fma_f32 v4, -v1, v2, 1.0 |
| ; SI-NEXT: v_fma_f32 v2, v4, v2, v2 |
| ; SI-NEXT: v_mul_f32_e32 v4, v3, v2 |
| ; SI-NEXT: v_fma_f32 v5, -v1, v4, v3 |
| ; SI-NEXT: v_fma_f32 v4, v5, v2, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v1, v4, v3 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v2, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 |
| ; VI-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v1 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v2, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v1, v4, v2 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v1, v4, v2 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float 1.0, %x |
| ret float %rcp |
| } |
| |
| ; Negated reciprocal `fdiv float -1.0, %x` under attribute group #0 (defined |
| ; outside this chunk; presumably denormals-are-zero, per the "_daz" suffix -- |
| ; TODO confirm). |
| ; The SI and VI checks expect the full division expansion with -1.0 as the |
| ; numerator operand, bracketed by s_setreg_imm32_b32 writes to |
| ; hwreg(HW_REG_MODE, 4, 2): value 3 before the fma chain, value 0 after it. |
| ; The R600 checks expect only CF_END and PAD. |
| define float @v_neg_rcp_f32_daz(float %x) #0 { |
| ; SI-LABEL: v_neg_rcp_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, -1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v2, v1 |
| ; SI-NEXT: v_div_scale_f32 v3, vcc, -1.0, v0, -1.0 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; SI-NEXT: v_fma_f32 v4, -v1, v2, 1.0 |
| ; SI-NEXT: v_fma_f32 v2, v4, v2, v2 |
| ; SI-NEXT: v_mul_f32_e32 v4, v3, v2 |
| ; SI-NEXT: v_fma_f32 v5, -v1, v4, v3 |
| ; SI-NEXT: v_fma_f32 v4, v5, v2, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v1, v4, v3 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v2, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, v0, -1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_neg_rcp_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, -1.0 |
| ; VI-NEXT: v_div_scale_f32 v2, vcc, -1.0, v0, -1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v1 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v2, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v1, v4, v2 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v1, v4, v2 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, v0, -1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_neg_rcp_f32_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float -1.0, %x |
| ret float %rcp |
| } |
| |
| ; `fdiv float 1.0, %x` tagged with `!fpmath !0`, under attribute group #3. |
| ; Both !0 and #3 are defined outside this chunk; presumably !0 relaxes the |
| ; required accuracy to 2.5 ULP, per the "_ulp25" suffix -- TODO confirm. |
| ; Instead of the full division expansion, the checks expect a scaled |
| ; reciprocal: v_frexp_mant, v_rcp on the mantissa, v_frexp_exp, a negation of |
| ; the exponent (0 - exp) and v_ldexp. The SI checks additionally expect a |
| ; compare of |v0| against 0x7f800000 and a v_cndmask selecting between v0 and |
| ; the extracted mantissa; the VI checks have no such compare/select. |
| ; The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_f32_ieee_ulp25(float %x) #3 { |
| ; SI-LABEL: v_rcp_f32_ieee_ulp25: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; SI-NEXT: v_frexp_mant_f32_e32 v1, v0 |
| ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 |
| ; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc |
| ; SI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 |
| ; SI-NEXT: v_ldexp_f32_e32 v0, v1, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_f32_ieee_ulp25: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_frexp_mant_f32_e32 v1, v0 |
| ; VI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; VI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; VI-NEXT: v_sub_u32_e32 v0, vcc, 0, v0 |
| ; VI-NEXT: v_ldexp_f32 v0, v1, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_ieee_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float 1.0, %x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; `fdiv float 1.0, %x, !fpmath !0` where the argument is annotated |
| ; nofpclass(sub) (never a subnormal), under attribute group #3. Both !0 and |
| ; #3 are defined outside this chunk. |
| ; The checks still expect the scaled-reciprocal sequence (v_frexp_mant, |
| ; v_rcp, v_frexp_exp, 0 - exp, v_ldexp; plus the 0x7f800000 compare and |
| ; v_cndmask on SI), so the nofpclass hint does not reduce the expected output |
| ; to a bare v_rcp. The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_f32_ieee_ulp25_known_not_denormal(float nofpclass(sub) %x) #3 { |
| ; SI-LABEL: v_rcp_f32_ieee_ulp25_known_not_denormal: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; SI-NEXT: v_frexp_mant_f32_e32 v1, v0 |
| ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 |
| ; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc |
| ; SI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 |
| ; SI-NEXT: v_ldexp_f32_e32 v0, v1, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_f32_ieee_ulp25_known_not_denormal: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_frexp_mant_f32_e32 v1, v0 |
| ; VI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; VI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; VI-NEXT: v_sub_u32_e32 v0, vcc, 0, v0 |
| ; VI-NEXT: v_ldexp_f32 v0, v1, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_ieee_ulp25_known_not_denormal: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float 1.0, %x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Negated variant: `fdiv float -1.0, %x, !fpmath !0` with a nofpclass(sub) |
| ; argument, under attribute group #3. Both !0 and #3 are defined outside this |
| ; chunk. |
| ; The checks expect the scaled-reciprocal sequence with the negation applied |
| ; as a -v0 source modifier on v_frexp_mant (and on the SI-only v_cndmask); |
| ; v_frexp_exp still reads the unmodified v0. The R600 checks expect only |
| ; CF_END and PAD. |
| define float @v_neg_rcp_f32_ieee_ulp25_known_not_denormal(float nofpclass(sub) %x) #3 { |
| ; SI-LABEL: v_neg_rcp_f32_ieee_ulp25_known_not_denormal: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; SI-NEXT: v_frexp_mant_f32_e64 v1, -v0 |
| ; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 |
| ; SI-NEXT: v_cndmask_b32_e64 v1, -v0, v1, s[4:5] |
| ; SI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 |
| ; SI-NEXT: v_ldexp_f32_e32 v0, v1, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_neg_rcp_f32_ieee_ulp25_known_not_denormal: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_frexp_mant_f32_e64 v1, -v0 |
| ; VI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; VI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; VI-NEXT: v_sub_u32_e32 v0, vcc, 0, v0 |
| ; VI-NEXT: v_ldexp_f32 v0, v1, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_neg_rcp_f32_ieee_ulp25_known_not_denormal: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float -1.0, %x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; `fdiv ninf nnan float 1.0, %x, !fpmath !0`: relaxed-accuracy reciprocal |
| ; that also carries the ninf and nnan fast-math flags, under attribute group |
| ; #3. Both !0 and #3 are defined outside this chunk. |
| ; The checks expect the same scaled-reciprocal sequence as without the flags |
| ; (v_frexp_mant, v_rcp, v_frexp_exp, 0 - exp, v_ldexp), including the SI-only |
| ; compare against 0x7f800000 and v_cndmask. The R600 checks expect only |
| ; CF_END and PAD. |
| define float @v_rcp_f32_ieee_ulp25_ninf_nnan(float %x) #3 { |
| ; SI-LABEL: v_rcp_f32_ieee_ulp25_ninf_nnan: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; SI-NEXT: v_frexp_mant_f32_e32 v1, v0 |
| ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 |
| ; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc |
| ; SI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 |
| ; SI-NEXT: v_ldexp_f32_e32 v0, v1, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_f32_ieee_ulp25_ninf_nnan: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_frexp_mant_f32_e32 v1, v0 |
| ; VI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; VI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; VI-NEXT: v_sub_u32_e32 v0, vcc, 0, v0 |
| ; VI-NEXT: v_ldexp_f32 v0, v1, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_ieee_ulp25_ninf_nnan: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv ninf nnan float 1.0, %x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; `fdiv float 1.0, %x, !fpmath !0` under attribute group #0. Both !0 and #0 |
| ; are defined outside this chunk; presumably relaxed 2.5 ULP accuracy with |
| ; denormals-are-zero, per the name -- TODO confirm. |
| ; The shared GCN checks (tahiti and tonga) expect the whole division to |
| ; collapse to a single v_rcp_f32. The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_f32_daz_ulp25(float %x) #0 { |
| ; GCN-LABEL: v_rcp_f32_daz_ulp25: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_rcp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_f32_daz_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float 1.0, %x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Negated relaxed-accuracy reciprocal: `fdiv float -1.0, %x, !fpmath !0` |
| ; under attribute group #3. Both !0 and #3 are defined outside this chunk. |
| ; The checks expect the scaled-reciprocal sequence with the negation applied |
| ; as a -v0 source modifier on v_frexp_mant (and on the SI-only v_cndmask), |
| ; followed by v_rcp, v_frexp_exp on the unmodified v0, 0 - exp and v_ldexp. |
| ; The R600 checks expect only CF_END and PAD. |
| define float @v_neg_rcp_f32_ieee_ulp25(float %x) #3 { |
| ; SI-LABEL: v_neg_rcp_f32_ieee_ulp25: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; SI-NEXT: v_frexp_mant_f32_e64 v1, -v0 |
| ; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 |
| ; SI-NEXT: v_cndmask_b32_e64 v1, -v0, v1, s[4:5] |
| ; SI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 |
| ; SI-NEXT: v_ldexp_f32_e32 v0, v1, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_neg_rcp_f32_ieee_ulp25: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_frexp_mant_f32_e64 v1, -v0 |
| ; VI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; VI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; VI-NEXT: v_sub_u32_e32 v0, vcc, 0, v0 |
| ; VI-NEXT: v_ldexp_f32 v0, v1, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_neg_rcp_f32_ieee_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float -1.0, %x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Negated relaxed-accuracy reciprocal: `fdiv float -1.0, %x, !fpmath !0` |
| ; under attribute group #0. Both !0 and #0 are defined outside this chunk. |
| ; The shared GCN checks expect a single v_rcp_f32 in its e64 encoding with |
| ; the negation folded in as a -v0 source modifier. The R600 checks expect |
| ; only CF_END and PAD. |
| define float @v_neg_rcp_f32_daz_ulp25(float %x) #0 { |
| ; GCN-LABEL: v_neg_rcp_f32_daz_ulp25: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_rcp_f32_e64 v0, -v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_neg_rcp_f32_daz_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %rcp = fdiv float -1.0, %x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Reciprocal of an absolute value: llvm.fabs.f32(%x) feeds a plain |
| ; `fdiv float 1.0, ...` (no fast-math flags, no !fpmath), under attribute |
| ; group #3 (defined outside this chunk). |
| ; The SI and VI checks expect the fabs to be materialised once with |
| ; v_and_b32 against 0x7fffffff for the v_div_scale operands, the usual |
| ; v_rcp / fma / v_div_fmas expansion, and a |v0| source modifier on the final |
| ; v_div_fixup. The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_fabs_f32_ieee(float %x) #3 { |
| ; SI-LABEL: v_rcp_fabs_f32_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; SI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, 1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; SI-NEXT: v_div_scale_f32 v1, vcc, 1.0, v1, 1.0 |
| ; SI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; SI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; SI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; SI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; SI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, |v0|, 1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_fabs_f32_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; VI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, 1.0 |
| ; VI-NEXT: v_div_scale_f32 v1, vcc, 1.0, v1, 1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; VI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, |v0|, 1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_fabs_f32_ieee: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float 1.0, %fabs.x |
| ret float %rcp |
| } |
| |
| ; Reciprocal of llvm.fabs.f32(%x) via a plain `fdiv float 1.0, ...`, under |
| ; attribute group #0 (defined outside this chunk; presumably |
| ; denormals-are-zero, per the "_daz" suffix -- TODO confirm). |
| ; The SI and VI checks expect v_and_b32 with 0x7fffffff for the v_div_scale |
| ; operands, the full division expansion with the fma chain bracketed by |
| ; s_setreg_imm32_b32 writes to hwreg(HW_REG_MODE, 4, 2) (3 before, 0 after), |
| ; and a |v0| source modifier on v_div_fixup. The R600 checks expect only |
| ; CF_END and PAD. |
| define float @v_rcp_fabs_f32_daz(float %x) #0 { |
| ; SI-LABEL: v_rcp_fabs_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; SI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, 1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; SI-NEXT: v_div_scale_f32 v1, vcc, 1.0, v1, 1.0 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; SI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; SI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; SI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; SI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; SI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, |v0|, 1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_fabs_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; VI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, 1.0 |
| ; VI-NEXT: v_div_scale_f32 v1, vcc, 1.0, v1, 1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; VI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, |v0|, 1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_fabs_f32_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float 1.0, %fabs.x |
| ret float %rcp |
| } |
| |
| ; Relaxed-accuracy reciprocal of llvm.fabs.f32(%x): the fdiv carries |
| ; `!fpmath !0`, under attribute group #3. Both !0 and #3 are defined outside |
| ; this chunk. |
| ; The checks expect the scaled-reciprocal sequence with the fabs folded in as |
| ; a |v0| source modifier on v_frexp_mant (and on the SI-only v_cndmask); |
| ; no separate v_and_b32 is expected. v_frexp_exp reads the unmodified v0. |
| ; The R600 checks expect only CF_END and PAD. |
| define float @v_rcp_fabs_f32_ieee_ulp25(float %x) #3 { |
| ; SI-LABEL: v_rcp_fabs_f32_ieee_ulp25: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; SI-NEXT: v_frexp_mant_f32_e64 v1, |v0| |
| ; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 |
| ; SI-NEXT: v_cndmask_b32_e64 v1, |v0|, v1, s[4:5] |
| ; SI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 |
| ; SI-NEXT: v_ldexp_f32_e32 v0, v1, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_fabs_f32_ieee_ulp25: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_frexp_mant_f32_e64 v1, |v0| |
| ; VI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; VI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; VI-NEXT: v_sub_u32_e32 v0, vcc, 0, v0 |
| ; VI-NEXT: v_ldexp_f32 v0, v1, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_fabs_f32_ieee_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float 1.0, %fabs.x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Relaxed-accuracy reciprocal of llvm.fabs.f32(%x): the fdiv carries |
| ; `!fpmath !0`, under attribute group #0. Both !0 and #0 are defined outside |
| ; this chunk. |
| ; The shared GCN checks expect a single v_rcp_f32 in its e64 encoding with |
| ; the fabs folded in as a |v0| source modifier. The R600 checks expect only |
| ; CF_END and PAD. |
| define float @v_rcp_fabs_f32_daz_ulp25(float %x) #0 { |
| ; GCN-LABEL: v_rcp_fabs_f32_daz_ulp25: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_rcp_f32_e64 v0, |v0| |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_fabs_f32_daz_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float 1.0, %fabs.x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Negated reciprocal of an absolute value: llvm.fabs.f32(%x) feeds a plain |
| ; `fdiv float -1.0, ...`, under attribute group #3 (defined outside this |
| ; chunk). |
| ; The SI and VI checks expect v_and_b32 with 0x7fffffff for the v_div_scale |
| ; operands, the full division expansion with -1.0 as the numerator operand, |
| ; and v_div_fixup taking |v0| and -1.0. The R600 checks expect only CF_END |
| ; and PAD. |
| define float @v_rcp_neg_fabs_f32_ieee(float %x) #3 { |
| ; SI-LABEL: v_rcp_neg_fabs_f32_ieee: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; SI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; SI-NEXT: v_div_scale_f32 v1, vcc, -1.0, v1, -1.0 |
| ; SI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; SI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; SI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; SI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; SI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, |v0|, -1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_neg_fabs_f32_ieee: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; VI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -1.0 |
| ; VI-NEXT: v_div_scale_f32 v1, vcc, -1.0, v1, -1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; VI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, |v0|, -1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_neg_fabs_f32_ieee: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float -1.0, %fabs.x |
| ret float %rcp |
| } |
| |
| ; Negated reciprocal of llvm.fabs.f32(%x) via a plain `fdiv float -1.0, ...`, |
| ; under attribute group #0 (defined outside this chunk; presumably |
| ; denormals-are-zero, per the "_daz" suffix -- TODO confirm). |
| ; The SI and VI checks expect v_and_b32 with 0x7fffffff, the full division |
| ; expansion with -1.0 as the numerator operand, the fma chain bracketed by |
| ; s_setreg_imm32_b32 writes to hwreg(HW_REG_MODE, 4, 2) (3 before, 0 after), |
| ; and v_div_fixup taking |v0| and -1.0. The R600 checks expect only CF_END |
| ; and PAD. |
| define float @v_rcp_neg_fabs_f32_daz(float %x) #0 { |
| ; SI-LABEL: v_rcp_neg_fabs_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; SI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -1.0 |
| ; SI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; SI-NEXT: v_div_scale_f32 v1, vcc, -1.0, v1, -1.0 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; SI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; SI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; SI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; SI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; SI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; SI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; SI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; SI-NEXT: v_div_fixup_f32 v0, v1, |v0|, -1.0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_neg_fabs_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 |
| ; VI-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -1.0 |
| ; VI-NEXT: v_div_scale_f32 v1, vcc, -1.0, v1, -1.0 |
| ; VI-NEXT: v_rcp_f32_e32 v3, v2 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 |
| ; VI-NEXT: v_fma_f32 v4, -v2, v3, 1.0 |
| ; VI-NEXT: v_fma_f32 v3, v4, v3, v3 |
| ; VI-NEXT: v_mul_f32_e32 v4, v1, v3 |
| ; VI-NEXT: v_fma_f32 v5, -v2, v4, v1 |
| ; VI-NEXT: v_fma_f32 v4, v5, v3, v4 |
| ; VI-NEXT: v_fma_f32 v1, -v2, v4, v1 |
| ; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 |
| ; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4 |
| ; VI-NEXT: v_div_fixup_f32 v0, v1, |v0|, -1.0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_neg_fabs_f32_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float -1.0, %fabs.x |
| ret float %rcp |
| } |
| |
| ; Relaxed-accuracy negated reciprocal of llvm.fabs.f32(%x): |
| ; `fdiv float -1.0, %fabs.x, !fpmath !0`, under attribute group #3. Both !0 |
| ; and #3 are defined outside this chunk. |
| ; The checks expect the scaled-reciprocal sequence with both the negation and |
| ; the fabs folded in as a -|v0| source modifier on v_frexp_mant (and on the |
| ; SI-only v_cndmask); v_frexp_exp reads the unmodified v0. The R600 checks |
| ; expect only CF_END and PAD. |
| define float @v_rcp_neg_fabs_f32_ieee_ulp25(float %x) #3 { |
| ; SI-LABEL: v_rcp_neg_fabs_f32_ieee_ulp25: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; SI-NEXT: v_frexp_mant_f32_e64 v1, -|v0| |
| ; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 |
| ; SI-NEXT: v_cndmask_b32_e64 v1, -|v0|, v1, s[4:5] |
| ; SI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 |
| ; SI-NEXT: v_ldexp_f32_e32 v0, v1, v0 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_rcp_neg_fabs_f32_ieee_ulp25: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_frexp_mant_f32_e64 v1, -|v0| |
| ; VI-NEXT: v_rcp_f32_e32 v1, v1 |
| ; VI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 |
| ; VI-NEXT: v_sub_u32_e32 v0, vcc, 0, v0 |
| ; VI-NEXT: v_ldexp_f32 v0, v1, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_neg_fabs_f32_ieee_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float -1.0, %fabs.x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Relaxed-accuracy negated reciprocal of llvm.fabs.f32(%x): |
| ; `fdiv float -1.0, %fabs.x, !fpmath !0`, under attribute group #0. Both !0 |
| ; and #0 are defined outside this chunk. |
| ; The shared GCN checks expect a single v_rcp_f32 in its e64 encoding with |
| ; both the negation and the fabs folded in as a -|v0| source modifier. The |
| ; R600 checks expect only CF_END and PAD. |
| define float @v_rcp_neg_fabs_f32_daz_ulp25(float %x) #0 { |
| ; GCN-LABEL: v_rcp_neg_fabs_f32_daz_ulp25: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_rcp_f32_e64 v0, -|v0| |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_rcp_neg_fabs_f32_daz_ulp25: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %rcp = fdiv float -1.0, %fabs.x, !fpmath !0 |
| ret float %rcp |
| } |
| |
| ; Kernel form with a scalar (kernel-argument) operand: computes |
| ; `fdiv float 1.0, %src, !fpmath !0` and stores the result through %out with |
| ; 4-byte alignment. Uses attribute group #0; both #0 and !0 are defined |
| ; outside this chunk. |
| ; The SI and VI checks expect the argument loads, a single v_rcp_f32 reading |
| ; the SGPR directly, and a store (buffer_store_dword on SI, flat_store_dword |
| ; on VI). The EG checks expect one RECIP_IEEE; the CM checks expect |
| ; RECIP_IEEE in the X channel plus three MASKED copies in Y, Z and W. |
| ; NOTE(review): despite the name lacking "ulp25", the fdiv here carries |
| ; `!fpmath !0`, making the IR body identical to s_rcp_ulp25_pat_f32_daz -- |
| ; confirm whether that duplication is intentional. |
| define amdgpu_kernel void @s_rcp_pat_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_rcp_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, |
| ; EG-NEXT: RECIP_IEEE * T1.X, KC0[2].Z, |
| ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_rcp_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: RECIP_IEEE T1.X, KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Y (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Z (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE * T1.W (MASKED), KC0[2].Z, |
| %rcp = fdiv float 1.0, %src, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Kernel form with a scalar (kernel-argument) operand: computes |
| ; `fdiv float 1.0, %src, !fpmath !0` (no fast-math flags) and stores the |
| ; result through %out with 4-byte alignment. Uses attribute group #0; both #0 |
| ; and !0 are defined outside this chunk (presumably denormals-are-zero and a |
| ; 2.5 ULP accuracy bound, per the name -- TODO confirm). |
| ; The SI and VI checks expect the argument loads, a single v_rcp_f32 reading |
| ; the SGPR directly, and a store (buffer_store_dword on SI, flat_store_dword |
| ; on VI). The EG checks expect one RECIP_IEEE; the CM checks expect |
| ; RECIP_IEEE in the X channel plus three MASKED copies in Y, Z and W. |
| define amdgpu_kernel void @s_rcp_ulp25_pat_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_rcp_ulp25_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_ulp25_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_ulp25_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, |
| ; EG-NEXT: RECIP_IEEE * T1.X, KC0[2].Z, |
| ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_rcp_ulp25_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: RECIP_IEEE T1.X, KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Y (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Z (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE * T1.W (MASKED), KC0[2].Z, |
| %rcp = fdiv float 1.0, %src, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Kernel form with a scalar (kernel-argument) operand: computes |
| ; `fdiv fast float 1.0, %src, !fpmath !0` -- the division carries the `fast` |
| ; flag in addition to the !fpmath metadata -- and stores the result through |
| ; %out with 4-byte alignment. Uses attribute group #0; both #0 and !0 are |
| ; defined outside this chunk. |
| ; The SI and VI checks expect the argument loads, a single v_rcp_f32 reading |
| ; the SGPR directly, and a store (buffer_store_dword on SI, flat_store_dword |
| ; on VI). The EG checks expect one RECIP_IEEE; the CM checks expect |
| ; RECIP_IEEE in the X channel plus three MASKED copies in Y, Z and W. |
| define amdgpu_kernel void @s_rcp_fast_ulp25_pat_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_rcp_fast_ulp25_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_fast_ulp25_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_fast_ulp25_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, |
| ; EG-NEXT: RECIP_IEEE * T1.X, KC0[2].Z, |
| ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_rcp_fast_ulp25_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: RECIP_IEEE T1.X, KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Y (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Z (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE * T1.W (MASKED), KC0[2].Z, |
| %rcp = fdiv fast float 1.0, %src, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Reciprocal of a scalar kernel argument written as `fdiv arcp float 1.0, %src` |
| ; with !fpmath !0 (2.5 ULP), built with attribute group #0 (f32 denormal mode |
| ; preserve-sign, unsafe-fp-math disabled). |
| ; Only the allow-reciprocal flag is present on the division. The expected |
| ; output is a single v_rcp_f32 (tahiti/tonga) or RECIP_IEEE (cypress/cayman). |
| define amdgpu_kernel void @s_rcp_arcp_ulp25_pat_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_rcp_arcp_ulp25_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_arcp_ulp25_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_arcp_ulp25_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, |
| ; EG-NEXT: RECIP_IEEE * T1.X, KC0[2].Z, |
| ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_rcp_arcp_ulp25_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: RECIP_IEEE T1.X, KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Y (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Z (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE * T1.W (MASKED), KC0[2].Z, |
| ; `arcp` is the only fast-math flag used here. |
| %rcp = fdiv arcp float 1.0, %src, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Reciprocal of a scalar kernel argument where the relaxation comes from the |
| ; function attributes instead of instruction flags: the fdiv carries no |
| ; fast-math flags, but the kernel uses attribute group #2 |
| ; ("unsafe-fp-math"="true", f32 denormal mode preserve-sign) and the division |
| ; still has !fpmath !0 (2.5 ULP). |
| ; The expected output is a single v_rcp_f32 (tahiti/tonga) or RECIP_IEEE |
| ; (cypress/cayman). |
| define amdgpu_kernel void @s_rcp_global_fast_ulp25_pat_f32_daz(ptr addrspace(1) %out, float %src) #2 { |
| ; SI-LABEL: s_rcp_global_fast_ulp25_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_global_fast_ulp25_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_global_fast_ulp25_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, |
| ; EG-NEXT: RECIP_IEEE * T1.X, KC0[2].Z, |
| ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_rcp_global_fast_ulp25_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: RECIP_IEEE T1.X, KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Y (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T1.Z (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE * T1.W (MASKED), KC0[2].Z, |
| ; No per-instruction fast-math flags on purpose; see attribute group #2. |
| %rcp = fdiv float 1.0, %src, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Computes 1.0 / fabs(%src) with !fpmath !0 (2.5 ULP) under attribute group #0 |
| ; (f32 denormal mode preserve-sign, unsafe-fp-math disabled). |
| ; The expected output has no separate absolute-value instruction: the fabs |
| ; appears as an |x| source modifier on v_rcp_f32_e64 (tahiti/tonga) and on |
| ; RECIP_IEEE (cypress/cayman). |
| define amdgpu_kernel void @s_rcp_fabs_pat_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_rcp_fabs_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e64 v0, |s2| |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_fabs_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e64 v2, |s2| |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_fabs_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, |
| ; EG-NEXT: RECIP_IEEE * T1.X, |KC0[2].Z|, |
| ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_rcp_fabs_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: RECIP_IEEE T1.X, |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE T1.Y (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE T1.Z (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE * T1.W (MASKED), |KC0[2].Z|, |
| ; The fabs result is used only as the divisor. |
| %src.fabs = call float @llvm.fabs.f32(float %src) |
| %rcp = fdiv float 1.0, %src.fabs, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Negated reciprocal: computes -1.0 / %src with !fpmath !0 (2.5 ULP) under |
| ; attribute group #0 (f32 denormal mode preserve-sign, unsafe-fp-math disabled). |
| ; Expected output: on tahiti/tonga the negation is a source modifier on the |
| ; reciprocal (v_rcp_f32_e64 with -s2). On cypress/cayman, RECIP_IEEE of the |
| ; unmodified input is followed by a MUL_IEEE with the literal -1.0. |
| define amdgpu_kernel void @s_neg_rcp_pat_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_neg_rcp_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e64 v0, -s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_neg_rcp_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e64 v2, -s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_neg_rcp_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].Z, |
| ; EG-NEXT: MUL_IEEE T0.X, literal.x, PS, |
| ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| ; EG-NEXT: -1082130432(-1.000000e+00), 2(2.802597e-45) |
| ; |
| ; CM-LABEL: s_neg_rcp_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: RECIP_IEEE T0.X, KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T0.Y (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE T0.Z (MASKED), KC0[2].Z, |
| ; CM-NEXT: RECIP_IEEE * T0.W (MASKED), KC0[2].Z, |
| ; CM-NEXT: MUL_IEEE * T0.X, literal.x, PV.X, |
| ; CM-NEXT: -1082130432(-1.000000e+00), 0(0.000000e+00) |
| ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; The sign lives in the constant numerator (-1.0), not in an fneg of %src. |
| %rcp = fdiv float -1.0, %src, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Computes 1.0 / -fabs(%src) with !fpmath !0 (2.5 ULP) under attribute group #0 |
| ; (f32 denormal mode preserve-sign, unsafe-fp-math disabled). |
| ; Expected output: on tahiti/tonga both fneg and fabs become the -|s2| source |
| ; modifier on v_rcp_f32_e64. On cypress/cayman only the |x| modifier is on |
| ; RECIP_IEEE, and the sign is applied by a following MUL_IEEE with literal -1.0. |
| define amdgpu_kernel void @s_rcp_fabs_fneg_pat_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_rcp_fabs_fneg_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e64 v0, -|s2| |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_fabs_fneg_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e64 v2, -|s2| |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_fabs_fneg_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: PAD |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: RECIP_IEEE * T0.X, |KC0[2].Z|, |
| ; EG-NEXT: MUL_IEEE T0.X, literal.x, PS, |
| ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| ; EG-NEXT: -1082130432(-1.000000e+00), 2(2.802597e-45) |
| ; |
| ; CM-LABEL: s_rcp_fabs_fneg_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: RECIP_IEEE T0.X, |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE T0.Y (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE T0.Z (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE * T0.W (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: MUL_IEEE * T0.X, literal.x, PV.X, |
| ; CM-NEXT: -1082130432(-1.000000e+00), 0(0.000000e+00) |
| ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; Divisor is fneg(fabs(%src)); each intermediate value has a single use. |
| %src.fabs = call float @llvm.fabs.f32(float %src) |
| %src.fabs.fneg = fneg float %src.fabs |
| %rcp = fdiv float 1.0, %src.fabs.fneg, !fpmath !0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Variant of the fneg(fabs) reciprocal test in which the negated absolute value |
| ; has a second user: besides 1.0 / -fabs(%src) (with !fpmath !0, 2.5 ULP) the |
| ; kernel also stores %src * -fabs(%src). Built with attribute group #0. |
| ; Expected output: on tahiti/tonga the -|x| source modifier is applied |
| ; separately on both v_rcp_f32_e64 and v_mul_f32_e64. On cypress/cayman the |
| ; multiply takes -|x| as a modifier, while the reciprocal uses |x| followed by a |
| ; MUL_IEEE with literal -1.0. |
| define amdgpu_kernel void @s_rcp_fabs_fneg_pat_multi_use_f32_daz(ptr addrspace(1) %out, float %src) #0 { |
| ; SI-LABEL: s_rcp_fabs_fneg_pat_multi_use_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s6, s[4:5], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_rcp_f32_e64 v0, -|s6| |
| ; SI-NEXT: v_mul_f32_e64 v1, s6, -|s6| |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_rcp_fabs_fneg_pat_multi_use_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_rcp_f32_e64 v2, -|s2| |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mul_f32_e64 v3, s2, -|s2| |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: flat_store_dword v[0:1], v3 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_rcp_fabs_fneg_pat_multi_use_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T2.X, 0 |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T2.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: ALU clause starting at 4: |
| ; EG-NEXT: MUL_IEEE T0.X, KC0[2].Z, -|KC0[2].Z|, |
| ; EG-NEXT: RECIP_IEEE * T0.Y, |KC0[2].Z|, |
| ; EG-NEXT: MUL_IEEE T1.X, literal.x, PS, |
| ; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.y, |
| ; EG-NEXT: -1082130432(-1.000000e+00), 2(2.802597e-45) |
| ; |
| ; CM-LABEL: s_rcp_fabs_fneg_pat_multi_use_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T2.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: MUL_IEEE * T0.X, KC0[2].Z, -|KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE T0.X (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE T0.Y, |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE T0.Z (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: RECIP_IEEE * T0.W (MASKED), |KC0[2].Z|, |
| ; CM-NEXT: MUL_IEEE * T1.X, literal.x, PV.Y, |
| ; CM-NEXT: -1082130432(-1.000000e+00), 0(0.000000e+00) |
| ; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| %src.fabs = call float @llvm.fabs.f32(float %src) |
| %src.fabs.fneg = fneg float %src.fabs |
| %rcp = fdiv float 1.0, %src.fabs.fneg, !fpmath !0 |
| ; Both stores go to the same address and are volatile, so both are emitted in |
| ; program order (see the two store instructions in the checks). |
| store volatile float %rcp, ptr addrspace(1) %out, align 4 |
| |
| ; Second use of the fneg(fabs) value. |
| %other = fmul float %src, %src.fabs.fneg |
| store volatile float %other, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Division of a loaded value by the constant 2.0 with the `arcp` flag and no |
| ; !fpmath metadata, under attribute group #0 (f32 denormal mode preserve-sign, |
| ; unsafe-fp-math disabled). |
| ; The expected output contains no reciprocal or division sequence: every target |
| ; emits one multiply by the inline constant 0.5. |
| define amdgpu_kernel void @s_div_arcp_2_x_pat_f32_daz(ptr addrspace(1) %out) #0 { |
| ; SI-LABEL: s_div_arcp_2_x_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s6, s[0:1], 0x0 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_mul_f32_e64 v0, s6, 0.5 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_div_arcp_2_x_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[0:1], 0x0 |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mul_f32_e64 v2, s2, 0.5 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_div_arcp_2_x_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: TEX 0 @4 |
| ; EG-NEXT: ALU 2, @6, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: Fetch clause starting at 4: |
| ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| ; EG-NEXT: ALU clause starting at 6: |
| ; EG-NEXT: MUL_IEEE T0.X, T0.X, 0.5, |
| ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_div_arcp_2_x_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: TEX 0 @4 |
| ; CM-NEXT: ALU 2, @6, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: Fetch clause starting at 4: |
| ; CM-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| ; CM-NEXT: ALU clause starting at 6: |
| ; CM-NEXT: MUL_IEEE * T0.X, T0.X, 0.5, |
| ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; The dividend is loaded through a poison pointer; it only serves as a |
| ; non-constant operand for the division. |
| %x = load float, ptr addrspace(1) poison |
| ; Despite its name, %rcp holds the quotient %x / 2.0, not a reciprocal of %x. |
| %rcp = fdiv arcp float %x, 2.0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Division of a loaded value by the constant 10.0 with the `arcp` flag and no |
| ; !fpmath metadata, under attribute group #0 (f32 denormal mode preserve-sign, |
| ; unsafe-fp-math disabled). |
| ; The expected output is a single multiply by the f32 constant 0x3dcccccd, |
| ; which the cypress/cayman literal lines print as 1.000000e-01. On tahiti/tonga |
| ; the constant is first materialised in a VGPR with v_mov_b32. |
| define amdgpu_kernel void @s_div_arcp_k_x_pat_f32_daz(ptr addrspace(1) %out) #0 { |
| ; SI-LABEL: s_div_arcp_k_x_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s6, s[0:1], 0x0 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x3dcccccd |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_mul_f32_e32 v0, s6, v0 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_div_arcp_k_x_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[0:1], 0x0 |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0x3dcccccd |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mul_f32_e32 v2, s2, v0 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_div_arcp_k_x_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: TEX 0 @4 |
| ; EG-NEXT: ALU 2, @6, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: Fetch clause starting at 4: |
| ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| ; EG-NEXT: ALU clause starting at 6: |
| ; EG-NEXT: MUL_IEEE T0.X, T0.X, literal.x, |
| ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| ; EG-NEXT: 1036831949(1.000000e-01), 2(2.802597e-45) |
| ; |
| ; CM-LABEL: s_div_arcp_k_x_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: TEX 0 @4 |
| ; CM-NEXT: ALU 3, @6, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: Fetch clause starting at 4: |
| ; CM-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| ; CM-NEXT: ALU clause starting at 6: |
| ; CM-NEXT: MUL_IEEE * T0.X, T0.X, literal.x, |
| ; CM-NEXT: 1036831949(1.000000e-01), 0(0.000000e+00) |
| ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; The dividend is loaded through a poison pointer; it only serves as a |
| ; non-constant operand for the division. |
| %x = load float, ptr addrspace(1) poison |
| ; Despite its name, %rcp holds the quotient %x / 10.0, not a reciprocal of %x. |
| %rcp = fdiv arcp float %x, 10.0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Division of a loaded value by the constant -10.0 with the `arcp` flag and no |
| ; !fpmath metadata, under attribute group #0 (f32 denormal mode preserve-sign, |
| ; unsafe-fp-math disabled). |
| ; The expected output is a single multiply by the f32 constant 0xbdcccccd, |
| ; which the cypress/cayman literal lines print as -1.000000e-01. The sign is |
| ; part of the constant, so no separate negate instruction is expected. |
| define amdgpu_kernel void @s_div_arcp_neg_k_x_pat_f32_daz(ptr addrspace(1) %out) #0 { |
| ; SI-LABEL: s_div_arcp_neg_k_x_pat_f32_daz: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s6, s[0:1], 0x0 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0xbdcccccd |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_mul_f32_e32 v0, s6, v0 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_div_arcp_neg_k_x_pat_f32_daz: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[0:1], 0x0 |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xbdcccccd |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mul_f32_e32 v2, s2, v0 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; EG-LABEL: s_div_arcp_neg_k_x_pat_f32_daz: |
| ; EG: ; %bb.0: |
| ; EG-NEXT: TEX 0 @4 |
| ; EG-NEXT: ALU 2, @6, KC0[CB0:0-32], KC1[] |
| ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 |
| ; EG-NEXT: CF_END |
| ; EG-NEXT: Fetch clause starting at 4: |
| ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| ; EG-NEXT: ALU clause starting at 6: |
| ; EG-NEXT: MUL_IEEE T0.X, T0.X, literal.x, |
| ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| ; EG-NEXT: -1110651699(-1.000000e-01), 2(2.802597e-45) |
| ; |
| ; CM-LABEL: s_div_arcp_neg_k_x_pat_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: TEX 0 @4 |
| ; CM-NEXT: ALU 3, @6, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: Fetch clause starting at 4: |
| ; CM-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 |
| ; CM-NEXT: ALU clause starting at 6: |
| ; CM-NEXT: MUL_IEEE * T0.X, T0.X, literal.x, |
| ; CM-NEXT: -1110651699(-1.000000e-01), 0(0.000000e+00) |
| ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; The dividend is loaded through a poison pointer; it only serves as a |
| ; non-constant operand for the division. |
| %x = load float, ptr addrspace(1) poison |
| ; Despite its name, %rcp holds the quotient %x / -10.0, not a reciprocal of %x. |
| %rcp = fdiv arcp float %x, -10.0 |
| store float %rcp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Intrinsic declarations. llvm.fabs.f32 is called by the fabs-based tests in |
| ; this file. No call to llvm.sqrt.f32 appears in the nearby functions; |
| ; NOTE(review): presumably it is used by other tests in this file - confirm |
| ; before removing. |
| declare float @llvm.fabs.f32(float) #1 |
| declare float @llvm.sqrt.f32(float) #1 |
| |
| ; #0 - used by the *_daz kernels: f32 denormal mode preserve-sign for both |
| ; outputs and inputs, with unsafe-fp-math explicitly disabled. |
| attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" } |
| ; #1 - attached to the intrinsic declarations above. |
| attributes #1 = { nounwind readnone } |
| ; #2 - same denormal mode as #0 but with unsafe-fp-math enabled for the whole |
| ; function (the "global fast" variants). |
| attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" } |
| ; #3 - IEEE f32 denormal handling, no unsafe-fp-math attribute |
| ; (used by v_rcp_f32_ieee). |
| attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" } |
| ; #4 - IEEE f32 denormal handling with unsafe-fp-math enabled |
| ; (used by v_rcp_f32_ieee_unsafe). |
| attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" } |
| |
| ; Operand of the !fpmath annotations on the fdiv instructions: a required |
| ; accuracy of 2.5 ULP. |
| !0 = !{float 2.500000e+00} |