| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX1010 %s |
| |
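| ; Test the fcmp+select combine for shared constants: when a select returns the |
| ; same constant that its fcmp condition compares against, the compared value is |
| ; selected instead, so the constant does not need its own register. |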
| |
| ;------------------------------------------------------------------------------ |
| ; F32 Tests |
| ;------------------------------------------------------------------------------ |
| |
| ; Should be folded: fcmp oeq + select with the compared constant as the true value. |
| ; The expected v_cndmask selects between %arg (v0) and %other (v1), so the |
| ; constant is only needed by the compare and is never moved into a VGPR. |
| define float @fcmp_select_fold_oeq_f32_imm(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_fold_oeq_f32_imm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 |
| ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_oeq_f32_imm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float %arg, 0x40490FDB00000000 |
| %sel = select i1 %cmp, float 0x40490FDB00000000, float %other |
| ret float %sel |
| } |
| |
| ; Should be folded: fcmp oeq + select with constant in true value (commutative). |
| ; The constant is the first fcmp operand here; the expected code still compares |
| ; against v0 and selects between v0 and v1 without a VGPR copy of the constant. |
| define float @fcmp_select_fold_oeq_imm_f32(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_fold_oeq_imm_f32: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 |
| ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f32: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float 0x40490FDB00000000, %arg |
| %sel = select i1 %cmp, float 0x40490FDB00000000, float %other |
| ret float %sel |
| } |
| |
| ; Should be folded: fcmp one + select with the compared constant as the false value. |
| ; The expected v_cndmask returns %other (v1) when vcc is set and %arg (v0) |
| ; otherwise, so no VGPR copy of the constant is needed. |
| ; NOTE(review): for a NaN %arg, fcmp one is false and the IR select yields the |
| ; constant, while the checked code yields %arg - confirm the fold is intended |
| ; for unordered inputs. |
| define float @fcmp_select_fold_one_f32_imm(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_fold_one_f32_imm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x402df850 |
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_one_f32_imm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one float %arg, 0x4005BF0A00000000 |
| %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000 |
| ret float %sel |
| } |
| |
| ; Should be folded: fcmp one + select with constant in false value (commutative). |
| ; The constant is the first fcmp operand here; the expected v_cndmask still |
| ; returns %other (v1) when vcc is set and %arg (v0) otherwise. |
| ; NOTE(review): for a NaN %arg, fcmp one is false and the IR select yields the |
| ; constant, while the checked code yields %arg - confirm the fold is intended |
| ; for unordered inputs. |
| define float @fcmp_select_fold_one_imm_f32(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_fold_one_imm_f32: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x402df850 |
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_one_imm_f32: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one float 0x4005BF0A00000000, %arg |
| %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000 |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: the select constant differs from the fcmp constant, so |
| ; %arg cannot replace it. The select constant is still supplied to v_cndmask |
| ; (through a v_mov on gfx900, as a literal operand on gfx1010). |
| define float @fcmp_select_no_fold_f32_different_const(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f32_different_const: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 |
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x46487ed8 |
| ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f32_different_const: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42487ed8, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x46487ed8, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float %arg, 0x40490FDB00000000 |
| %sel = select i1 %cmp, float 0x40C90FDB00000000, float %other |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: fcmp oeq with the constant in the false value of the select. |
| ; The constant is returned when the equality compare fails, so %arg is not a |
| ; valid substitute for it. |
| define float @fcmp_select_no_fold_f32_other_pos(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f32_other_pos: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 |
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8 |
| ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f32_other_pos: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float %arg, 0x40490FDB00000000 |
| %sel = select i1 %cmp, float %other, float 0x40490FDB00000000 |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: unsupported comparison predicate (olt). |
| ; An ordered less-than gives no equality between %arg and the constant, so the |
| ; constant must still be supplied to v_cndmask. |
| define float @fcmp_select_no_fold_f32_unsupported_cmp(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f32_unsupported_cmp: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 |
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8 |
| ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f32_unsupported_cmp: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42487ed8, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp olt float %arg, 0x40490FDB00000000 |
| %sel = select i1 %cmp, float %other, float 0x40490FDB00000000 |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: imm can be encoded into cndmask. |
| ; 1.0 appears as an inline operand of both v_cmp and v_cndmask in the expected |
| ; code, so the constant already costs no extra register. |
| define float @fcmp_select_no_fold_f32_enc_imm(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 1.0, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 1.0, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float %arg, 1.0 |
| %sel = select i1 %cmp, float 1.0, float %other |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: imm can be encoded into cndmask. |
| ; Variant using fcmp one with the constant as the first operand: -4.0 appears as |
| ; an inline operand of both v_cmp and v_cndmask in the expected code. |
| define float @fcmp_select_no_fold_f32_enc_imm_2(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm_2: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, -4.0, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm_2: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, -4.0, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one float -4.0, %arg |
| %sel = select i1 %cmp, float %other, float -4.0 |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: fcmp oeq with zero constant. |
| ; -0.0 also compares equal to 0.0, so %arg is not a safe replacement for the |
| ; constant; 0 is used as an inline operand in the expected code anyway. |
| define float @fcmp_select_no_fold_oeq_f32_zero(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_zero: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_zero: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float %arg, 0.0 |
| %sel = select i1 %cmp, float 0.0, float %other |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: fcmp one with negative zero constant. |
| ; +0.0 compares equal to -0.0, so %arg is not guaranteed to carry the sign bit |
| ; that the select must return; the constant stays an operand of v_cndmask. |
| define float @fcmp_select_no_fold_one_f32_negzero(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_one_f32_negzero: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_brev_b32 s4, 1 |
| ; GFX900-NEXT: v_bfrev_b32_e32 v2, 1 |
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_one_f32_negzero: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one float -0.0, %arg ; 0x8000000000000000 |
| %sel = select i1 %cmp, float %other, float -0.0 ; 0x8000000000000000 |
| ret float %sel |
| } |
| |
| ; NaN constants should bypass the optimization due to special IEEE 754 behavior. |
| ; fcmp oeq with NaN always returns false, so select always chooses %other; the |
| ; expected code is just a copy of %other (v1) into the return register v0. |
| define float @fcmp_select_no_fold_oeq_f32_nan(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_nan: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_mov_b32_e32 v0, v1 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_nan: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_mov_b32_e32 v0, v1 |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float %arg, 0x7FF8000000000000 |
| %sel = select i1 %cmp, float 0x7FF8000000000000, float %other |
| ret float %sel |
| } |
| |
| ; NaN constants should bypass the optimization due to special IEEE 754 behavior. |
| ; fcmp one with NaN always returns false, so select always chooses the NaN constant; |
| ; the expected code just moves the f32 NaN pattern 0x7fc00000 into v0. |
| define float @fcmp_select_no_fold_one_f32_nan(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_one_f32_nan: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc00000 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_one_f32_nan: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc00000 |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one float 0x7FF8000000000000, %arg |
| %sel = select i1 %cmp, float %other, float 0x7FF8000000000000 |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: fcmp oeq with positive infinity |
| ; Infinity values should bypass the optimization, generating unfolded code: |
| ; the +inf pattern 0x7f800000 is still supplied to v_cndmask as its own operand. |
| define float @fcmp_select_no_fold_posinf_oeq_f32(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_posinf_oeq_f32: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x7f800000 |
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000 |
| ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_posinf_oeq_f32: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x7f800000, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq float %arg, 0x7FF0000000000000 |
| %sel = select i1 %cmp, float 0x7FF0000000000000, float %other |
| ret float %sel |
| } |
| |
| ; Should NOT be folded: fcmp one with negative infinity |
| ; Infinity values should bypass the optimization, generating unfolded code: |
| ; the -inf pattern 0xff800000 is still supplied to v_cndmask as its own operand. |
| define float @fcmp_select_no_fold_neginf_f32_one(float %arg, float %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_neginf_f32_one: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0xff800000 |
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0xff800000 |
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f32_one: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0 |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xff800000, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one float 0xFFF0000000000000, %arg |
| %sel = select i1 %cmp, float %other, float 0xFFF0000000000000 |
| ret float %sel |
| } |
| |
| ;------------------------------------------------------------------------------ |
| ; F64 Tests |
| ;------------------------------------------------------------------------------ |
| |
| ; Should be folded: f64 fcmp oeq + select with constant in true value. |
| ; The 64-bit constant is only built in s[4:5] for the compare; both 32-bit halves |
| ; of the result are selected between %arg (v[0:1]) and %other (v[2:3]). |
| define double @fcmp_select_fold_oeq_f64_imm(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_fold_oeq_f64_imm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_oeq_f64_imm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double %arg, 3.141592653589793 |
| %sel = select i1 %cmp, double 3.141592653589793, double %other |
| ret double %sel |
| } |
| ; Should be folded: f64 fcmp oeq + select with constant in true value (commutative). |
| ; The constant is the first fcmp operand here; the expected code still builds it |
| ; only in s[4:5] and selects both halves between v[0:1] and v[2:3]. |
| define double @fcmp_select_fold_oeq_imm_f64(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_fold_oeq_imm_f64: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f64: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double 3.141592653589793, %arg |
| %sel = select i1 %cmp, double 3.141592653589793, double %other |
| ret double %sel |
| } |
| |
| ; Should be folded: f64 fcmp one + select with constant in false value. |
| ; The 64-bit constant is only built in s[4:5] for the compare; each half of the |
| ; result is %other (v[2:3]) when vcc is set and %arg (v[0:1]) otherwise. |
| ; NOTE(review): for a NaN %arg, fcmp one is false and the IR select yields the |
| ; constant, while the checked code yields %arg - confirm the fold is intended |
| ; for unordered inputs. |
| define double @fcmp_select_fold_one_f64_imm(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_fold_one_f64_imm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x8b145769 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a |
| ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_one_f64_imm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769 |
| ; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a |
| ; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one double %arg, 2.718281828459045 |
| %sel = select i1 %cmp, double %other, double 2.718281828459045 |
| ret double %sel |
| } |
| ; Should be folded: f64 fcmp one + select with constant in false value (commutative). |
| ; The constant is the first fcmp operand here; each half of the result is still |
| ; %other (v[2:3]) when vcc is set and %arg (v[0:1]) otherwise. |
| ; NOTE(review): for a NaN %arg, fcmp one is false and the IR select yields the |
| ; constant, while the checked code yields %arg - confirm the fold is intended |
| ; for unordered inputs. |
| define double @fcmp_select_fold_one_imm_f64(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_fold_one_imm_f64: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x8b145769 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a |
| ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_fold_one_imm_f64: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769 |
| ; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a |
| ; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one double 2.718281828459045, %arg |
| %sel = select i1 %cmp, double %other, double 2.718281828459045 |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp oeq with the constant in the false value of the select. |
| ; The constant is returned when the equality compare fails, so %arg is not a |
| ; valid substitute; both constant halves are still supplied to v_cndmask. |
| define double @fcmp_select_no_fold_f64_other_pos(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f64_other_pos: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18 |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f64_other_pos: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double %arg, 3.141592653589793 |
| %sel = select i1 %cmp, double %other, double 3.141592653589793 |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp with an unsupported comparison predicate (olt). |
| ; An ordered less-than gives no equality between %arg and the constant, so both |
| ; constant halves are still supplied to v_cndmask. |
| define double @fcmp_select_no_fold_f64_unsupported_cmp(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f64_unsupported_cmp: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX900-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18 |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f64_unsupported_cmp: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX1010-NEXT: v_cmp_gt_f64_e32 vcc_lo, s[4:5], v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp olt double %arg, 3.141592653589793 |
| %sel = select i1 %cmp, double %other, double 3.141592653589793 |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: 1.0 is encoded as an inline operand of the f64 compare. |
| ; The select is split into two 32-bit v_cndmask ops: the low half (0) is inline, |
| ; while the high half 0x3ff00000 is moved into v1 on gfx900 and used as a literal |
| ; operand on gfx1010. |
| define double @fcmp_select_no_fold_f64_enc_imm(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 1.0, v[0:1] |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x3ff00000 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double %arg, 1.0 |
| %sel = select i1 %cmp, double 1.0, double %other |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: -4.0 is encoded as an inline operand of the f64 compare. |
| ; Variant using fcmp one with the constant as the first operand. The low half of |
| ; the select (0) is inline, while the high half 0xc0100000 is moved into v1 on |
| ; gfx900 and used as a literal operand on gfx1010. |
| define double @fcmp_select_no_fold_f64_enc_imm_2(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm_2: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, -4.0, v[0:1] |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0xc0100000 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm_2: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, -4.0, v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xc0100000, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one double -4.0, %arg |
| %sel = select i1 %cmp, double %other, double -4.0 |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp oeq with zero constant. |
| ; -0.0 also compares equal to 0.0, so %arg is not a safe replacement for the |
| ; constant; 0 is used as an inline operand for both halves in the expected code. |
| define double @fcmp_select_no_fold_oeq_f64_zero(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_oeq_f64_zero: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1] |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f64_zero: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double %arg, 0.0 |
| %sel = select i1 %cmp, double 0.0, double %other |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp one with negative zero constant. |
| ; +0.0 compares equal to -0.0, so %arg is not guaranteed to carry the sign bit |
| ; that the select must return; the high half 0x80000000 stays a v_cndmask operand. |
| define double @fcmp_select_no_fold_one_f64_negzero(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_one_f64_negzero: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0 |
| ; GFX900-NEXT: s_brev_b32 s5, 1 |
| ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_bfrev_b32_e32 v1, 1 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_one_f64_negzero: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, 0x80000000, v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one double -0.0, %arg |
| %sel = select i1 %cmp, double %other, double -0.0 |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: the f64 select constant differs from the fcmp constant, |
| ; so %arg cannot replace it. Both halves of the select constant are still supplied |
| ; to v_cndmask (through v_mov on gfx900, as literal operands on gfx1010). |
| define double @fcmp_select_no_fold_f64_different_const(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_f64_different_const: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_mov_b32_e32 v4, 0x8b145769 |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x4005bf0a |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_f64_different_const: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 |
| ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb |
| ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, s[4:5], v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8b145769, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x4005bf0a, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double %arg, 3.141592653589793 |
| %sel = select i1 %cmp, double 2.718281828459045, double %other |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp oeq with NaN constant. |
| ; fcmp oeq with NaN always returns false, so select always chooses %other; the |
| ; expected code is just a copy of %other (v[2:3]) into the return registers v[0:1]. |
| define double @fcmp_select_no_fold_nan_f64(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_nan_f64: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_mov_b32_e32 v1, v3 |
| ; GFX900-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_mov_b32_e32 v1, v3 |
| ; GFX1010-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double %arg, 0x7FF8000000000000 |
| %sel = select i1 %cmp, double 0x7FF8000000000000, double %other |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp oeq with NaN constant (commutative variant). |
| ; The NaN is the first fcmp operand here. fcmp oeq with NaN always returns false, |
| ; so select always chooses %other and the expected code only copies v[2:3] to v[0:1]. |
| define double @fcmp_select_no_fold_nan_f64_comm(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_nan_f64_comm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_mov_b32_e32 v1, v3 |
| ; GFX900-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_comm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_mov_b32_e32 v1, v3 |
| ; GFX1010-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double 0x7FF8000000000000, %arg |
| %sel = select i1 %cmp, double 0x7FF8000000000000, double %other |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp one with NaN constant. |
| ; fcmp one with NaN always returns false, so select always chooses the NaN constant; |
| ; the expected code just moves its halves (0 and 0x7ff80000) into v[0:1]. |
| define double @fcmp_select_no_fold_nan_f64_one(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000 |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one double %arg, 0x7FF8000000000000 |
| %sel = select i1 %cmp, double %other, double 0x7FF8000000000000 |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp one with NaN constant (commutative variant). |
| ; The NaN is the first fcmp operand here. fcmp one with NaN always returns false, |
| ; so select always chooses the NaN constant (halves 0 and 0x7ff80000 moved into v[0:1]). |
| define double @fcmp_select_no_fold_nan_f64_one_comm(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one_comm: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one_comm: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000 |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp one double 0x7FF8000000000000, %arg |
| %sel = select i1 %cmp, double %other, double 0x7FF8000000000000 |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp oeq with positive infinity |
| ; Infinity values should bypass the optimization, generating unfolded code: |
| ; the high half 0x7ff00000 of +inf is still supplied to v_cndmask as its own operand. |
| define double @fcmp_select_no_fold_posinf_f64(double %arg, double %other) { |
| ; GFX900-LABEL: fcmp_select_no_fold_posinf_f64: |
| ; GFX900: ; %bb.0: ; %entry |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, 0 |
| ; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000 |
| ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000 |
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc |
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64: |
| ; GFX1010: ; %bb.0: ; %entry |
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1] |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo |
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo |
| ; GFX1010-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %cmp = fcmp oeq double %arg, 0x7FF0000000000000 |
| %sel = select i1 %cmp, double 0x7FF0000000000000, double %other |
| ret double %sel |
| } |
| |
| ; Should NOT be folded: f64 fcmp oeq with negative infinity
| ; Infinity values should bypass the optimization, generating unfolded code
| ; Expected output: an inverted compare (v_cmp_neq_f64) against -Inf, with the
| ; -Inf halves (0 and 0xfff00000) still used as v_cndmask_b32 operands instead
| ; of v[0:1]. The gfx900 checks go through s[4:5] and v1; the gfx1010 checks use
| ; the literal directly.
| define double @fcmp_select_no_fold_neginf_f64(double %arg, double %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_neginf_f64:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_mov_b32 s4, 0
| ; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
| ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq double %arg, 0xFFF0000000000000
| %sel = select i1 %cmp, double 0xFFF0000000000000, double %other
| ret double %sel
| }
| |
| ; Should NOT be folded: f64 fcmp oeq with positive infinity (commutative variant)
| ; Infinity values should bypass the optimization, generating unfolded code
| ; +Inf is the first fcmp operand here. The expected instruction sequence is the
| ; same as for fcmp_select_no_fold_posinf_f64: inverted compare plus two
| ; v_cndmask_b32 that still use the +Inf halves.
| define double @fcmp_select_no_fold_posinf_f64_comm(double %arg, double %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_posinf_f64_comm:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_mov_b32 s4, 0
| ; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000
| ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64_comm:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq double 0x7FF0000000000000, %arg
| %sel = select i1 %cmp, double 0x7FF0000000000000, double %other
| ret double %sel
| }
| |
| ; Should NOT be folded: f64 fcmp oeq with negative infinity (commutative variant)
| ; Infinity values should bypass the optimization, generating unfolded code
| ; -Inf is the first fcmp operand here. The expected instruction sequence is the
| ; same as for fcmp_select_no_fold_neginf_f64: inverted compare plus two
| ; v_cndmask_b32 that still use the -Inf halves.
| define double @fcmp_select_no_fold_neginf_f64_comm(double %arg, double %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_neginf_f64_comm:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_mov_b32 s4, 0
| ; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
| ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
| ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64_comm:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
| ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq double 0xFFF0000000000000, %arg
| %sel = select i1 %cmp, double 0xFFF0000000000000, double %other
| ret double %sel
| }
| |
| ;------------------------------------------------------------------------------ |
| ; F16 Tests |
| ;------------------------------------------------------------------------------ |
| |
| ; Should be folded: f16 fcmp oeq + select with constant in true value
| ; 0xH4248 feeds both the compare and the select's true arm. In the expected
| ; output the literal appears only once, in the compare; v_cndmask_b32 takes v0
| ; (the register that was compared) in place of a second copy of the constant.
| define half @fcmp_select_fold_oeq_f16_imm(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_fold_oeq_f16_imm:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x4248
| ; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_oeq_f16_imm:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq half %arg, 0xH4248
| %sel = select i1 %cmp, half 0xH4248, half %other
| ret half %sel
| }
| |
| ; Should be folded: f16 fcmp oeq + select with constant in true value (commutative)
| ; The constant is the first fcmp operand here. The expected instruction
| ; sequence is the same as for fcmp_select_fold_oeq_f16_imm: one literal use in
| ; the compare and v0 reused by v_cndmask_b32.
| define half @fcmp_select_fold_oeq_imm_f16(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_fold_oeq_imm_f16:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x4248
| ; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f16:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq half 0xH4248, %arg
| %sel = select i1 %cmp, half 0xH4248, half %other
| ret half %sel
| }
| |
| ; Should be folded: f16 fcmp one + select with constant in false value
| ; fcmp one is checked as v_cmp_lg_f16. The select's false-arm constant 0xH4020
| ; is replaced by v0 in v_cndmask_b32, so the literal is only materialized for
| ; the compare.
| define half @fcmp_select_fold_one_f16_imm(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_fold_one_f16_imm:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x4020
| ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_one_f16_imm:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one half %arg, 0xH4020
| %sel = select i1 %cmp, half %other, half 0xH4020
| ret half %sel
| }
| |
| ; Should be folded: f16 fcmp one + select with constant in false value (commutative)
| ; The constant is the first fcmp operand here. The expected instruction
| ; sequence is the same as for fcmp_select_fold_one_f16_imm: v_cmp_lg_f16
| ; followed by a v_cndmask_b32 that reuses v0.
| define half @fcmp_select_fold_one_imm_f16(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_fold_one_imm_f16:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x4020
| ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_one_imm_f16:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one half 0xH4020, %arg
| %sel = select i1 %cmp, half %other, half 0xH4020
| ret half %sel
| }
| |
| ; Should NOT be folded: different constants
| ; The compare uses 0xH4248 but the select's true arm uses 0xH4300, so there is
| ; no shared constant. Both literals appear in the expected output: 0x4248 in
| ; the (inverted) v_cmp_neq_f16 and 0x4300 as a v_cndmask_b32 operand.
| define half @fcmp_select_no_fold_f16_different_const(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_f16_different_const:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x4248
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
| ; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_f16_different_const:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x4248, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq half %arg, 0xH4248
| %sel = select i1 %cmp, half 0xH4300, half %other
| ret half %sel
| }
| |
| ; Should NOT be folded: NaN values bypass optimization
| ; 0xH7e00 is a NaN, and an ordered-equal compare against NaN is always false,
| ; so the select always yields %other. The expected output is only a copy of
| ; v1 into v0, with no compare instruction.
| define half @fcmp_select_no_fold_nan_f16(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_nan_f16:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_mov_b32_e32 v0, v1
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_f16:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_mov_b32_e32 v0, v1
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq half %arg, 0xH7e00
| %sel = select i1 %cmp, half 0xH7e00, half %other
| ret half %sel
| }
| |
| ; Should NOT be folded: f16 fcmp one with NaN constant
| ; An ordered not-equal compare against NaN (0xH7e00) is always false, so the
| ; select always yields its false arm, the NaN constant. The expected output
| ; only loads 0x7e00 into v0, with no compare instruction.
| define half @fcmp_select_no_fold_nan_f16_one(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_nan_f16_one:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_mov_b32_e32 v0, 0x7e00
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_f16_one:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7e00
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one half %arg, 0xH7e00
| %sel = select i1 %cmp, half %other, half 0xH7e00
| ret half %sel
| }
| |
| ; Should NOT be folded: f16 fcmp one with +Inf constant
| ; 0xH7c00 is +Inf. The expected output keeps the literal in both places: once
| ; for v_cmp_lg_f16 and once as a v_cndmask_b32 operand (through v2 in the
| ; gfx900 checks), instead of substituting v0 for the select constant.
| define half @fcmp_select_no_fold_posinf_f16_one(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_posinf_f16_one:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x7c00
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7c00
| ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_posinf_f16_one:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x7c00, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one half %arg, 0xH7c00
| %sel = select i1 %cmp, half %other, half 0xH7c00
| ret half %sel
| }
| |
| ; Should NOT be folded: f16 fcmp one with -Inf constant
| ; 0xHfc00 is -Inf. The expected output keeps the literal in both places: once
| ; for v_cmp_lg_f16 and once as a v_cndmask_b32 operand (through v2 in the
| ; gfx900 checks), instead of substituting v0 for the select constant.
| define half @fcmp_select_no_fold_neginf_f16_one(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_neginf_f16_one:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_mov_b32 s4, 0xfc00
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0xfc00
| ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f16_one:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0xfc00, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xfc00, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one half %arg, 0xHfc00
| %sel = select i1 %cmp, half %other, half 0xHfc00
| ret half %sel
| }
| ; Should NOT be folded: f16 fcmp oeq with zero constant
| ; In the expected output the zero is written as the inline operand 0 in both
| ; v_cmp_neq_f16 and v_cndmask_b32 on both targets; no register holds the
| ; constant, and v0 is not substituted for it in the select.
| define half @fcmp_select_no_fold_oeq_f16_zero(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_oeq_f16_zero:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, 0, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f16_zero:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq half %arg, 0xH0000
| %sel = select i1 %cmp, half 0xH0000, half %other
| ret half %sel
| }
| ; Should NOT be folded: f16 fcmp one with negative zero constant
| ; 0xH8000 is -0.0 and is the first fcmp operand. The expected output
| ; materializes 0x8000 for both the v_cmp_lg_f16 compare and the v_cndmask_b32
| ; select (s4 and v2 in the gfx900 checks) rather than reusing v0.
| define half @fcmp_select_no_fold_one_f16_negzero(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_one_f16_negzero:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_mov_b32 s4, 0x8000
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x8000
| ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_one_f16_negzero:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x8000, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one half 0xH8000, %arg
| %sel = select i1 %cmp, half %other, half 0xH8000
| ret half %sel
| }
| |
| ; Should NOT be folded: f16 fcmp oeq with constant in other position
| ; With oeq the constant sits in the select's false arm, so it is chosen exactly
| ; when %arg is NOT equal to it; %arg therefore cannot stand in for the
| ; constant. The expected output keeps 0x4248 as a v_cndmask_b32 operand.
| define half @fcmp_select_no_fold_f16_other_pos(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_f16_other_pos:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x4248
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
| ; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_f16_other_pos:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq half %arg, 0xH4248
| %sel = select i1 %cmp, half %other, half 0xH4248
| ret half %sel
| }
| |
| ; Should NOT be folded: f16 unsupported comparison type
| ; fcmp olt never establishes that %arg equals the constant in either select
| ; arm. It is checked as v_cmp_gt_f16 with the constant as the first source
| ; operand, and 0x4248 remains a v_cndmask_b32 operand (through v2 in the
| ; gfx900 checks).
| define half @fcmp_select_no_fold_f16_unsupported_cmp(half %arg, half %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: s_movk_i32 s4, 0x4248
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
| ; GFX900-NEXT: v_cmp_gt_f16_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0x4248, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp olt half %arg, 0xH4248
| %sel = select i1 %cmp, half %other, half 0xH4248
| ret half %sel
| }
| |
| ;------------------------------------------------------------------------------ |
| ; BF16 Tests |
| ;------------------------------------------------------------------------------ |
| |
| ; Should be folded: bfloat fcmp oeq + select with constant in true value
| ; The bfloat compare is checked as an f32 compare: v0 is shifted left by 16
| ; into v2 and compared with 0x42480000 (0xR4248 in the upper 16 bits).
| ; v_cndmask_b32 then selects the unshifted v0 instead of a second copy of the
| ; constant, so no 0x4248 literal is materialized for the select.
| define bfloat @fcmp_select_fold_oeq_bf16_imm(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_fold_oeq_bf16_imm:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x42480000
| ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_oeq_bf16_imm:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq bfloat %arg, 0xR4248
| %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
| ret bfloat %sel
| }
| |
| ; Should be folded: bfloat fcmp oeq + select with constant in true value (commutative)
| ; The constant is the first fcmp operand here. The expected instruction
| ; sequence is the same as for fcmp_select_fold_oeq_bf16_imm: shift into v2,
| ; f32 compare against 0x42480000, and a v_cndmask_b32 that reuses v0.
| define bfloat @fcmp_select_fold_oeq_imm_bf16(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_fold_oeq_imm_bf16:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x42480000
| ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_bf16:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq bfloat 0xR4248, %arg
| %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
| ret bfloat %sel
| }
| |
| ; Should be folded: bfloat fcmp one + select with constant in false value
| ; Checked as an f32 compare: v0 is shifted left by 16 into v2 and compared
| ; (v_cmp_lg_f32) with 0x40200000 (0xR4020 in the upper 16 bits). The select's
| ; false-arm constant is replaced by the unshifted v0 in v_cndmask_b32.
| define bfloat @fcmp_select_fold_one_bf16_imm(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_fold_one_bf16_imm:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x40200000
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_one_bf16_imm:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one bfloat %arg, 0xR4020
| %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
| ret bfloat %sel
| }
| |
| ; Should be folded: bfloat fcmp one + select with constant in false value (commutative)
| ; The constant is the first fcmp operand here. The expected instruction
| ; sequence is the same as for fcmp_select_fold_one_bf16_imm: shift into v2,
| ; v_cmp_lg_f32 against 0x40200000, and a v_cndmask_b32 that reuses v0.
| define bfloat @fcmp_select_fold_one_imm_bf16(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_fold_one_imm_bf16:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x40200000
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_fold_one_imm_bf16:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one bfloat 0xR4020, %arg
| %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: different constants
| ; The compare uses 0xR4248 but the select's true arm uses 0xR4300, so there is
| ; no shared constant. In the expected output the shift result overwrites v0,
| ; the (inverted) v_cmp_neq_f32 compares against 0x42480000, and v_cndmask_b32
| ; takes the separate literal 0x4300.
| define bfloat @fcmp_select_no_fold_bf16_different_const(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_bf16_different_const:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x42480000
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
| ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_bf16_different_const:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42480000, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq bfloat %arg, 0xR4248
| %sel = select i1 %cmp, bfloat 0xR4300, bfloat %other
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: NaN values bypass optimization
| ; 0xR7FC0 is a NaN, and an ordered-equal compare against NaN is always false,
| ; so the select always yields %other. The expected output is only a copy of
| ; v1 into v0, with no shift or compare instruction.
| define bfloat @fcmp_select_no_fold_nan_bf16(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_nan_bf16:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_mov_b32_e32 v0, v1
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_mov_b32_e32 v0, v1
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq bfloat %arg, 0xR7FC0
| %sel = select i1 %cmp, bfloat 0xR7FC0, bfloat %other
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: bfloat fcmp one with NaN constant
| ; An ordered not-equal compare against NaN (0xR7FC0) is always false, so the
| ; select always yields its false arm, the NaN constant. The expected output
| ; only loads 0x7fc0 into v0, with no shift or compare instruction.
| define bfloat @fcmp_select_no_fold_nan_bf16_one(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_nan_bf16_one:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc0
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16_one:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc0
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one bfloat %arg, 0xR7FC0
| %sel = select i1 %cmp, bfloat %other, bfloat 0xR7FC0
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: bfloat fcmp one with +Inf constant
| ; 0xR7F80 is +Inf. In the expected output the shifted argument is compared
| ; (v_cmp_lg_f32) against 0x7f800000, and v_cndmask_b32 still takes the 16-bit
| ; literal 0x7f80 (through v2 in the gfx900 checks) rather than reusing the
| ; argument register.
| define bfloat @fcmp_select_no_fold_posinf_bf16_one(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_posinf_bf16_one:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x7f800000
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f80
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_posinf_bf16_one:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x7f800000, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f80, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one bfloat %arg, 0xR7F80
| %sel = select i1 %cmp, bfloat %other, bfloat 0xR7F80
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: bfloat fcmp one with -Inf constant
| ; 0xRFF80 is -Inf. In the expected output the shifted argument is compared
| ; (v_cmp_lg_f32) against 0xff800000, and v_cndmask_b32 still takes the select
| ; constant as a literal. That literal is printed as 0xffffff80, i.e. 0xFF80
| ; sign-extended to 32 bits.
| define bfloat @fcmp_select_no_fold_neginf_bf16_one(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_neginf_bf16_one:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0xff800000
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0xffffff80
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_neginf_bf16_one:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffffff80, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one bfloat %arg, 0xRFF80
| %sel = select i1 %cmp, bfloat %other, bfloat 0xRFF80
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: bfloat fcmp oeq with zero constant
| ; In the expected output the zero is written as the inline operand 0 in both
| ; the (inverted) v_cmp_neq_f32 and v_cndmask_b32 on both targets; no register
| ; holds the constant, and the argument is not substituted for it in the select.
| define bfloat @fcmp_select_no_fold_oeq_bf16_zero(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq bfloat %arg, 0xR0000
| %sel = select i1 %cmp, bfloat 0xR0000, bfloat %other
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: bfloat fcmp one with negative zero constant
| ; 0xR8000 is -0.0 and is the first fcmp operand. The compare is done in f32
| ; against 0x80000000; the gfx900 checks build that value with s_brev_b32 s4, 1
| ; (bit-reverse of 1). The select constant is still a v_cndmask_b32 literal,
| ; printed as 0xffff8000 (0x8000 sign-extended to 32 bits).
| define bfloat @fcmp_select_no_fold_one_bf16_negzero(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_one_bf16_negzero:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX900-NEXT: s_brev_b32 s4, 1
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff8000
| ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_one_bf16_negzero:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffff8000, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp one bfloat 0xR8000, %arg
| %sel = select i1 %cmp, bfloat %other, bfloat 0xR8000
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: bfloat fcmp oeq with constant in other position
| ; With oeq the constant sits in the select's false arm, so it is chosen exactly
| ; when %arg is NOT equal to it; %arg therefore cannot stand in for the
| ; constant. The expected output keeps 0x4248 as a v_cndmask_b32 operand in
| ; addition to the 0x42480000 used by the f32 compare.
| define bfloat @fcmp_select_no_fold_bf16_other_pos(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_bf16_other_pos:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x42480000
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
| ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_bf16_other_pos:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp oeq bfloat %arg, 0xR4248
| %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
| ret bfloat %sel
| }
| |
| ; Should NOT be folded: bfloat unsupported comparison type
| ; fcmp olt never establishes that %arg equals the constant in either select
| ; arm. It is checked as v_cmp_gt_f32 with 0x42480000 as the first source
| ; operand, and 0x4248 remains a v_cndmask_b32 operand (through v2 in the
| ; gfx900 checks).
| define bfloat @fcmp_select_no_fold_bf16_unsupported_cmp(bfloat %arg, bfloat %other) {
| ; GFX900-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
| ; GFX900: ; %bb.0: ; %entry
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX900-NEXT: s_mov_b32 s4, 0x42480000
| ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
| ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
| ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
| ; GFX900-NEXT: s_setpc_b64 s[30:31]
| ;
| ; GFX1010-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
| ; GFX1010: ; %bb.0: ; %entry
| ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
| ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
| ; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42480000, v0
| ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
| ; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
| %cmp = fcmp olt bfloat %arg, 0xR4248
| %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
| ret bfloat %sel
| }