blob: 11af704d30973c194ef27d6689e75a742bec1a2c [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX1010 %s
; Test the CMP+SELECT optimization that folds shared constants to reduce
; register pressure.
;------------------------------------------------------------------------------
; F32 Tests
;------------------------------------------------------------------------------
; Should be folded: fcmp oeq + select with the same constant as the true value.
; When the compare is true %arg equals the constant, so the select can reuse
; %arg. The checks expect the literal only in the compare (bit pattern
; 0x42487ed8, which is how the double-format hex 0x40490FDB00000000 is
; materialized as an f32) and a v_cndmask that takes only registers.
define float @fcmp_select_fold_oeq_f32_imm(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_f32_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_f32_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float 0x40490FDB00000000, float %other
ret float %sel
}
; Should be folded: fcmp oeq + select with constant in true value (commutative).
; Same as the previous test but with the constant as the first fcmp operand;
; the expected instructions are identical to the non-commuted form (literal
; only in the compare, register-only v_cndmask).
define float @fcmp_select_fold_oeq_imm_f32(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_f32:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f32:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float 0x40490FDB00000000, %arg
%sel = select i1 %cmp, float 0x40490FDB00000000, float %other
ret float %sel
}
; Should be folded: fcmp one + select with the same constant as the false value.
; The checks expect the literal (0x402df850) only in the v_cmp_lg and a
; v_cndmask that picks between v0 and v1, i.e. v0 (%arg) replaces the constant.
; NOTE(review): for a NaN %arg, `fcmp one` is false and the IR selects the
; constant, while the checked sequence would return v0 (the NaN) -- confirm
; this difference is intended/accepted for the `one` predicate.
define float @fcmp_select_fold_one_f32_imm(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_one_f32_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x402df850
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_f32_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float %arg, 0x4005BF0A00000000
%sel = select i1 %cmp, float %other, float 0x4005BF0A00000000
ret float %sel
}
; Should be folded: fcmp one + select with constant in false value (commutative).
; The constant is the first fcmp operand here; the expected instructions are
; identical to the non-commuted form (literal only in v_cmp_lg, register-only
; v_cndmask).
; NOTE(review): for a NaN %arg, `fcmp one` is false and the IR selects the
; constant, while the checked sequence would return v0 (the NaN) -- confirm
; this difference is intended/accepted for the `one` predicate.
define float @fcmp_select_fold_one_imm_f32(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_f32:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x402df850
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_f32:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float 0x4005BF0A00000000, %arg
%sel = select i1 %cmp, float %other, float 0x4005BF0A00000000
ret float %sel
}
; Should NOT be folded: different constants.
; The select constant (0x46487ed8 in the checks) is not the compare constant
; (0x42487ed8), so %arg cannot replace it and both literals are expected.
; The checks use the inverted compare (v_cmp_neq) with the v_cndmask sources
; swapped accordingly.
define float @fcmp_select_no_fold_f32_different_const(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_mov_b32_e32 v2, 0x46487ed8
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x46487ed8, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float 0x40C90FDB00000000, float %other
ret float %sel
}
; Should NOT be folded: fcmp oeq with the constant as the select's FALSE value.
; The constant is chosen exactly when %arg did not compare equal to it, so
; %arg cannot stand in for it. The checks expect the literal in both the
; compare and the v_cndmask (via v2 on gfx900, as a literal operand on gfx1010).
define float @fcmp_select_no_fold_f32_other_pos(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float %other, float 0x40490FDB00000000
ret float %sel
}
; Should NOT be folded: unsupported comparison type.
; `olt` is not an equality predicate, so a true/false result says nothing
; about %arg being equal to the constant. The checks expect v_cmp_gt with the
; literal as the first operand and the literal again in the v_cndmask.
define float @fcmp_select_no_fold_f32_unsupported_cmp(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8
; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42487ed8, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt float %arg, 0x40490FDB00000000
%sel = select i1 %cmp, float %other, float 0x40490FDB00000000
ret float %sel
}
; Should NOT be folded: imm can be encoded into cndmask.
; 1.0 appears directly as an operand of both v_cmp and v_cndmask in the
; checks, with no separate s_mov/v_mov to materialize it, so there is no
; register for the fold to save.
define float @fcmp_select_no_fold_f32_enc_imm(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 1.0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 1.0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 1.0
%sel = select i1 %cmp, float 1.0, float %other
ret float %sel
}
; Should NOT be folded: imm can be encoded into cndmask.
; Variant using `fcmp one` with the constant as the first compare operand and
; as the select's false value. -4.0 appears directly as an operand of both
; v_cmp and v_cndmask in the checks, with no separate materialization.
define float @fcmp_select_no_fold_f32_enc_imm_2(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm_2:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, -4.0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm_2:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, -4.0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float -4.0, %arg
%sel = select i1 %cmp, float %other, float -4.0
ret float %sel
}
; Should NOT be folded: fcmp oeq with zero constant.
; The checks keep 0 as a direct operand of both v_cmp and v_cndmask.
; Presumably zero must also be excluded for correctness: +0.0 and -0.0 compare
; equal under oeq but have different bit patterns, so returning %arg instead
; of the constant could change the sign of the result -- TODO confirm.
define float @fcmp_select_no_fold_oeq_f32_zero(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0.0
%sel = select i1 %cmp, float 0.0, float %other
ret float %sel
}
; Should NOT be folded: fcmp one with negative zero constant.
; The checks expect -0.0 (bit pattern 0x80000000) in both the compare and the
; v_cndmask: gfx900 builds it twice with bit-reverse of 1 (s_brev_b32 /
; v_bfrev_b32), gfx1010 uses it as a literal operand of both instructions.
define float @fcmp_select_no_fold_one_f32_negzero(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f32_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_brev_b32 s4, 1
; GFX900-NEXT: v_bfrev_b32_e32 v2, 1
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f32_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float -0.0, %arg ; -0.0 == 0x8000000000000000 in LLVM's double-format hex
%sel = select i1 %cmp, float %other, float -0.0 ; -0.0 == 0x8000000000000000 in LLVM's double-format hex
ret float %sel
}
; NaN values should bypass the optimization due to special IEEE 754 behavior.
; fcmp oeq with NaN always returns false, so the select always chooses %other.
; The checks therefore expect no compare at all: the result is just a copy of
; v1 into v0.
define float @fcmp_select_no_fold_oeq_f32_nan(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_nan:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_nan:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, v1
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x7FF8000000000000
%sel = select i1 %cmp, float 0x7FF8000000000000, float %other
ret float %sel
}
; NaN values should bypass the optimization due to special IEEE 754 behavior.
; fcmp one is an ordered predicate, so with a NaN operand it always returns
; false and the select always chooses the NaN constant. The checks expect no
; compare: v0 is simply loaded with the f32 quiet-NaN pattern 0x7fc00000.
define float @fcmp_select_no_fold_one_f32_nan(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f32_nan:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f32_nan:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float 0x7FF8000000000000, %arg
%sel = select i1 %cmp, float %other, float 0x7FF8000000000000
ret float %sel
}
; Should NOT be folded: fcmp oeq with positive infinity
; Infinity values should bypass the optimization, generating unfolded code:
; the checks expect +inf (0x7f800000) in both the compare and the v_cndmask.
define float @fcmp_select_no_fold_posinf_oeq_f32(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_oeq_f32:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x7f800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_oeq_f32:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x7f800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq float %arg, 0x7FF0000000000000
%sel = select i1 %cmp, float 0x7FF0000000000000, float %other
ret float %sel
}
; Should NOT be folded: fcmp one with negative infinity
; Infinity values should bypass the optimization, generating unfolded code:
; the checks expect -inf (0xff800000) in both the compare and the v_cndmask.
define float @fcmp_select_no_fold_neginf_f32_one(float %arg, float %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f32_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xff800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0xff800000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f32_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xff800000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one float 0xFFF0000000000000, %arg
%sel = select i1 %cmp, float %other, float 0xFFF0000000000000
ret float %sel
}
;------------------------------------------------------------------------------
; F64 Tests
;------------------------------------------------------------------------------
; Should be folded: f64 fcmp oeq + select with constant in true value.
; The checks expect the 64-bit literal to be built once in s[4:5] for the
; compare, and both 32-bit v_cndmask halves to select between registers only
; (v[0:1] vs. v[2:3]) rather than re-materializing the constant.
define double @fcmp_select_fold_oeq_f64_imm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_f64_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_f64_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 3.141592653589793
%sel = select i1 %cmp, double 3.141592653589793, double %other
ret double %sel
}
; Should be folded: f64 fcmp oeq + select with constant in true value (commutative).
; The constant is the first fcmp operand here; the expected instructions are
; identical to the non-commuted f64 form (literal only in s[4:5] for the
; compare, register-only v_cndmask halves).
define double @fcmp_select_fold_oeq_imm_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 3.141592653589793, %arg
%sel = select i1 %cmp, double 3.141592653589793, double %other
ret double %sel
}
; Should be folded: f64 fcmp one + select with constant in false value.
; The checks expect the 64-bit literal only in s[4:5] for v_cmp_lg, and both
; v_cndmask halves to select between registers only (v[0:1] vs. v[2:3]).
; NOTE(review): for a NaN %arg, `fcmp one` is false and the IR selects the
; constant, while the checked sequence would return v[0:1] (the NaN) --
; confirm this difference is intended/accepted for the `one` predicate.
define double @fcmp_select_fold_one_f64_imm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_one_f64_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x8b145769
; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_f64_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769
; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double %arg, 2.718281828459045
%sel = select i1 %cmp, double %other, double 2.718281828459045
ret double %sel
}
; Should be folded: f64 fcmp one + select with constant in false value (commutative).
; The constant is the first fcmp operand here; the expected instructions are
; identical to the non-commuted f64 form (literal only in s[4:5] for
; v_cmp_lg, register-only v_cndmask halves).
; NOTE(review): for a NaN %arg, `fcmp one` is false and the IR selects the
; constant, while the checked sequence would return v[0:1] (the NaN) --
; confirm this difference is intended/accepted for the `one` predicate.
define double @fcmp_select_fold_one_imm_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x8b145769
; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769
; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double 2.718281828459045, %arg
%sel = select i1 %cmp, double %other, double 2.718281828459045
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with the constant as the select's FALSE value.
; The constant is chosen exactly when %arg did not compare equal to it, so
; %arg cannot stand in for it. The checks expect the literal halves both in
; s[4:5] for the compare and again as v_cndmask inputs (via v_mov on gfx900,
; as literal operands on gfx1010).
define double @fcmp_select_no_fold_f64_other_pos(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18
; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb
; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 3.141592653589793
%sel = select i1 %cmp, double %other, double 3.141592653589793
ret double %sel
}
; Should NOT be folded: f64 fcmp unsupported comparison type.
; `olt` is not an equality predicate, so a true/false result says nothing
; about %arg being equal to the constant. The checks expect v_cmp_gt with the
; literal pair as the first operand and the literal halves again as v_cndmask
; inputs.
define double @fcmp_select_no_fold_f64_unsupported_cmp(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18
; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb
; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_gt_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt double %arg, 3.141592653589793
%sel = select i1 %cmp, double %other, double 3.141592653589793
ret double %sel
}
; Should NOT be folded: imm can be encoded into cndmask.
; In the checks 1.0 is a direct operand of the f64 compare, the low-half
; v_cndmask uses 0 directly, and only the high half (0x3ff00000) is supplied
; separately (v_mov on gfx900, literal operand on gfx1010).
define double @fcmp_select_no_fold_f64_enc_imm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 1.0, v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0x3ff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 1.0
%sel = select i1 %cmp, double 1.0, double %other
ret double %sel
}
; Should NOT be folded: imm can be encoded into cndmask.
; Variant using `fcmp one` with the constant as the first compare operand and
; as the select's false value. In the checks -4.0 is a direct operand of the
; f64 compare, the low-half v_cndmask uses 0 directly, and only the high half
; (0xc0100000) is supplied separately.
define double @fcmp_select_no_fold_f64_enc_imm_2(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm_2:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, -4.0, v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0xc0100000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm_2:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, -4.0, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xc0100000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double -4.0, %arg
%sel = select i1 %cmp, double %other, double -4.0
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with zero constant.
; The checks keep 0 as a direct operand of the compare and of both v_cndmask
; halves. Presumably zero must also be excluded for correctness: +0.0 and
; -0.0 compare equal under oeq but have different bit patterns -- TODO confirm.
define double @fcmp_select_no_fold_oeq_f64_zero(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f64_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1]
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f64_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0.0
%sel = select i1 %cmp, double 0.0, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp one with negative zero constant.
; The checks expect -0.0 (high half 0x80000000, low half 0) in both the
; compare and the v_cndmask pair: gfx900 builds the high half with
; bit-reverse of 1 (s_brev_b32 / v_bfrev_b32), gfx1010 uses 0x80000000 as a
; literal operand.
define double @fcmp_select_no_fold_one_f64_negzero(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f64_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_brev_b32 s5, 1
; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_bfrev_b32_e32 v1, 1
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f64_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, 0x80000000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double -0.0, %arg
%sel = select i1 %cmp, double %other, double -0.0
ret double %sel
}
; Should NOT be folded: f64 different constants.
; The select constant (0x4005bf0a:0x8b145769 in the checks) is not the compare
; constant (0x400921fb:0x54442d18), so %arg cannot replace it and both
; literals are expected. The checks use the inverted compare (v_cmp_neq) with
; the v_cndmask sources swapped accordingly.
define double @fcmp_select_no_fold_f64_different_const(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_f64_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v4, 0x8b145769
; GFX900-NEXT: v_mov_b32_e32 v1, 0x4005bf0a
; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f64_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, s[4:5], v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8b145769, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x4005bf0a, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 3.141592653589793
%sel = select i1 %cmp, double 2.718281828459045, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with NaN constant.
; fcmp oeq with NaN always returns false, so the select always chooses %other.
; The checks expect no compare at all: v[2:3] is simply copied into v[0:1].
define double @fcmp_select_no_fold_nan_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v1, v3
; GFX1010-NEXT: v_mov_b32_e32 v0, v2
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0x7FF8000000000000
%sel = select i1 %cmp, double 0x7FF8000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with NaN constant (commutative variant).
; fcmp oeq with NaN always returns false regardless of operand order, so the
; select always chooses %other. The checks expect no compare at all: v[2:3]
; is simply copied into v[0:1].
define double @fcmp_select_no_fold_nan_f64_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v1, v3
; GFX1010-NEXT: v_mov_b32_e32 v0, v2
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 0x7FF8000000000000, %arg
%sel = select i1 %cmp, double 0x7FF8000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp one with NaN constant.
; fcmp one is an ordered predicate, so with a NaN operand it always returns
; false and the select always chooses the NaN constant. The checks expect no
; compare: v[0:1] is simply loaded with the f64 quiet-NaN pattern
; (high half 0x7ff80000, low half 0).
define double @fcmp_select_no_fold_nan_f64_one(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double %arg, 0x7FF8000000000000
%sel = select i1 %cmp, double %other, double 0x7FF8000000000000
ret double %sel
}
; Should NOT be folded: f64 fcmp one with NaN constant (commutative variant).
; fcmp one with a NaN operand always returns false regardless of operand
; order, so the select always chooses the NaN constant. The checks expect no
; compare: v[0:1] is simply loaded with the f64 quiet-NaN pattern
; (high half 0x7ff80000, low half 0).
define double @fcmp_select_no_fold_nan_f64_one_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one double 0x7FF8000000000000, %arg
%sel = select i1 %cmp, double %other, double 0x7FF8000000000000
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with positive infinity
; Infinity values should bypass the optimization, generating unfolded code:
; the checks expect +inf (high half 0x7ff00000, low half 0) in both the
; compare and the v_cndmask pair.
define double @fcmp_select_no_fold_posinf_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0x7FF0000000000000
%sel = select i1 %cmp, double 0x7FF0000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with negative infinity
; Infinity values should bypass the optimization, generating unfolded code:
; the checks expect -inf (high half 0xfff00000, low half 0) in both the
; compare and the v_cndmask pair.
define double @fcmp_select_no_fold_neginf_f64(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f64:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double %arg, 0xFFF0000000000000
%sel = select i1 %cmp, double 0xFFF0000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with positive infinity (commutative variant)
; Infinity values should bypass the optimization, generating unfolded code.
; The constant is the first fcmp operand here; the expected instructions are
; identical to the non-commuted +inf test.
define double @fcmp_select_no_fold_posinf_f64_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_f64_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 0x7FF0000000000000, %arg
%sel = select i1 %cmp, double 0x7FF0000000000000, double %other
ret double %sel
}
; Should NOT be folded: f64 fcmp oeq with negative infinity (commutative variant)
; Here -Inf (0xFFF0000000000000) is the FIRST fcmp operand and also the
; select's true value.
; Infinity values should bypass the optimization, generating unfolded code:
; the expected output still feeds the constant's two halves (0 and 0xfff00000)
; into the v_cndmask pair as operands.
define double @fcmp_select_no_fold_neginf_f64_comm(double %arg, double %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f64_comm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0
; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64_comm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq double 0xFFF0000000000000, %arg
%sel = select i1 %cmp, double 0xFFF0000000000000, double %other
ret double %sel
}
;------------------------------------------------------------------------------
; F16 Tests
;------------------------------------------------------------------------------
; Should be folded: f16 fcmp oeq + select with constant in true value
; The shared constant is 0xH4248 (3.140625). In the expected output it appears
; only as the v_cmp operand; the v_cndmask takes two plain registers (v1, v0),
; so no second copy of the constant is materialized for the select.
define half @fcmp_select_fold_oeq_f16_imm(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_f16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_f16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH4248
%sel = select i1 %cmp, half 0xH4248, half %other
ret half %sel
}
; Should be folded: f16 fcmp oeq + select with constant in true value (commutative)
; Same as the non-commuted oeq case, but the constant 0xH4248 (3.140625) is the
; FIRST fcmp operand. The expected output is the same shape: the constant is
; used only by v_cmp and the v_cndmask takes two plain registers (v1, v0).
define half @fcmp_select_fold_oeq_imm_f16(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_f16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half 0xH4248, %arg
%sel = select i1 %cmp, half 0xH4248, half %other
ret half %sel
}
; Should be folded: f16 fcmp one + select with constant in false value
; The shared constant is 0xH4020 (2.0625). For the `one` predicate the constant
; sits in the select's FALSE operand. In the expected output it appears only as
; the v_cmp_lg operand; the v_cndmask takes two plain registers (v0, v1).
define half @fcmp_select_fold_one_f16_imm(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_one_f16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4020
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_f16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xH4020
%sel = select i1 %cmp, half %other, half 0xH4020
ret half %sel
}
; Should be folded: f16 fcmp one + select with constant in false value (commutative)
; Same as the non-commuted `one` case, but the constant 0xH4020 (2.0625) is the
; FIRST fcmp operand. The expected output is the same shape: the constant is
; used only by v_cmp_lg and the v_cndmask takes two plain registers (v0, v1).
define half @fcmp_select_fold_one_imm_f16(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_f16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4020
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_f16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half 0xH4020, %arg
%sel = select i1 %cmp, half %other, half 0xH4020
ret half %sel
}
; Should NOT be folded: different constants
; The fcmp tests against 0xH4248 (3.140625) while the select's true value is
; 0xH4300 (3.5), so %arg being equal to the compare constant says nothing about
; the select constant. The expected output keeps both constants: 0x4248 for
; the v_cmp and 0x4300 as a v_cndmask operand.
define half @fcmp_select_no_fold_f16_different_const(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_f16_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f16_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH4248
%sel = select i1 %cmp, half 0xH4300, half %other
ret half %sel
}
; Should NOT be folded: NaN values bypass optimization
; The constant 0xH7e00 is a quiet NaN. An ordered compare (oeq) against NaN is
; always false, so the select always yields %other. The expected output shows
; this: no v_cmp / v_cndmask at all, just a move of v1 into v0.
define half @fcmp_select_no_fold_nan_f16(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, v1
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH7e00
%sel = select i1 %cmp, half 0xH7e00, half %other
ret half %sel
}
; Should NOT be folded: f16 fcmp one with NaN constant
; The constant 0xH7e00 is a quiet NaN. An ordered compare (one) against NaN is
; always false, so the select always yields its false value, the NaN constant
; itself. The expected output shows this: no v_cmp / v_cndmask, just a move of
; 0x7e00 into v0.
define half @fcmp_select_no_fold_nan_f16_one(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_f16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0x7e00
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_f16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7e00
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xH7e00
%sel = select i1 %cmp, half %other, half 0xH7e00
ret half %sel
}
; Should NOT be folded: f16 fcmp one with +Inf constant
; 0xH7c00 is +Inf and is both the compare constant and the select's false
; value. The expected output keeps the constant on both instructions: 0x7c00
; feeds the v_cmp_lg and is also a v_cndmask operand.
define half @fcmp_select_no_fold_posinf_f16_one(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_f16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x7c00
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_f16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x7c00, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xH7c00
%sel = select i1 %cmp, half %other, half 0xH7c00
ret half %sel
}
; Should NOT be folded: f16 fcmp one with -Inf constant
; 0xHfc00 is -Inf and is both the compare constant and the select's false
; value. The expected output keeps the constant on both instructions: 0xfc00
; feeds the v_cmp_lg and is also a v_cndmask operand.
define half @fcmp_select_no_fold_neginf_f16_one(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_f16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xfc00
; GFX900-NEXT: v_mov_b32_e32 v2, 0xfc00
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_f16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0xfc00, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xfc00, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half %arg, 0xHfc00
%sel = select i1 %cmp, half %other, half 0xHfc00
ret half %sel
}
; Should NOT be folded: f16 fcmp oeq with zero constant
; 0xH0000 is +0.0. The expected output uses 0 directly as an operand of both
; the v_cmp and the v_cndmask, with no separate instruction to load it.
; NOTE(review): presumably zero is also excluded because `%arg == 0.0` holds
; for -0.0 as well, so substituting %arg for the constant could change the
; sign of the result - confirm against the combine's implementation.
define half @fcmp_select_no_fold_oeq_f16_zero(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_f16_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, 0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_f16_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH0000
%sel = select i1 %cmp, half 0xH0000, half %other
ret half %sel
}
; Should NOT be folded: f16 fcmp one with negative zero constant
; 0xH8000 is -0.0. This is also a commuted form: the constant is the FIRST
; fcmp operand, and it is the select's false value. The expected output keeps
; the constant on both instructions: 0x8000 feeds the v_cmp_lg and is also a
; v_cndmask operand.
; NOTE(review): presumably excluded because -0.0 compares equal to +0.0, so
; %arg is not guaranteed to be bit-identical to the constant - confirm against
; the combine's implementation.
define half @fcmp_select_no_fold_one_f16_negzero(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_f16_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0x8000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x8000
; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_f16_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x8000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one half 0xH8000, %arg
%sel = select i1 %cmp, half %other, half 0xH8000
ret half %sel
}
; Should NOT be folded: f16 fcmp oeq with constant in other position
; The constant 0xH4248 (3.140625) is the select's FALSE value here. With oeq,
; the false arm is taken exactly when %arg is NOT equal to the constant (or is
; unordered), so %arg cannot stand in for it. The expected output keeps the
; constant on both instructions: 0x4248 feeds the v_cmp_eq and is also a
; v_cndmask operand.
define half @fcmp_select_no_fold_f16_other_pos(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_f16_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f16_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq half %arg, 0xH4248
%sel = select i1 %cmp, half %other, half 0xH4248
ret half %sel
}
; Should NOT be folded: f16 unsupported comparison type
; The predicate is olt, not an equality test, so neither select arm implies
; that %arg equals the constant 0xH4248 (3.140625). In the expected output the
; compare appears as v_cmp_gt with the constant as its first source (operands
; swapped relative to the IR), and 0x4248 is still a v_cndmask operand.
define half @fcmp_select_no_fold_f16_unsupported_cmp(half %arg, half %other) {
; GFX900-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x4248
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_gt_f16_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0x4248, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt half %arg, 0xH4248
%sel = select i1 %cmp, half %other, half 0xH4248
ret half %sel
}
;------------------------------------------------------------------------------
; BF16 Tests
;------------------------------------------------------------------------------
; Should be folded: bfloat fcmp oeq + select with constant in true value
; The shared constant is 0xR4248 (50.0). In the expected output the bfloat
; value is shifted left by 16 into v2 and compared as f32 against 0x42480000.
; The v_cndmask then selects between the original 16-bit registers (v1, v0),
; so the constant is not loaded again for the select.
define bfloat @fcmp_select_fold_oeq_bf16_imm(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_bf16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_bf16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
ret bfloat %sel
}
; Should be folded: bfloat fcmp oeq + select with constant in true value (commutative)
; Same as the non-commuted oeq case, but the constant 0xR4248 (50.0) is the
; FIRST fcmp operand. The expected output is the same shape: shift into v2,
; compare as f32 against 0x42480000, then a v_cndmask on plain registers
; (v1, v0).
define bfloat @fcmp_select_fold_oeq_imm_bf16(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_oeq_imm_bf16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_oeq_imm_bf16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat 0xR4248, %arg
%sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
ret bfloat %sel
}
; Should be folded: bfloat fcmp one + select with constant in false value
; The shared constant is 0xR4020 (2.5), placed in the select's FALSE operand.
; In the expected output the bfloat value is shifted left by 16 into v2 and
; compared as f32 against 0x40200000. The v_cndmask then selects between the
; original 16-bit registers (v0, v1), so the constant is not loaded again.
define bfloat @fcmp_select_fold_one_bf16_imm(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_one_bf16_imm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x40200000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_bf16_imm:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xR4020
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
ret bfloat %sel
}
; Should be folded: bfloat fcmp one + select with constant in false value (commutative)
; Same as the non-commuted `one` case, but the constant 0xR4020 (2.5) is the
; FIRST fcmp operand. The expected output is the same shape: shift into v2,
; compare as f32 against 0x40200000, then a v_cndmask on plain registers
; (v0, v1).
define bfloat @fcmp_select_fold_one_imm_bf16(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_fold_one_imm_bf16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x40200000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_fold_one_imm_bf16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat 0xR4020, %arg
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
ret bfloat %sel
}
; Should NOT be folded: different constants
; The fcmp tests against 0xR4248 (50.0) while the select's true value is
; 0xR4300 (128.0), so %arg being equal to the compare constant says nothing
; about the select constant. The expected output keeps both: 0x42480000 for
; the f32 compare of the shifted value, and 0x4300 as a v_cndmask operand.
define bfloat @fcmp_select_no_fold_bf16_different_const(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_bf16_different_const:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_bf16_different_const:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42480000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat 0xR4300, bfloat %other
ret bfloat %sel
}
; Should NOT be folded: NaN values bypass optimization
; The constant 0xR7FC0 is a quiet NaN. An ordered compare (oeq) against NaN is
; always false, so the select always yields %other. The expected output shows
; this: no shift / v_cmp / v_cndmask, just a move of v1 into v0.
define bfloat @fcmp_select_no_fold_nan_bf16(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_bf16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, v1
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR7FC0
%sel = select i1 %cmp, bfloat 0xR7FC0, bfloat %other
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one with NaN constant
; The constant 0xR7FC0 is a quiet NaN. An ordered compare (one) against NaN is
; always false, so the select always yields its false value, the NaN constant
; itself. The expected output shows this: no shift / v_cmp / v_cndmask, just a
; move of 0x7fc0 into v0.
define bfloat @fcmp_select_no_fold_nan_bf16_one(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_nan_bf16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc0
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xR7FC0
%sel = select i1 %cmp, bfloat %other, bfloat 0xR7FC0
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one with +Inf constant
; 0xR7F80 is +Inf and is both the compare constant and the select's false
; value. The expected output keeps the constant on both instructions:
; 0x7f800000 for the f32 compare of the shifted value, and the 16-bit pattern
; 0x7f80 as a v_cndmask operand.
define bfloat @fcmp_select_no_fold_posinf_bf16_one(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_posinf_bf16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x7f800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f80
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_posinf_bf16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x7f800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f80, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xR7F80
%sel = select i1 %cmp, bfloat %other, bfloat 0xR7F80
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one with -Inf constant
; 0xRFF80 is -Inf and is both the compare constant and the select's false
; value. The expected output keeps the constant on both instructions:
; 0xff800000 for the f32 compare of the shifted value, and the 16-bit pattern
; as a v_cndmask operand, where it is printed sign-extended as 0xffffff80.
define bfloat @fcmp_select_no_fold_neginf_bf16_one(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_neginf_bf16_one:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0xff800000
; GFX900-NEXT: v_mov_b32_e32 v2, 0xffffff80
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_neginf_bf16_one:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffffff80, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat %arg, 0xRFF80
%sel = select i1 %cmp, bfloat %other, bfloat 0xRFF80
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp oeq with zero constant
; 0xR0000 is +0.0. The expected output uses 0 directly as an operand of both
; the f32 compare (on the shifted value) and the v_cndmask, with no separate
; instruction to load it.
; NOTE(review): presumably zero is also excluded because `%arg == 0.0` holds
; for -0.0 as well, so substituting %arg for the constant could change the
; sign of the result - confirm against the combine's implementation.
define bfloat @fcmp_select_no_fold_oeq_bf16_zero(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR0000
%sel = select i1 %cmp, bfloat 0xR0000, bfloat %other
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp one with negative zero constant
; 0xR8000 is -0.0. This is also a commuted form: the constant is the FIRST
; fcmp operand, and it is the select's false value. The expected output keeps
; the constant on both instructions: the f32 compare uses 0x80000000 (loaded
; via s_brev_b32 of 1 on gfx900), and the v_cndmask takes the 16-bit pattern,
; printed sign-extended as 0xffff8000.
; NOTE(review): presumably excluded because -0.0 compares equal to +0.0, so
; %arg is not guaranteed to be bit-identical to the constant - confirm against
; the combine's implementation.
define bfloat @fcmp_select_no_fold_one_bf16_negzero(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_one_bf16_negzero:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_brev_b32 s4, 1
; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff8000
; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_one_bf16_negzero:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffff8000, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp one bfloat 0xR8000, %arg
%sel = select i1 %cmp, bfloat %other, bfloat 0xR8000
ret bfloat %sel
}
; Should NOT be folded: bfloat fcmp oeq with constant in other position
; The constant 0xR4248 (50.0) is the select's FALSE value here. With oeq, the
; false arm is taken exactly when %arg is NOT equal to the constant (or is
; unordered), so %arg cannot stand in for it. The expected output keeps the
; constant on both instructions: 0x42480000 for the f32 compare of the shifted
; value, and 0x4248 as a v_cndmask operand.
define bfloat @fcmp_select_no_fold_bf16_other_pos(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_bf16_other_pos:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_bf16_other_pos:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp oeq bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
ret bfloat %sel
}
; Should NOT be folded: bfloat unsupported comparison type
; The predicate is olt, not an equality test, so neither select arm implies
; that %arg equals the constant 0xR4248 (50.0). In the expected output the
; compare appears as an f32 v_cmp_gt with 0x42480000 as its first source
; (operands swapped relative to the IR), and 0x4248 is still a v_cndmask
; operand.
define bfloat @fcmp_select_no_fold_bf16_unsupported_cmp(bfloat %arg, bfloat %other) {
; GFX900-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT: s_mov_b32 s4, 0x42480000
; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42480000, v0
; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
; GFX1010-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = fcmp olt bfloat %arg, 0xR4248
%sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
ret bfloat %sel
}