[DAG] isKnownNeverNaN - add DemandedElts element mask to isKnownNeverNaN calls (#135952)
Matches what we've done for computeKnownBits etc. to improve vector handling
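A minimal usage sketch (not part of this patch; the helper name and the lane-0 scenario are made up for illustration) of how a caller can pass a lane mask to the new overload, demanding only the element that actually feeds the node being simplified:

    // Sketch only: laneZeroIsNeverNaN is a hypothetical helper, not an LLVM API.
    #include "llvm/ADT/APInt.h"
    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    static bool laneZeroIsNeverNaN(const SelectionDAG &DAG, SDValue Vec) {
      EVT VT = Vec.getValueType();
      // Scalable and scalar types keep using the old entry point, which
      // internally demands all lanes (a single broadcast bit for scalable).
      if (!VT.isFixedLengthVector())
        return DAG.isKnownNeverNaN(Vec);
      // Demand only element 0; NaNs in the other lanes no longer matter.
      APInt DemandedElts = APInt::getOneBitSet(VT.getVectorNumElements(), 0);
      return DAG.isKnownNeverNaN(Vec, DemandedElts, /*SNaN=*/false);
    }

With a per-element mask like this, e.g. a BUILD_VECTOR whose lane 0 is a non-NaN constant can be reported as never-NaN even when the other lanes are unknown, which the old all-lanes query had to reject.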
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 6342346..2ab6b4d 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2143,10 +2143,24 @@
bool isBaseWithConstantOffset(SDValue Op) const;
/// Test whether the given SDValue (or all elements of it, if it is a
+ /// vector) is known to never be NaN in \p DemandedElts. If \p SNaN is true,
+ /// returns if \p Op is known to never be a signaling NaN (it may still be a
+ /// qNaN).
+ bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN = false,
+ unsigned Depth = 0) const;
+
+ /// Test whether the given SDValue (or all elements of it, if it is a
/// vector) is known to never be NaN. If \p SNaN is true, returns if \p Op is
/// known to never be a signaling NaN (it may still be a qNaN).
bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
+ /// \returns true if \p Op is known to never be a signaling NaN in \p
+ /// DemandedElts.
+ bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const {
+ return isKnownNeverNaN(Op, DemandedElts, true, Depth);
+ }
+
/// \returns true if \p Op is known to never be a signaling NaN.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
return isKnownNeverNaN(Op, true, Depth);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0a36975..00c3626 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4283,6 +4283,7 @@
/// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
/// NaN.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b45369e..f502a53 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5601,7 +5601,24 @@
(Op.getOpcode() == ISD::ADD || isADDLike(Op));
}
-bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+
+ return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth);
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
+ bool SNaN, unsigned Depth) const {
+ assert(!DemandedElts.isZero() && "No demanded elements");
+
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
@@ -5657,21 +5674,21 @@
case ISD::FLDEXP: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::FABS:
case ISD::FNEG:
case ISD::FCOPYSIGN: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SELECT:
- return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), DemandedElts, SNaN, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
@@ -5693,8 +5710,8 @@
case ISD::FMAXIMUMNUM: {
// Only one needs to be known not-nan, since it will be returned if the
// other ends up being one.
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
}
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE: {
@@ -5702,33 +5719,52 @@
return true;
// This can return a NaN if either operand is an sNaN, or if both operands
// are NaN.
- return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
- (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ return (isKnownNeverNaN(Op.getOperand(0), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), DemandedElts, Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), DemandedElts, Depth + 1));
}
case ISD::FMINIMUM:
case ISD::FMAXIMUM: {
    // TODO: Does this quiet or return the original NaN as-is?
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
}
- case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isFixedLengthVector() && Idx &&
+ Idx->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
+ APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
+ Idx->getZExtValue());
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
+ }
case ISD::EXTRACT_SUBVECTOR: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType().isFixedLengthVector()) {
+ unsigned Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
case ISD::BUILD_VECTOR: {
- for (const SDValue &Opnd : Op->ops())
- if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+ unsigned NumElts = Op.getNumOperands();
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (DemandedElts[I] &&
+ !isKnownNeverNaN(Op.getOperand(I), SNaN, Depth + 1))
return false;
return true;
}
default:
- if (Opcode >= ISD::BUILTIN_OP_END ||
- Opcode == ISD::INTRINSIC_WO_CHAIN ||
- Opcode == ISD::INTRINSIC_W_CHAIN ||
- Opcode == ISD::INTRINSIC_VOID) {
- return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN,
+ Depth);
}
return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5308593..3995216 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3928,6 +3928,7 @@
}
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 533ad34..2846405 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5971,10 +5971,9 @@
}
}
-bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN,
- unsigned Depth) const {
+bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN,
+ unsigned Depth) const {
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case AMDGPUISD::FMIN_LEGACY:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 6705f86..fa9d61e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -321,9 +321,8 @@
const MachineRegisterInfo &MRI,
unsigned Depth = 0) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 67f2cf2..724a450 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16684,6 +16684,7 @@
}
bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
@@ -16696,8 +16697,8 @@
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
- return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG, SNaN,
- Depth);
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DemandedElts,
+ DAG, SNaN, Depth);
}
// On older subtargets, global FP atomic instructions have a hardcoded FP mode
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index dc06343..c42366a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -546,9 +546,8 @@
bool isProfitableToHoist(Instruction *I) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index beac41e..ef325da 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1057,54 +1057,53 @@
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v5, v6 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt: