blob: 500bdb401b957b1dec1921329bceb236538eb67e [file] [log] [blame]
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
/// Construct the lowering object for target machine \p tm. The argument is
/// forwarded unchanged to the TargetLoweringBase constructor; this class adds
/// no initialization of its own.
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
: TargetLoweringBase(tm) {}
/// Default implementation: reports no name (nullptr) for every \p Opcode.
/// NOTE(review): presumably targets override this to name their own
/// target-specific nodes — confirm against the declaration in the header.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
bool TargetLowering::isPositionIndependent() const {
return getTargetMachine().isPositionIndependent();
}
/// Decide whether the call node \p Node is in tail position within the
/// function currently being compiled.
///
/// The answer is false when the function carries "disable-tail-calls"="true"
/// or when its return value has any attribute other than NoAlias / NonNull.
/// Otherwise the verdict is delegated to isUsedByReturnOnly, which also
/// receives \p Chain so it can report the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &Caller = DAG.getMachineFunction().getFunction();

  // Honour an explicit request to disable tail calls in this function.
  const bool TailCallsDisabled =
      Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true";
  if (TailCallsDisabled)
    return false;

  // Be conservative: the caller's return attributes have to match those of
  // the call. NoAlias and NonNull do not influence the call sequence, so they
  // are dropped before checking whether anything remains.
  AttributeList CallerAttrs = Caller.getAttributes();
  AttrBuilder RemainingRetAttrs(CallerAttrs, AttributeList::ReturnIndex);
  RemainingRetAttrs.removeAttribute(Attribute::NoAlias);
  RemainingRetAttrs.removeAttribute(Attribute::NonNull);
  if (RemainingRetAttrs.hasAttributes())
    return false;

  // Dropping a sign / zero extension of the return value would be unsafe.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
    return false;
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Finally, the node's only user must be a function return.
  return isUsedByReturnOnly(Node, Chain);
}
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const uint32_t *CallerPreservedMask,
const SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<SDValue> &OutVals) const {
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
MCRegister Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
// Check that we pass the value used for the caller.
// (We look for a CopyFromReg reading a virtual register that is used
// for the function live-in value of register Reg)
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
return true;
}
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamAlign(ArgIdx);
ByValType = nullptr;
if (IsByVal)
ByValType = Call->getParamByValType(ArgIdx);
PreallocatedType = nullptr;
if (IsPreallocated)
PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
///
/// \param DAG          DAG in which the call is built.
/// \param LC           Libcall to emit. RTLIB::UNKNOWN_LIBCALL, or a libcall
///                     for which getLibcallName() yields no name, is reported
///                     as a fatal error.
/// \param RetVT        Value type of the call result.
/// \param Ops          Call operands, in argument order.
/// \param CallOptions  Extension / softening / return-usage options.
/// \param dl           Debug location attached to the call.
/// \param InChain      Input chain; the DAG entry node is used when null.
/// \returns the pair produced by LowerCallTo (callers in this file use
///          .first as the call result and .second as the output chain).
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  // Reject unusable libcalls up front. Besides UNKNOWN_LIBCALL this also
  // covers libcalls without a registered name: handing a null name to
  // getExternalSymbol below would crash instead of producing a diagnostic.
  const char *LibcallName =
      LC == RTLIB::UNKNOWN_LIBCALL ? nullptr : getLibcallName(LC);
  if (!LibcallName)
    report_fatal_error("Unsupported library call operation!");

  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  // A single entry is reused for every operand; only the fields assigned in
  // the loop vary from one argument to the next.
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    // For softened operands the extension decision is based on the type the
    // operand had before softening.
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(LibcallName,
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);

  // Same extension logic as for the operands, applied to the result.
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
/// Work out the sequence of value types with which the memory operation
/// described by \p Op should be lowered, appending one type per load/store
/// to \p MemOps.
///
/// \param MemOps          Output list; one entry is appended per memory op.
///                        Entries appended before a failure are left in place.
/// \param Limit           Maximum number of memory ops allowed.
/// \param Op              Description of the operation (size, alignments, ...).
/// \param DstAS           Destination address space, used for the
///                        misaligned-access queries.
/// \param SrcAS           Not referenced by this implementation.
/// \param FuncAttributes  Passed through to getOptimalMemOpType.
/// \returns false if the operation is a memcpy with fixed destination
///          alignment whose source alignment is smaller than the destination
///          alignment, or if more than \p Limit memory ops would be needed;
///          true otherwise.
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
// Let the target pick a type first; MVT::Other means it has no preference.
EVT VT = getOptimalMemOpType(Op, FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
VT = MVT::i64;
if (Op.isFixedDstAlign())
// Step down one simple type at a time until the type either fits the
// destination alignment or may be accessed misaligned.
while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
!allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
unsigned NumMemOps = 0;
uint64_t Size = Op.size();
// Emit one type per iteration until all Size bytes are covered.
while (Size) {
unsigned VTSize = VT.getSizeInBits() / 8;
// The current type is wider than what is left: shrink it, or keep it and
// overlap with the previous access (see below).
while (VTSize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
EVT NewVT = VT;
unsigned NewVTSize;
bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
isSafeMemOpType(NewVT.getSimpleVT()))
Found = true;
else if (NewVT == MVT::i64 &&
isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
isSafeMemOpType(MVT::f64)) {
// i64 is usually not legal on 32-bit targets, but f64 may be.
NewVT = MVT::f64;
Found = true;
}
}
if (!Found) {
// Walk down the simple types until one is safe for memory ops; i8 is
// accepted unconditionally as the last resort.
do {
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
if (NewVT == MVT::i8)
break;
} while (!isSafeMemOpType(NewVT.getSimpleVT()));
}
NewVTSize = NewVT.getSizeInBits() / 8;
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast)
// Keep the wide type; counting it as exactly Size bytes makes this the
// final access.
VTSize = Size;
else {
VT = NewVT;
VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
MemOps.push_back(VT);
Size -= VTSize;
}
return true;
}
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
///
/// Convenience overload for callers that have no chain to thread through the
/// libcall(s): a null chain is supplied to the chain-taking overload and
/// whatever chain comes back is dropped. IsSignaling is left at its declared
/// default.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue IgnoredChain;
  softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS, OldRHS,
                      IgnoredChain);
}
/// Replace a floating-point comparison by one or two soft-float comparison
/// libcalls.
///
/// On entry \p NewLHS / \p NewRHS hold the (softened) operands and \p CCCode
/// the floating-point predicate. On return:
///  * one libcall needed: \p NewLHS is the libcall result, \p NewRHS is the
///    constant 0 and \p CCCode is the integer predicate relating the two;
///  * two libcalls needed (SETUEQ / SETONE): \p NewLHS is the already
///    combined boolean (OR, or AND when inverted) and \p NewRHS is a null
///    SDValue.
/// \p Chain is used as input chain of the libcall(s) and updated to their
/// output chain. \p OldLHS / \p OldRHS only supply the operand types before
/// softening. \p IsSignaling is currently not consulted.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Pick, out of the four per-type variants of a comparison libcall, the one
  // that matches VT.
  auto ForVT = [VT](RTLIB::Libcall F32, RTLIB::Libcall F64,
                    RTLIB::Libcall F128,
                    RTLIB::Libcall PPCF128) -> RTLIB::Libcall {
    if (VT == MVT::f32)
      return F32;
    if (VT == MVT::f64)
      return F64;
    if (VT == MVT::f128)
      return F128;
    return PPCF128;
  };
  const RTLIB::Libcall OEQ = ForVT(RTLIB::OEQ_F32, RTLIB::OEQ_F64,
                                   RTLIB::OEQ_F128, RTLIB::OEQ_PPCF128);
  const RTLIB::Libcall UNE = ForVT(RTLIB::UNE_F32, RTLIB::UNE_F64,
                                   RTLIB::UNE_F128, RTLIB::UNE_PPCF128);
  const RTLIB::Libcall OGE = ForVT(RTLIB::OGE_F32, RTLIB::OGE_F64,
                                   RTLIB::OGE_F128, RTLIB::OGE_PPCF128);
  const RTLIB::Libcall OLT = ForVT(RTLIB::OLT_F32, RTLIB::OLT_F64,
                                   RTLIB::OLT_F128, RTLIB::OLT_PPCF128);
  const RTLIB::Libcall OLE = ForVT(RTLIB::OLE_F32, RTLIB::OLE_F64,
                                   RTLIB::OLE_F128, RTLIB::OLE_PPCF128);
  const RTLIB::Libcall OGT = ForVT(RTLIB::OGT_F32, RTLIB::OGT_F64,
                                   RTLIB::OGT_F128, RTLIB::OGT_PPCF128);
  const RTLIB::Libcall UO = ForVT(RTLIB::UO_F32, RTLIB::UO_F64,
                                  RTLIB::UO_F128, RTLIB::UO_PPCF128);

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = OEQ;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = UNE;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = OGE;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = OLT;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = OLE;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = OGT;
    break;
  case ISD::SETO:
    // "Ordered" is the inverse of "unordered".
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = UO;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    LC1 = UO;
    LC2 = OEQ;
    break;
  // The remaining unordered comparisons are expressed as the inverse of the
  // opposite ordered comparison.
  case ISD::SETULT:
    ShouldInvertCC = true;
    LC1 = OGE;
    break;
  case ISD::SETULE:
    ShouldInvertCC = true;
    LC1 = OGT;
    break;
  case ISD::SETUGT:
    ShouldInvertCC = true;
    LC1 = OLE;
    break;
  case ISD::SETUGE:
    ShouldInvertCC = true;
    LC1 = OLT;
    break;
  default:
    llvm_unreachable("Do not know how to soften this setcc!");
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = {OldLHS.getValueType(), OldRHS.getValueType()};
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);

  auto FirstCall = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = FirstCall.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  // Single-libcall case: all that is left is to hand back the new chain.
  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    Chain = FirstCall.second;
    return;
  }

  // Two-libcall case: materialize both comparisons against zero and combine
  // them here, since a single (LHS, RHS, CC) triple cannot describe the
  // result.
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
  SDValue FirstCmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);

  auto SecondCall = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
  CCCode = getCmpLibcallCC(LC2);
  if (ShouldInvertCC)
    CCCode = getSetCCInverse(CCCode, RetVT);
  NewLHS = DAG.getSetCC(dl, SetCCVT, SecondCall.first, NewRHS, CCCode);

  // Both calls started from the same input chain; join their output chains.
  if (Chain)
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, FirstCall.second,
                        SecondCall.second);

  NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                       FirstCmp.getValueType(), FirstCmp, NewLHS);
  NewRHS = SDValue();
}
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
///
/// Non-PIC code uses plain block addresses. PIC code uses GP-relative 32-bit
/// block addresses when the target's MCAsmInfo provides a GPRel32 directive,
/// and 32-bit label differences otherwise.
unsigned TargetLowering::getJumpTableEncoding() const {
  if (isPositionIndependent()) {
    const bool HasGPRel32 =
        getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr;
    return HasGPRel32 ? MachineJumpTableInfo::EK_GPRel32BlockAddress
                      : MachineJumpTableInfo::EK_LabelDifference32;
  }

  // In non-pic modes, just use the address of a block.
  return MachineJumpTableInfo::EK_BlockAddress;
}
/// Return the value jump table entries are relative to in PIC mode: the
/// global offset table for the GP-relative encodings, and \p Table itself
/// for every other encoding.
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  switch (getJumpTableEncoding()) {
  case MachineJumpTableInfo::EK_GPRel64BlockAddress:
  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
    // GP-relative PIC model: the global offset table is the base.
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
  default:
    return Table;
  }
}
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
///
/// This implementation yields a reference to the symbol that \p MF assigns
/// to jump table \p JTI, created in \p Ctx.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  const auto *JumpTableSym = MF->getJTISymbol(JTI, Ctx);
  return MCSymbolRefExpr::create(JumpTableSym, Ctx);
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = GA->getGlobal();
// If the address is not even local to this DSO we will have to load it from
// a got and then add the offset.
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return false;
// If the code is position independent we will have to add a base register.
if (isPositionIndependent())
return false;
// Otherwise we can do it.
return true;
}
//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
///
/// The target hook targetShrinkDemandedConstant gets the first shot; when it
/// claims the node, the result is whether it recorded a replacement in
/// \p TLO. Otherwise only XOR / AND / OR with a constant second operand are
/// handled.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  const unsigned Opc = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  const bool IsBitwiseLogic =
      Opc == ISD::XOR || Opc == ISD::AND || Opc == ISD::OR;
  if (!IsBitwiseLogic)
    return false;

  auto *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!RHSC)
    return false;
  const APInt &Imm = RHSC->getAPIntValue();

  // If this is a 'not' op, don't touch it because that's a canonical form.
  if (Opc == ISD::XOR && DemandedBits.isSubsetOf(Imm))
    return false;

  // Nothing to strip when the constant only has demanded bits set.
  if (Imm.isSubsetOf(DemandedBits))
    return false;

  EVT VT = Op.getValueType();
  SDValue NarrowImm = TLO.DAG.getConstant(DemandedBits & Imm, DL, VT);
  SDValue Replacement =
      TLO.DAG.getNode(Opc, DL, VT, Op.getOperand(0), NarrowImm);
  return TLO.CombineTo(Op, Replacement);
}
/// Convenience overload of ShrinkDemandedConstant that demands every vector
/// element of \p Op (or the single lane of a scalar).
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  const EVT VT = Op.getValueType();
  if (VT.isVector())
    return ShrinkDemandedConstant(
        Op, DemandedBits, APInt::getAllOnesValue(VT.getVectorNumElements()),
        TLO);

  // Scalars are modelled as one demanded element.
  return ShrinkDemandedConstant(Op, DemandedBits, APInt(1, 1), TLO);
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
///
/// \p Op must be a binary, single-result node of width \p BitWidth and
/// \p Demanded the bits of its result that are used. Vector nodes and nodes
/// with more than one use are left alone. Returns true when a narrower
/// replacement was recorded in \p TLO.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);
  const EVT WideVT = Op.getValueType();

  // Vector types cannot be handled here, and a node with another user may
  // still need the full value.
  if (WideVT.isVector() || !Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const unsigned DemandedSize = Demanded.getActiveBits();
  unsigned NarrowBits = DemandedSize;
  if (!isPowerOf2_32(NarrowBits))
    NarrowBits = NextPowerOf2(NarrowBits);

  while (NarrowBits < BitWidth) {
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), NarrowBits);
    const bool CastsAreFree = TLI.isTruncateFree(WideVT, NarrowVT) &&
                              TLI.isZExtFree(NarrowVT, WideVT);
    if (CastsAreFree) {
      // Perform the operation on truncated operands, then widen the result
      // back to the original type.
      SDValue NarrowLHS =
          DAG.getNode(ISD::TRUNCATE, dl, NarrowVT, Op.getOperand(0));
      SDValue NarrowRHS =
          DAG.getNode(ISD::TRUNCATE, dl, NarrowVT, Op.getOperand(1));
      SDValue NarrowOp =
          DAG.getNode(Op.getOpcode(), dl, NarrowVT, NarrowLHS, NarrowRHS);
      assert(DemandedSize <= NarrowBits && "Narrowed below demanded bits?");
      SDValue Widened = DAG.getNode(ISD::ANY_EXTEND, dl, WideVT, NarrowOp);
      return TLO.CombineTo(Op, Widened);
    }
    NarrowBits = NextPowerOf2(NarrowBits);
  }
  return false;
}
/// DAG-combiner entry point: try to simplify \p Op given that only
/// \p DemandedBits of it are used. On success \p Op's node is put back on the
/// combiner worklist and the recorded replacement is committed through
/// \p DCI. Returns whether a simplification happened.
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  if (!SimplifyDemandedBits(Op, DemandedBits, Known, TLO))
    return false;

  DCI.AddToWorklist(Op.getNode());
  DCI.CommitTargetLoweringOpt(TLO);
  return true;
}
/// Overload of SimplifyDemandedBits without an element mask: every vector
/// element of \p Op (or the single lane of a scalar) is demanded.
///
/// For scalable vectors nothing is attempted; \p Known is reset to "nothing
/// known" and false is returned.
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  const EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  if (VT.isVector())
    return SimplifyDemandedBits(
        Op, DemandedBits, APInt::getAllOnesValue(VT.getVectorNumElements()),
        Known, TLO, Depth, AssumeSingleUse);

  // Scalars are modelled as one demanded element.
  return SimplifyDemandedBits(Op, DemandedBits, APInt(1, 1), Known, TLO, Depth,
                              AssumeSingleUse);
}
/// Look for a value that can stand in for \p Op when only \p DemandedBits of
/// the \p DemandedElts elements are used.
///
/// The candidates returned are an operand of \p Op, a bitcast of a (possibly
/// simplified) bitcast source, or UNDEF. An empty SDValue is returned when
/// the depth limit is reached, when \p Op is undef, or when no candidate
/// applies. Opcodes at or above ISD::BUILTIN_OP_END are forwarded to
/// SimplifyMultipleUseDemandedBitsForTargetNode.
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
// Ignore UNDEFs.
if (Op.isUndef())
return SDValue();
// Not demanding any bits/elts from Op.
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(Op.getValueType());
unsigned NumElts = DemandedElts.getBitWidth();
unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
// A chain of bitcasts back to the original type is a no-op.
if (SrcVT == DstVT)
return Src;
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
// Same element width: the demanded masks carry over unchanged.
if (NumSrcEltBits == NumDstEltBits)
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
// Source elements narrower than destination elements: each destination
// element is made of Scale source elements.
// TODO - bigendian once we have test coverage.
if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
// Source elements wider than destination elements: Scale destination
// elements share one source element.
// TODO - bigendian once we have test coverage.
if ((NumSrcEltBits % NumDstEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
break;
}
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return Op.getOperand(1);
break;
}
case ISD::OR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::XOR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other.
if (DemandedBits.isSubsetOf(RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::SHL: {
// If we are only demanding sign bits then we can use the shift source
// directly.
if (const APInt *MaxSA =
DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
// Number of bits from the lowest demanded bit up to the top bit.
unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return Op0;
}
break;
}
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return Op0;
}
break;
}
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExBits = ExVT.getScalarSizeInBits();
if (DemandedBits.getActiveBits() <= ExBits)
return Op0;
// If the input is already sign extended, just drop the extension.
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
if (NumSignBits >= (BitWidth - ExBits + 1))
return Op0;
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
// If we only want the lowest element and none of extended bits, then we can
// return the bitcasted source vector.
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
DAG.getDataLayout().isLittleEndian() &&
DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
return DAG.getBitcast(DstVT, Src);
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
// Only a constant, in-range index identifies the inserted lane.
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
!DemandedElts[CIdx->getZExtValue()])
return Vec;
break;
}
case ISD::INSERT_SUBVECTOR: {
// If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
return Vec;
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If all the demanded elts are from one operand and are inline,
// then we can use the operand directly.
bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
// Negative mask entries and non-demanded lanes do not constrain the
// result.
if (M < 0 || !DemandedElts[i])
continue;
AllUndef = false;
IdentityLHS &= (M == (int)i);
// Mask values for the second operand are offset by NumElts.
IdentityRHS &= ((M - NumElts) == i);
}
if (AllUndef)
return DAG.getUNDEF(Op.getValueType());
if (IdentityLHS)
return Op.getOperand(0);
if (IdentityRHS)
return Op.getOperand(1);
break;
}
default:
// Opcodes at or above BUILTIN_OP_END are handed to the target hook.
if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth))
return V;
break;
}
return SDValue();
}
/// Overload of SimplifyMultipleUseDemandedBits without an element mask:
/// every vector element of \p Op (or the single lane of a scalar) is
/// demanded.
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  const EVT VT = Op.getValueType();
  if (VT.isVector())
    return SimplifyMultipleUseDemandedBits(
        Op, DemandedBits, APInt::getAllOnesValue(VT.getVectorNumElements()),
        DAG, Depth);

  // Scalars are modelled as one demanded element.
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, APInt(1, 1), DAG,
                                         Depth);
}
/// Element-only variant of SimplifyMultipleUseDemandedBits: all bits of each
/// element in \p DemandedElts are treated as demanded.
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  const unsigned EltBits = Op.getScalarValueSizeInBits();
  return SimplifyMultipleUseDemandedBits(Op, APInt::getAllOnesValue(EltBits),
                                         DemandedElts, DAG, Depth);
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth, bool AssumeSingleUse) const {
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
// Don't know anything.
Known = KnownBits(BitWidth);
// TODO: We can probably do more work on calculating the known bits and
// simplifying the operations for scalable vectors, but for now we just
// bail out.
if (Op.getValueType().isScalableVector())
return false;
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
// Undef operand.
if (Op.isUndef())
return false;
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
return false;
}
if (Op.getOpcode() == ISD::ConstantFP) {
// We know all of the bits for a floating point constant!
Known = KnownBits::makeConstant(
cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
return false;
}
// Other users may use these bits.
EVT VT = Op.getValueType();
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, Just compute the Known bits to
// simplify things downstream.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
DemandedBits = APInt::getAllOnesValue(BitWidth);
DemandedElts = APInt::getAllOnesValue(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
// Limit search depth.
return false;
}
KnownBits Known2;
switch (Op.getOpcode()) {
case ISD::TargetConstant:
llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
KnownBits SrcKnown;
SDValue Src = Op.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
// Upper elements are undef, so only get the knownbits if we just demand
// the bottom element.
if (DemandedElts == 1)
Known = SrcKnown.anyextOrTrunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded element.
// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
} else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
// If this is a ZEXTLoad and we are looking at the loaded value.
EVT MemVT = LD->getMemoryVT();
unsigned MemBits = MemVT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
return false; // Don't fall through, will infinitely loop.
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
// If index isn't constant, assume we need all vector elements AND the
// inserted element.
APInt DemandedVecElts(DemandedElts);
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
unsigned Idx = CIdx->getZExtValue();
DemandedVecElts.clearBit(Idx);
// Inserted element is not required.
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
}
KnownBits KnownScl;
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
Known = KnownScl.anyextOrTrunc(BitWidth);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
Depth + 1))
return true;
if (!!DemandedVecElts)
Known = KnownBits::commonBits(Known, KnownVec);
return false;
}
case ISD::INSERT_SUBVECTOR: {
// Demand any elements from the subvector and the remainder from the src
// it's inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
Depth + 1))
return true;
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedSubElts)
Known = KnownBits::commonBits(Known, KnownSub);
if (!!DemandedSrcElts)
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
!DemandedSrcElts.isAllOnesValue()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSub || NewSrc) {
NewSub = NewSub ? NewSub : Sub;
NewSrc = NewSrc ? NewSrc : Src;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
Op.getOperand(2));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
Op.getOperand(1));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubVecs = Op.getNumOperands();
unsigned NumSubElts = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumSubVecs; ++i) {
APInt DemandedSubElts =
DemandedElts.extractBits(NumSubElts, i * NumSubElts);
if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
Known2, TLO, Depth + 1))
return true;
// Known bits are shared by every demanded subvector element.
if (!!DemandedSubElts)
Known = KnownBits::commonBits(Known, Known2);
}
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands.
APInt DemandedLHS(NumElts, 0);
APInt DemandedRHS(NumElts, 0);
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = ShuffleMask[i];
if (M < 0) {
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
DemandedLHS.clearAllBits();
DemandedRHS.clearAllBits();
break;
}
assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
if (M < (int)NumElts)
DemandedLHS.setBit(M);
else
DemandedRHS.setBit(M - NumElts);
}
if (!!DemandedLHS || !!DemandedRHS) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
Known = KnownBits::commonBits(Known, Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
Known = KnownBits::commonBits(Known, Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
if ((LHSKnown.Zero & DemandedBits) ==
(~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
DemandedElts, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
// constant, but if this 'and' is only clearing bits that were just set by
// the xor, then this 'and' can be eliminated by shrinking the mask of
// the xor. For example, for a 32-bit X:
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
Known &= Known2;
break;
}
case ISD::OR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
Known |= Known2;
break;
}
case ISD::XOR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the demanded bits are known to be zero on one side or the other,
// turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
if (C) {
// If one side is a constant, and all of the set bits in the constant are
// also known set on the other side, turn this into an AND, as we know
// the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
if (C->getAPIntValue() == Known2.One) {
SDValue ANDC =
TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
if (!C->isAllOnesValue() &&
DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
}
}
// If we can't turn this into a 'not', try to shrink the constant.
if (!C || !C->isAllOnesValue())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
Known ^= Known2;
break;
}
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return TLO.CombineTo(Op, Op0);
// TODO: Should we check for other forms of sign-bit comparisons?
// Examples: X <= -1, X >= 0
}
if (getBooleanContents(Op0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
case ISD::SHL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
TLO.DAG.getConstant(ShAmt, dl, ShTy));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
// TODO - support non-uniform vector amounts.
if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
unsigned InnerShAmt = SA2->getZExtValue();
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
DemandedBits.countTrailingZeros() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
// Try shrinking the operation as long as the shift amount will still be
// in range.
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
}
// If we are only demanding sign bits then we can use the shift source
// directly.
if (const APInt *MaxSA =
TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return TLO.CombineTo(Op, Op0);
}
break;
}
case ISD::SRL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
}
break;
}
case ISD::SRA: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
// If we only want bits that already match the signbit then we don't need
// to shift.
unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
NumHiDemandedBits)
return TLO.CombineTo(Op, Op0);
// If this is an arithmetic shift right and only the low bit is demanded, we
// can always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
DemandedBits.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
}
break;
}
case ISD::FSHL:
case ISD::FSHR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
unsigned Amt = SA->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
// For fshr, 0-shift returns the 2nd arg.
if (Amt == 0) {
if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
break;
}
// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
}
break;
}
case ISD::ROTL:
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If we're rotating a 0/-1 value, then it stays a 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
}
break;
}
case ISD::UMIN: {
// Check if one arg is always less than (or equal) to the other arg.
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
break;
}
case ISD::UMAX: {
// Check if one arg is always greater than (or equal) to the other arg.
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.reverseBits();
Known.Zero = Known2.Zero.reverseBits();
break;
}
case ISD::BSWAP: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.byteSwap();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.byteSwap();
Known.Zero = Known2.Zero.byteSwap();
break;
}
case ISD::CTPOP: {
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
EVT ShiftAmtTy = VT;
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
SDValue ShiftAmt =
TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
if (DemandedBits.getActiveBits() <= ExVTBits)
return TLO.CombineTo(Op, Op0);
APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
} else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
break;
}
case ISD::BUILD_PAIR: {
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
Known.Zero = KnownLo.Zero.zext(BitWidth) |
KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
Known.One = KnownLo.One.zext(BitWidth) |
KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
break;
}
case ISD::ZERO_EXTEND:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::SIGN_EXTEND:
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
InDemandedBits.setBit(InBits - 1);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
// If the sign bit is known one, the top bits match.
Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative()) {
unsigned Opc =
IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::ANY_EXTEND:
case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.anyext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Src.getNode()->hasOneUse()) {
switch (Src.getOpcode()) {
default:
break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
if (!ShAmtC || ShAmtC->uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
HighBits.lshrInPlace(ShVal);
HighBits = HighBits.trunc(BitWidth);
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewShAmt = TLO.DAG.getConstant(
ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
break;
}
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
// AssertZext demands all of the high bits, plus any of the low bits
// demanded by its users.
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Src = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
unsigned EltBitWidth = Src.getScalarValueSizeInBits();
if (SrcEltCnt.isScalable())
return false;
// Demand the bits from every vector element without a constant index.
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
// anything about the extended bits.
APInt DemandedSrcBits = DemandedBits;
if (BitWidth > EltBitWidth)
DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedSrcBits.isAllOnesValue() ||
!DemandedSrcElts.isAllOnesValue()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
return TLO.CombineTo(Op, NewOp);
}
}
Known = Known2;
if (BitWidth > EltBitWidth)
Known = Known.anyext(BitWidth);
break;
}
case ISD::BITCAST: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
SrcVT.isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
SrcVT != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
} else if ((NumSrcEltBits % BitWidth) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SrcVT.isVector()) {
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
}
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(