//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
: TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
bool TargetLowering::isPositionIndependent() const {
return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
const Function &F = DAG.getMachineFunction().getFunction();
// First, check if tail calls have been disabled in this function.
if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
return false;
// Conservatively require the attributes of the call to match those of
// the return. Ignore the following attributes because they don't affect the
// call sequence.
AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
Attribute::NonNull, Attribute::NoUndef})
CallerAttrs.removeAttribute(Attr);
if (CallerAttrs.hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
if (CallerAttrs.contains(Attribute::ZExt) ||
CallerAttrs.contains(Attribute::SExt))
return false;
// Check if the only use is a function return node.
return isUsedByReturnOnly(Node, Chain);
}
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const uint32_t *CallerPreservedMask,
const SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<SDValue> &OutVals) const {
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
MCRegister Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
// Check that we pass the value used for the caller.
// (We look for a CopyFromReg reading a virtual register that is used
// for the function live-in value of register Reg)
SDValue Value = OutVals[I];
if (Value->getOpcode() == ISD::AssertZext)
Value = Value.getOperand(0);
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
return true;
}
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamStackAlign(ArgIdx);
IndirectType = nullptr;
assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
"multiple ABI attributes?");
if (IsByVal) {
IndirectType = Call->getParamByValType(ArgIdx);
if (!Alignment)
Alignment = Call->getParamAlign(ArgIdx);
}
if (IsPreallocated)
IndirectType = Call->getParamPreallocatedType(ArgIdx);
if (IsInAlloca)
IndirectType = Call->getParamInAllocaType(ArgIdx);
if (IsSRet)
IndirectType = Call->getParamStructRetType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
const SDLoc &dl,
SDValue InChain) const {
if (!InChain)
InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
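// Build the argument list: each operand is marked for sign or zero extension
// per the target's preference; operands softened from a type that should not
// be extended keep neither flag.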
for (unsigned i = 0; i < Ops.size(); ++i) {
SDValue NewOp = Ops[i];
Entry.Node = NewOp;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
CallOptions.IsSExt);
Entry.IsZExt = !Entry.IsSExt;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
Entry.IsSExt = Entry.IsZExt = false;
}
Args.push_back(Entry);
}
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
bool zeroExtend = !signExtend;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
signExtend = zeroExtend = false;
}
CLI.setDebugLoc(dl)
.setChain(InChain)
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
.setSExtResult(signExtend)
.setZExtResult(zeroExtend);
return LowerCallTo(CLI);
}
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() < Op.getDstAlign())
return false;
EVT VT = getOptimalMemOpType(Op, FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here, as SrcAlign is always greater than or
// equal to DstAlign (or zero).
VT = MVT::i64;
if (Op.isFixedDstAlign())
while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
!allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
unsigned NumMemOps = 0;
uint64_t Size = Op.size();
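// Greedily cover the remaining size with the chosen type, stepping down to
// smaller scalar types (or an overlapping access, if allowed) for the
// leftover tail.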
while (Size) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector loads / stores for the left-over pieces.
EVT NewVT = VT;
unsigned NewVTSize;
bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
isSafeMemOpType(NewVT.getSimpleVT()))
Found = true;
else if (NewVT == MVT::i64 &&
isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
isSafeMemOpType(MVT::f64)) {
// i64 is usually not legal on 32-bit targets, but f64 may be.
NewVT = MVT::f64;
Found = true;
}
}
if (!Found) {
do {
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
if (NewVT == MVT::i8)
break;
} while (!isSafeMemOpType(NewVT.getSimpleVT()));
}
NewVTSize = NewVT.getSizeInBits() / 8;
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
unsigned Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
else {
VT = NewVT;
VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
MemOps.push_back(VT);
Size -= VTSize;
}
return true;
}
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS) const {
SDValue Chain;
return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
OldRHS, Chain);
}
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS,
SDValue &Chain,
bool IsSignaling) const {
// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
// not supporting it. We can update this code when libgcc provides such
// functions.
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
// Expand into one or more soft-fp libcall(s).
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
bool ShouldInvertCC = false;
switch (CCCode) {
case ISD::SETEQ:
case ISD::SETOEQ:
LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
case ISD::SETNE:
case ISD::SETUNE:
LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
(VT == MVT::f64) ? RTLIB::UNE_F64 :
(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
break;
case ISD::SETGE:
case ISD::SETOGE:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETLT:
case ISD::SETOLT:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
case ISD::SETLE:
case ISD::SETOLE:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETGT:
case ISD::SETOGT:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETO:
ShouldInvertCC = true;
[[fallthrough]];
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
case ISD::SETONE:
// SETONE = O && UNE
ShouldInvertCC = true;
[[fallthrough]];
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
default:
// Invert CC for unordered comparisons
ShouldInvertCC = true;
switch (CCCode) {
case ISD::SETULT:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETULE:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETUGT:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETUGE:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
// Use the target specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (ShouldInvertCC) {
assert(RetVT.isInteger());
CCCode = getSetCCInverse(CCCode, RetVT);
}
if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
// Update Chain.
Chain = Call.second;
} else {
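// Two libcalls are required: combine the unordered check (LC1) with the
// equality check (LC2). The results are OR'd for SETUEQ; for SETONE the
// condition codes are inverted and the results are AND'd instead.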
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
CCCode = getCmpLibcallCC(LC2);
if (ShouldInvertCC)
CCCode = getSetCCInverse(CCCode, RetVT);
NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
Call2.second);
NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (!isPositionIndependent())
return MachineJumpTableInfo::EK_BlockAddress;
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
unsigned JTEncoding = getJumpTableEncoding();
if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const {
// The normal PIC reloc base is the label at the start of the jump table.
return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = GA->getGlobal();
// If the address is not even local to this DSO we will have to load it from
// the GOT and then add the offset.
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return false;
// If the code is position independent we will have to add a base register.
if (isPositionIndependent())
return false;
// Otherwise we can do it.
return true;
}
//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
// Do target-specific constant optimization.
if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return TLO.New.getNode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
switch (Opcode) {
default:
break;
case ISD::XOR:
case ISD::AND:
case ISD::OR: {
auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!Op1C || Op1C->isOpaque())
return false;
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
return false;
if (!C.isSubsetOf(DemandedBits)) {
EVT VT = Op.getValueType();
SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
break;
}
}
return false;
}
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
EVT VT = Op.getValueType();
SelectionDAG &DAG = TLO.DAG;
SDLoc dl(Op);
// Early return, as this function cannot handle vector types.
if (VT.isVector())
return false;
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
return false;
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned DemandedSize = DemandedBits.getActiveBits();
for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
return TLO.CombineTo(Op, Z);
}
}
return false;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
bool Simplified =
SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
// Since the number of lanes in a scalable vector is unknown at compile time,
// we track one bit which is implicitly broadcast to all lanes. This means
// that all lanes in a scalable vector are considered demanded.
APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
}
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
EVT VT = Op.getValueType();
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
// Ignore UNDEFs.
if (Op.isUndef())
return SDValue();
// Not demanding any bits/elts from Op.
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(VT);
bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
if (VT.isScalableVector())
return SDValue();
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
if (SrcVT == DstVT)
return Src;
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
if (NumSrcEltBits == NumDstEltBits)
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
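// Bitcast from narrower source elements to wider destination elements:
// map each demanded destination bit back to the source elements and bit
// ranges that supply it.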
if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
unsigned BitOffset = EltOffset * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
// TODO - bigendian once we have test coverage.
if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
break;
}
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return Op.getOperand(1);
break;
}
case ISD::OR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::XOR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other.
if (DemandedBits.isSubsetOf(RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::SHL: {
// If we are only demanding sign bits then we can use the shift source
// directly.
if (const APInt *MaxSA =
DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return Op0;
}
break;
}
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return Op0;
}
break;
}
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExBits = ExVT.getScalarSizeInBits();
if (DemandedBits.getActiveBits() <= ExBits &&
shouldRemoveRedundantExtend(Op))
return Op0;
// If the input is already sign extended, just drop the extension.
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
if (NumSignBits >= (BitWidth - ExBits + 1))
return Op0;
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
if (VT.isScalableVector())
return SDValue();
// If we only want the lowest element and none of extended bits, then we can
// return the bitcasted source vector.
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
if (IsLE && DemandedElts == 1 &&
DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
return DAG.getBitcast(DstVT, Src);
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
if (VT.isScalableVector())
return SDValue();
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
!DemandedElts[CIdx->getZExtValue()])
return Vec;
break;
}
case ISD::INSERT_SUBVECTOR: {
if (VT.isScalableVector())
return SDValue();
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
// If we don't demand the inserted subvector, return the base vector.
if (DemandedSubElts == 0)
return Vec;
break;
}
case ISD::VECTOR_SHUFFLE: {
assert(!VT.isScalableVector());
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If all the demanded elts are from one operand and are inline,
// then we can use the operand directly.
bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0 || !DemandedElts[i])
continue;
AllUndef = false;
IdentityLHS &= (M == (int)i);
IdentityRHS &= ((M - NumElts) == i);
}
if (AllUndef)
return DAG.getUNDEF(Op.getValueType());
if (IdentityLHS)
return Op.getOperand(0);
if (IdentityRHS)
return Op.getOperand(1);
break;
}
default:
// TODO: Probably okay to remove after audit; here to reduce change size
// in initial enablement patch for scalable vectors
if (VT.isScalableVector())
return SDValue();
if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth))
return V;
break;
}
return SDValue();
}
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
unsigned Depth) const {
EVT VT = Op.getValueType();
// Since the number of lanes in a scalable vector is unknown at compile time,
// we track one bit which is implicitly broadcast to all lanes. This means
// that all lanes in a scalable vector are considered demanded.
APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
const APInt &DemandedBits,
const APInt &DemandedElts,
unsigned Depth) {
assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
"SRL or SRA node is required here!");
// Is the right shift using an immediate value of 1?
ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
if (!N1C || !N1C->isOne())
return SDValue();
// We are looking for an avgfloor
// add(ext, ext)
// or one of these as an avgceil
// add(add(ext, ext), 1)
// add(add(ext, 1), ext)
// add(ext, add(ext, 1))
SDValue Add = Op.getOperand(0);
if (Add.getOpcode() != ISD::ADD)
return SDValue();
SDValue ExtOpA = Add.getOperand(0);
SDValue ExtOpB = Add.getOperand(1);
SDValue Add2;
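// Helper: if one of Op2/Op3 is the constant 1, the other two operands are
// the values being averaged and A is the inner add.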
auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
ConstantSDNode *ConstOp;
if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
ConstOp->isOne()) {
ExtOpA = Op1;
ExtOpB = Op3;
Add2 = A;
return true;
}
if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
ConstOp->isOne()) {
ExtOpA = Op1;
ExtOpB = Op2;
Add2 = A;
return true;
}
return false;
};
bool IsCeil =
(ExtOpA.getOpcode() == ISD::ADD &&
MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
(ExtOpB.getOpcode() == ISD::ADD &&
MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
// If the shift is signed (sra):
// - Needs >= 2 sign bits for both operands.
// - Needs >= 2 zero bits.
// If the shift is unsigned (srl):
// - Needs >= 1 zero bit for both operands.
// - Needs 1 demanded bit zero and >= 2 sign bits.
unsigned ShiftOpc = Op.getOpcode();
bool IsSigned = false;
unsigned KnownBits;
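// Count the redundant top bits shared by both add operands (a value with N
// sign bits has N-1 redundant sign-bit copies; leading zeros are redundant
// as-is). This bounds how far the average can be narrowed below.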
unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
unsigned NumZeroA =
DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
unsigned NumZeroB =
DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
unsigned NumZero = std::min(NumZeroA, NumZeroB);
switch (ShiftOpc) {
default:
llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
case ISD::SRA: {
if (NumZero >= 2 && NumSigned < NumZero) {
IsSigned = false;
KnownBits = NumZero;
break;
}
if (NumSigned >= 1) {
IsSigned = true;
KnownBits = NumSigned;
break;
}
return SDValue();
}
case ISD::SRL: {
if (NumZero >= 1 && NumSigned < NumZero) {
IsSigned = false;
KnownBits = NumZero;
break;
}
if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
IsSigned = true;
KnownBits = NumSigned;
break;
}
return SDValue();
}
}
unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
: (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
// Find the smallest power-2 type that is legal for this vector size and
// operation, given the original type size and the number of known sign/zero
// bits.
EVT VT = Op.getValueType();
unsigned MinWidth =
std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
if (VT.isVector())
NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
// If we could not transform, and (both) adds are nuw/nsw, we can use the
// larger type size to do the transform.
if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
return SDValue();
if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
Add.getOperand(1)) ==
SelectionDAG::OFK_Never &&
(!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
Add2.getOperand(1)) ==
SelectionDAG::OFK_Never))
NVT = VT;
else
return SDValue();
}
SDLoc DL(Op);
SDValue ResultAVG =
DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
ResultAVG);
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth, bool AssumeSingleUse) const {
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
// Don't know anything.
Known = KnownBits(BitWidth);
EVT VT = Op.getValueType();
bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
// Undef operand.
if (Op.isUndef())
return false;
// We can't simplify target constants.
if (Op.getOpcode() == ISD::TargetConstant)
return false;
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
return false;
}
if (Op.getOpcode() == ISD::ConstantFP) {
// We know all of the bits for a floating point constant!
Known = KnownBits::makeConstant(
cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
return false;
}
// Other users may use these bits.
bool HasMultiUse = false;
if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
if (Depth >= SelectionDAG::MaxRecursionDepth) {
// Limit search depth.
return false;
}
// Allow multiple uses, just set the DemandedBits/Elts to all bits.
DemandedBits = APInt::getAllOnes(BitWidth);
DemandedElts = APInt::getAllOnes(NumElts);
HasMultiUse = true;
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
// Limit search depth.
return false;
}
KnownBits Known2;
switch (Op.getOpcode()) {
case ISD::SCALAR_TO_VECTOR: {
if (VT.isScalableVector())
return false;
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
KnownBits SrcKnown;
SDValue Src = Op.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
// Upper elements are undef, so only get the knownbits if we just demand
// the bottom element.
if (DemandedElts == 1)
Known = SrcKnown.anyextOrTrunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded element.
// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
case ISD::LOAD: {
auto *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
}
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
// If this is a ZEXTLoad and we are looking at the loaded value.
EVT MemVT = LD->getMemoryVT();
unsigned MemBits = MemVT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
return false; // Don't fall through, will infinitely loop.
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
if (VT.isScalableVector())
return false;
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
// If index isn't constant, assume we need all vector elements AND the
// inserted element.
APInt DemandedVecElts(DemandedElts);
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
unsigned Idx = CIdx->getZExtValue();
DemandedVecElts.clearBit(Idx);
// Inserted element is not required.
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
}
KnownBits KnownScl;
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
Known = KnownScl.anyextOrTrunc(BitWidth);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
Depth + 1))
return true;
if (!!DemandedVecElts)
Known = Known.intersectWith(KnownVec);
return false;
}
case ISD::INSERT_SUBVECTOR: {
if (VT.isScalableVector())
return false;
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
Depth + 1))
return true;
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedSubElts)
Known = Known.intersectWith(KnownSub);
if (!!DemandedSrcElts)
Known = Known.intersectWith(KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
!DemandedSrcElts.isAllOnes()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSub || NewSrc) {
NewSub = NewSub ? NewSub : Sub;
NewSrc = NewSrc ? NewSrc : Src;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
Op.getOperand(2));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
if (VT.isScalableVector())
return false;
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
Op.getOperand(1));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::CONCAT_VECTORS: {
if (VT.isScalableVector())
return false;
Known.Zero.setAllBits();
Known.One.setAllBits();
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubVecs = Op.getNumOperands();
unsigned NumSubElts = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumSubVecs; ++i) {
APInt DemandedSubElts =
DemandedElts.extractBits(NumSubElts, i * NumSubElts);
if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
Known2, TLO, Depth + 1))
return true;
// Known bits are shared by every demanded subvector element.
if (!!DemandedSubElts)
Known = Known.intersectWith(Known2);
}
break;
}
case ISD::VECTOR_SHUFFLE: {
assert(!VT.isScalableVector());
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands.
APInt DemandedLHS, DemandedRHS;
if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
DemandedRHS))
break;
if (!!DemandedLHS || !!DemandedRHS) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
Known = Known.intersectWith(Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
Known = Known.intersectWith(Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
if ((LHSKnown.Zero & DemandedBits) ==
(~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
DemandedElts, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
// constant, but if this 'and' is only clearing bits that were just set by
// the xor, then this 'and' can be eliminated by shrinking the mask of
// the xor. For example, for a 32-bit X:
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
(Op0.getOperand(0).isUndef() ||
ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
Op0->hasOneUse()) {
unsigned NumSubElts =
Op0.getOperand(1).getValueType().getVectorNumElements();
unsigned SubIdx = Op0.getConstantOperandVal(2);
APInt DemandedSub =
APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
KnownBits KnownSubMask =
TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
SDValue NewAnd =
TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
SDValue NewInsert =
TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
Op0.getOperand(1), Op0.getOperand(2));
return TLO.CombineTo(Op, NewInsert);
}
}
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
Known &= Known2;
break;
}
case ISD::OR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
Op0->hasOneUse() && Op1->hasOneUse()) {
// Attempt to match all commutations - m_c_Or would've been useful!
for (int I = 0; I != 2; ++I) {
SDValue X = Op.getOperand(I).getOperand(0);
SDValue C1 = Op.getOperand(I).getOperand(1);
SDValue Alt = Op.getOperand(1 - I).getOperand(0);
SDValue C2 = Op.getOperand(1 - I).getOperand(1);
if (Alt.getOpcode() == ISD::OR) {
for (int J = 0; J != 2; ++J) {
if (X == Alt.getOperand(J)) {
SDValue Y = Alt.getOperand(1 - J);
if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
{C1, C2})) {
SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
}
}
}
}
}
}
Known |= Known2;
break;
}
case ISD::XOR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other
// turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
if (C) {
// If one side is a constant, and all of the set bits in the constant are
// also known set on the other side, turn this into an AND, as we know
// the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
if (C->getAPIntValue() == Known2.One) {
SDValue ANDC =
TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
}
unsigned Op0Opcode = Op0.getOpcode();
if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
if (ConstantSDNode *ShiftC =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
// Don't crash on an oversized shift. We cannot guarantee that a
// bogus shift has been simplified to undef.
if (ShiftC->getAPIntValue().ult(BitWidth)) {
uint64_t ShiftAmt = ShiftC->getZExtValue();
APInt Ones = APInt::getAllOnes(BitWidth);
Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
: Ones.lshr(ShiftAmt);
const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
// If the xor constant is a demanded mask, do a 'not' before the
// shift:
// xor (X << ShiftC), XorC --> (not X) << ShiftC
// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
Op0.getOperand(1)));
}
}
}
}
}
// If we can't turn this into a 'not', try to shrink the constant.
if (!C || !C->isAllOnes())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
Known ^= Known2;
break;
}
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = Known.intersectWith(Known2);
break;
case ISD::VSELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS.
Known = Known.intersectWith(Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = Known.intersectWith(Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return TLO.CombineTo(Op, Op0);
// TODO: Should we check for other forms of sign-bit comparisons?
// Examples: X <= -1, X >= 0
}
if (getBooleanContents(Op0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
case ISD::SHL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
// TODO - support non-uniform vector amounts.
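// e.g. for i32 x, ((x >>u 3) << 8) becomes (x << 5) and ((x >>u 8) << 3)
// becomes (x >>u 5), provided the low ShAmt result bits are not demanded.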
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
// TODO - support non-uniform vector amounts.
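// e.g. (i64 shl (any_ext i32 x), 4) with only the low 32 result bits
// demanded can become (any_ext (i32 shl x, 4)) if the narrow shift is
// desirable for the target.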
if (Op0.getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
SDValue NarrowShl = TLO.DAG.getNode(
ISD::SHL, dl, InnerVT, InnerOp,
TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c2 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
// TODO - support non-uniform vector amounts.
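// e.g. (i64 shl (any_ext (i32 srl x, 2)), 8) can become
// (i64 shl (any_ext x), 6) when neither the low 8 result bits nor any bit
// above bit 37 is demanded.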
if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
InnerOp.hasOneUse()) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
unsigned InnerShAmt = SA2->getZExtValue();
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
DemandedBits.countr_zero() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits known zero.
Known.Zero.setLowBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// Try shrinking the operation as long as the shift amount will still be
// in range.
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
} else {
// This is a variable shift, so we can't shift the demand mask by a known
// amount. But if we are not demanding high bits, then we are not
// demanding those bits from the pre-shifted operand either.
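// e.g. if only the low 24 bits of an i32 (shl x, amt) are demanded, only
// the low 24 bits of x can ever reach them, whatever amt turns out to be.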
if (unsigned CTLZ = DemandedBits.countl_zero()) {
APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
Depth + 1)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
Op->setFlags(Flags);
}
return true;
}
Known.resetAll();
}
}
// If we are only demanding sign bits then we can use the shift source
// directly.
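// e.g. if x has 12 sign bits in an i32 and the shift amount is at most 4,
// the top 8 bits of (shl x, amt) are still all copies of the sign bit, so
// x can be used directly when only those top bits are demanded.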
if (const APInt *MaxSA =
TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return TLO.CombineTo(Op, Op0);
}
break;
}
case ISD::SRL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
// Try to match AVG patterns.
if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
DemandedElts, Depth + 1))
return TLO.CombineTo(Op, AVG);
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
// TODO - support non-uniform vector amounts.
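// e.g. for i32 x, ((x << 3) >>u 8) becomes (x >>u 5) and ((x << 8) >>u 3)
// becomes (x << 5), provided the top ShAmt result bits are not demanded.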
if (Op0.getOpcode() == ISD::SHL) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
}
break;
}
case ISD::SRA: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
// If we only want bits that already match the signbit then we don't need
// to shift.
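// e.g. if only the top 4 bits of an i32 are demanded and x already has at
// least 4 sign bits, those bits are copies of the sign bit both before and
// after any arithmetic shift right, so x can be used directly.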
unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
NumHiDemandedBits)
return TLO.CombineTo(Op, Op0);
// If this is an arithmetic shift right and only the low bit is demanded, we
// can always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless the
// shift amount is >= the size of the datatype, which is undefined.
if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
// Try to match AVG patterns.
if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
DemandedElts, Depth + 1))
return TLO.CombineTo(Op, AVG);
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
if (DemandedBits.countl_zero() < ShAmt)
InDemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
DemandedBits.countl_zero() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
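// e.g. i32 (sra x, 7) with only bit 28 demanded: bit 28 lies in the
// sign-extended region, so it equals bit 28 of (srl x, 31 - 28).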
SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
}
break;
}
case ISD::FSHL:
case ISD::FSHR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
unsigned Amt = SA->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
// For fshr, 0-shift returns the 2nd arg.
if (Amt == 0) {
if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
break;
}
// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
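// e.g. i32 fshl with Amt == 8: the top 24 result bits come from the low 24
// bits of Op0 and the low 8 result bits from the top 8 bits of Op1, so the
// demanded masks for Op0 and Op1 are shifted accordingly.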
APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known = Known.unionWith(Known2);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
!DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
DemandedOp1, Op2);
return TLO.CombineTo(Op, NewOp);
}
}
}
// For pow-2 bitwidths, only the low log2(BitWidth) bits of the shift amount
// are demanded.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
}
break;
}
case ISD::ROTL:
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
bool IsROTL = (Op.getOpcode() == ISD::ROTL);
// If we're rotating a 0/-1 value, then it stays a 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
unsigned Amt = SA->getAPIntValue().urem(BitWidth);
unsigned RevAmt = BitWidth - Amt;
// rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
// rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
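// e.g. i32 rotl(x, 8) == (x << 8) | (x >> 24); if the low 8 result bits
// are not demanded it degenerates to (x << 8), and if the high 24 bits are
// not demanded it degenerates to (x >> 24).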
APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
Depth + 1))
return true;
// rot*(x, 0) --> x
if (Amt == 0)
return TLO.CombineTo(Op, Op0);
// If all the bits contributed by one half of the rotate are not demanded,
// the rotate degenerates to a single shift.
if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
}
if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
}
}
// For pow-2 bitwidths, only the low log2(BitWidth) bits of the shift amount
// are demanded.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
}
break;
}
case ISD::UMIN: {
// Check if one arg is always less than (or equal to) the other arg.
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
break;
}
case ISD::UMAX: {
// Check if one arg is always greater than (or equal to) the other arg.
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.reverseBits();
Known.Zero = Known2.Zero.reverseBits();
break;
}
case ISD::BSWAP: {
SDValue Src = Op.getOperand(0);
// If the only bits demanded come from one byte of the bswap result,
// just shift the input byte into position to eliminate the bswap.
unsigned NLZ = DemandedBits.countl_zero();
unsigned NTZ = DemandedBits.countr_zero();
// Round NTZ down to the next byte boundary. If we have 11 trailing zeros,
// then we need all the bits down to bit 8. Likewise, round NLZ down. If we
// have 14 leading zeros, round down to 8.
NLZ = alignDown(NLZ, 8);
NTZ = alignDown(NTZ, 8);
// If we need exactly one byte, we can do this transformation.
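// e.g. for an i32 bswap with only bits 8-15 demanded: those bits equal
// bits 16-23 of the source, so (srl Src, 8) supplies them directly.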
if (BitWidth - NLZ - NTZ == 8) {
// Replace this with either a left or right shift to get the byte into
// the right place.
unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
return TLO.CombineTo(Op, NewOp);
}
}
APInt DemandedSrcBits = DemandedBits.byteSwap();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.byteSwap();
Known.Zero = Known2.Zero.byteSwap();
break;
}
case ISD::CTPOP: {
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
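// (Bit 0 of ctpop(x) is the parity of x; the higher ctpop bits differ,
// which is why this is only valid when no other bits are demanded.)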
if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
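// e.g. i32 sign_extend_inreg(x, i8) with only bit 31 demanded can become
// (shl x, 24), which moves the i8 sign bit (bit 7 of x) into bit 31.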
if (DemandedBits.isSignMask()) {
unsigned MinSignedBits =
TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
bool AlreadySignExtended = ExVTBits >= MinSignedBits;
// However, if the input is already sign extended, we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
getShiftAmountTy(VT, DL));
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
if (DemandedBits.getActiveBits() <= ExVTBits)
return TLO.CombineTo(Op, Op0);
APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
} else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
break;
}
case ISD::BUILD_PAIR: {
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
Known = KnownHi.concat(KnownLo);
break;
}
case ISD::ZERO_EXTEND_VECTOR_INREG:
if (VT.isScalableVector())
return false;
[[fallthrough]];
case ISD::ZERO_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts =
    SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsLE && IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG:
if (VT.isScalableVector())
return false;
[[fallthrough]];
case ISD::SIGN_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts =
    SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsLE && IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zext(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
InDemandedBits.setBit(InBits - 1);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
// If the sign bit is known one, the top bits match.
Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative()) {
unsigned Opc =
IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG:
if (VT.isScalableVector())
return false;
[[fallthrough]];
case ISD::ANY_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts =
    SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
if (IsLE && IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.anyext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
switch (Src.getOpcode()) {
default:
break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
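// e.g. (i32 trunc (i64 srl x, 12)) can become (i32 srl (i32 trunc x), 12)
// when the bits shifted in from above bit 31 of x are not demanded.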
if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
if (Src.getNode()->hasOneUse()) {
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
if (!ShAmtC || ShAmtC->uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
HighBits.lshrInPlace(ShVal);
HighBits = HighBits.trunc(BitWidth);
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewShAmt = TLO.DAG.getConstant(
ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
}
break;
}