blob: 0914c6627660f3db3dd0b9fe76b92e68ef2d3949 [file] [log] [blame]
//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
namespace {
static cl::opt<bool>
CombinerAA("combiner-alias-analysis", cl::Hidden,
cl::desc("Turn on alias analysis during testing"));
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Include global information in alias analysis"));
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
bool LegalOperations;
bool LegalTypes;
// Worklist of all of the nodes that need to be simplified.
//
// This has the semantics that when adding to the worklist,
// the item added must be next to be processed. It should
// also only appear once. The naive approach to this takes
// linear time.
//
// To reduce the insert/remove time to logarithmic, we use
// a set and a vector to maintain our worklist.
//
// The set contains the items on the worklist, but does not
// maintain the order they should be visited.
//
// The vector maintains the order nodes should be visited, but may
// contain duplicate or removed nodes. When choosing a node to
// visit, we pop off the order stack until we find an item that is
// also in the contents set. All operations are O(log N).
SmallPtrSet<SDNode*, 64> WorkListContents;
SmallVector<SDNode*, 64> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
/// AddUsersToWorkList - When an instruction is simplified, add all users of
/// the instruction to the work lists because they might get more simplified
/// now.
///
void AddUsersToWorkList(SDNode *N) {
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI)
AddToWorkList(*UI);
}
/// visit - call the node-specific routine that knows how to fold each
/// particular type of node.
SDValue visit(SDNode *N);
public:
/// AddToWorkList - Add to the work list making sure its instance is at the
/// back (next to be processed.)
void AddToWorkList(SDNode *N) {
WorkListContents.insert(N);
WorkListOrder.push_back(N);
}
/// removeFromWorkList - remove all instances of N from the worklist.
///
void removeFromWorkList(SDNode *N) {
WorkListContents.erase(N);
}
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
return CombineTo(N, To, 2, AddTo);
}
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
/// SimplifyDemandedBits - Check the specified integer node value to see if
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
SDValue PromoteIntBinOp(SDValue Op);
SDValue PromoteIntShiftOp(SDValue Op);
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
ISD::NodeType ExtType);
/// combine - call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
SDValue combine(SDNode *N);
// Visitation implementation - Implement dag node combining for different
// node types. The semantics are as follows:
// Return Value:
// SDValue.getNode() == 0 - No change was made
// SDValue.getNode() == N - N was replaced, is dead and has been handled.
// otherwise - N should be replaced by the returned Operand.
//
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
SDValue visitSREM(SDNode *N);
SDValue visitUREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitSMULO(SDNode *N);
SDValue visitUMULO(SDNode *N);
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitOR(SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
SDValue visitFP_ROUND_INREG(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
SDValue visitSTORE(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitMEMBARRIER(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
DebugLoc DL, bool foldBooleans = true);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVector<SDValue, 8> &Aliases);
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const;
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
const Value *&SrcValue, int &SrcValueOffset,
unsigned &SrcValueAlignment,
const MDNode *&TBAAInfo) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
SelectionDAG &getDAG() const { return DAG; }
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
}
/// isTypeLegal - This method returns true if we are running before type
/// legalization or if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
if (!LegalTypes) return true;
return TLI.isTypeLegal(VT);
}
};
}
namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
virtual void NodeDeleted(SDNode *N, SDNode *E) {
DC.removeFromWorkList(N);
}
virtual void NodeUpdated(SDNode *N) {
// Ignore updates.
}
};
}
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorkList(N);
}
void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
((DAGCombiner*)DC)->removeFromWorkList(N);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
/// isNegatibleForFree - Return 1 if we can compute the negated form of the
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
if (Op.getValueType() == MVT::ppcf128)
return 0;
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;
// Don't allow anything with multiple uses.
if (!Op.hasOneUse()) return 0;
// Don't recurse exponentially.
if (Depth > 6) return 0;
switch (Op.getOpcode()) {
default: return false;
case ISD::ConstantFP:
// Don't invert constant FP values after legalize. The negated constant
// isn't necessarily legal.
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOperations &&
!TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
return 0;
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
return V;
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
Depth + 1);
}
}
/// GetNegatedExpression - If isNegatibleForFree returns true, this function
/// returns the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
// Don't allow anything with multiple uses.
assert(Op.hasOneUse() && "Unknown reuse!");
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
return DAG.getConstantFP(V, Op.getValueType());
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1),
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
if (N0CFP->getValueAPF().isZero())
return Op.getOperand(1);
// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
Op.getOperand(1), Op.getOperand(0));
case ISD::FMUL:
case ISD::FDIV:
assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1));
case ISD::FP_EXTEND:
case ISD::FSIN:
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1));
case ISD::FP_ROUND:
return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
}
}
// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
// that selects between the values 1 and 0, making it equivalent to a setcc.
// Also, set the incoming LHS, RHS, and CC references to the appropriate
// nodes based on the type of node we are checking. This simplifies life a
// bit for the callers.
static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(2);
return true;
}
if (N.getOpcode() == ISD::SELECT_CC &&
N.getOperand(2).getOpcode() == ISD::Constant &&
N.getOperand(3).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(4);
return true;
}
return false;
}
// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
// one use. If this is true, it allows the users to invert the operation for
// free when it is profitable to do so.
static bool isOneUseSetCC(SDValue N) {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
return true;
return false;
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
if (isa<ConstantSDNode>(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
SDValue OpNode =
DAG.FoldConstantArithmetic(Opc, VT,
cast<ConstantSDNode>(N0.getOperand(1)),
cast<ConstantSDNode>(N1));
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
}
if (N0.hasOneUse()) {
// reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
N0.getOperand(0), N1);
AddToWorkList(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
if (isa<ConstantSDNode>(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
SDValue OpNode =
DAG.FoldConstantArithmetic(Opc, VT,
cast<ConstantSDNode>(N1.getOperand(1)),
cast<ConstantSDNode>(N0));
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
}
if (N1.hasOneUse()) {
// reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
N1.getOperand(0), N0);
AddToWorkList(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
}
}
return SDValue();
}
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.1 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
dbgs() << " and " << NumTo-1 << " other values\n";
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
if (To[i].getNode()) {
AddToWorkList(To[i].getNode());
AddUsersToWorkList(To[i].getNode());
}
}
}
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (N->use_empty()) {
// Nodes can be reintroduced into the worklist. Make sure we do not
// process a node that has been replaced.
removeFromWorkList(N);
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
}
return SDValue(N, 0);
}
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorkList(TLO.New.getNode());
AddUsersToWorkList(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (TLO.Old.getNode()->use_empty()) {
removeFromWorkList(TLO.Old.getNode());
// If the operands of this node are only used by the node, they will now
// be dead. Make sure to visit them first to delete dead nodes early.
for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
DAG.DeleteNode(TLO.Old.getNode());
}
}
/// SimplifyDemandedBits - Check the specified integer node value to see if
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
return false;
// Revisit the node.
AddToWorkList(Op.getNode());
// Replace the old value with the new one.
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
dbgs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
DebugLoc dl = Load->getDebugLoc();
EVT VT = Load->getValueType(0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
DEBUG(dbgs() << "\nReplacing.9 ";
Load->dump(&DAG);
dbgs() << "\nWith: ";
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
&DeadNodes);
removeFromWorkList(Load);
DAG.DeleteNode(Load);
AddToWorkList(Trunc.getNode());
}
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
DebugLoc dl = Op.getDebugLoc();
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
}
unsigned Opc = Op.getOpcode();
switch (Opc) {
default: break;
case ISD::AssertSext:
return DAG.getNode(ISD::AssertSext, dl, PVT,
SExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::AssertZext:
return DAG.getNode(ISD::AssertZext, dl, PVT,
ZExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (NewOp.getNode() == 0)
return SDValue();
AddToWorkList(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (NewOp.getNode() == 0)
return SDValue();
AddToWorkList(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
/// PromoteIntBinOp - Promote the specified integer binary operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
if (NN0.getNode() == 0)
return SDValue();
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
SDValue NN1;
if (N0 == N1)
NN1 = NN0;
else {
NN1 = PromoteOperand(N1, PVT, Replace1);
if (NN1.getNode() == 0)
return SDValue();
}
AddToWorkList(NN0.getNode());
if (NN1.getNode())
AddToWorkList(NN1.getNode());
if (Replace0)
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
if (Replace1)
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Opc, dl, PVT, NN0, NN1));
}
return SDValue();
}
/// PromoteIntShiftOp - Promote the specified integer shift operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
bool Replace = false;
SDValue N0 = Op.getOperand(0);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
else if (Opc == ISD::SRL)
N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
if (N0.getNode() == 0)
return SDValue();
AddToWorkList(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
}
return SDValue();
}
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
}
return SDValue();
}
bool DAGCombiner::PromoteLoad(SDValue Op) {
if (!LegalOperations)
return false;
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return false;
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return false;
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
DebugLoc dl = Op.getDebugLoc();
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
N->dump(&DAG);
dbgs() << "\nTo: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
removeFromWorkList(N);
DAG.DeleteNode(N);
AddToWorkList(Result.getNode());
return true;
}
return false;
}
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
AddToWorkList(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
// The root of the dag may dangle to deleted nodes until the dag combiner is
// done. Set it to null to avoid confusion.
DAG.setRoot(SDValue());
// while the worklist isn't empty, find a node and
// try and combine it.
while (!WorkListContents.empty()) {
SDNode *N;
// The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
// In order to avoid a linear scan, we use a set (O(log N)) to hold what the
// worklist *should* contain, and check the node we want to visit is should
// actually be visited.
do {
N = WorkListOrder.pop_back_val();
} while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
if (N->use_empty() && N != &Dummy) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
AddToWorkList(N->getOperand(i).getNode());
DAG.DeleteNode(N);
continue;
}
SDValue RV = combine(N);
if (RV.getNode() == 0)
continue;
++NodesCombined;
// If we get back the same node we passed in, rather than a new node or
// zero, we know that the node must have defined multiple values and
// CombineTo was used. Since CombineTo takes care of the worklist
// mechanics for us, we have no work to do in this case.
if (RV.getNode() == N)
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
DEBUG(dbgs() << "\nReplacing.3 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
dbgs() << '\n');
// Transfer debug value.
DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
SDValue OpV = RV;
DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
}
// Push the new node and any users onto the worklist
AddToWorkList(RV.getNode());
AddUsersToWorkList(RV.getNode());
// Add any uses of the old node to the worklist in case this node is the
// last one that uses them. They may become dead after this node is
// deleted.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
AddToWorkList(N->getOperand(i).getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (N->use_empty()) {
// Nodes can be reintroduced into the worklist. Make sure we do not
// process a node that has been replaced.
removeFromWorkList(N);
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
}
}
// If the root changed (e.g. it was a dead load, update the root).
DAG.setRoot(Dummy.getValue());
DAG.RemoveDeadNodes();
}
SDValue DAGCombiner::visit(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM: return visitSREM(N);
case ISD::UREM: return visitUREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO: return visitSMULO(N);
case ISD::UMULO: return visitUMULO(N);
case ISD::SDIVREM: return visitSDIVREM(N);
case ISD::UDIVREM: return visitUDIVREM(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
case ISD::SHL: return visitSHL(N);
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
case ISD::STORE: return visitSTORE(N);
case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::MEMBARRIER: return visitMEMBARRIER(N);
}
return SDValue();
}
SDValue DAGCombiner::combine(SDNode *N) {
SDValue RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
if (RV.getNode() == 0) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned NULL!");
if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
}
// If nothing happened still, try promoting the operation.
if (RV.getNode() == 0) {
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
RV = PromoteIntBinOp(SDValue(N, 0));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
RV = PromoteIntShiftOp(SDValue(N, 0));
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
RV = PromoteExtend(SDValue(N, 0));
break;
case ISD::LOAD:
if (PromoteLoad(SDValue(N, 0)))
RV = SDValue(N, 0);
break;
}
}
// If N is a commutative binary node, try commuting it to enable more
// sdisel CSE.
if (RV.getNode() == 0 &&
SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = { N1, N0 };
SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
Ops, 2);
if (CSENode)
return SDValue(CSENode, 0);
}
}
return RV;
}
/// getInputChainForNode - Given a node, return its input chain if it has one,
/// otherwise return a null sd operand.
static SDValue getInputChainForNode(SDNode *N) {
if (unsigned NumOps = N->getNumOperands()) {
if (N->getOperand(0).getValueType() == MVT::Other)
return N->getOperand(0);
else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
return N->getOperand(NumOps-1);
for (unsigned i = 1; i < NumOps-1; ++i)
if (N->getOperand(i).getValueType() == MVT::Other)
return N->getOperand(i);
}
return SDValue();
}
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// If N has two operands, where one has an input chain equal to the other,
// the 'other' chain is redundant.
if (N->getNumOperands() == 2) {
if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
return N->getOperand(0);
if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
return N->getOperand(1);
}
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
// Start out with this token factor.
TFs.push_back(N);
// Iterate through token factors. The TFs grows when new token factors are
// encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
SDNode *TF = TFs[i];
// Check each of the operands.
for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
SDValue Op = TF->getOperand(i);
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
// rededundant.
Changed = true;
break;
case ISD::TokenFactor:
if (Op.hasOneUse() &&
std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
AddToWorkList(Op.getNode());
Changed = true;
break;
}
// Fall thru
default:
// Only add if it isn't already in the list.
if (SeenOps.insert(Op.getNode()))
Ops.push_back(Op);
else
Changed = true;
break;
}
}
}
SDValue Result;
// If we've change things around then replace token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
// New and improved token factor.
Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, &Ops[0], Ops.size());
}
// Don't add users to work list.
return CombineTo(N, Result, false);
}
return Result;
}
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
WorkListRemover DeadNodes(*this);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
&DeadNodes);
} while (!N->use_empty());
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
static
SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
SelectionDAG &DAG) {
EVT VT = N0.getValueType();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N00.getOperand(1))) {
// fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
N00.getOperand(0), N01),
DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
N00.getOperand(1), N01));
return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
}
return SDValue();
}
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (add x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
// fold (add x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
GA->getOffset() +
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N1C && N0.getOpcode() == ISD::SUB)
if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
DAG.getConstant(N1C->getAPIntValue()+
N0C->getAPIntValue(), VT),
N0.getOperand(1));
// reassociate add
SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
if (RADD.getNode() != 0)
return RADD;
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
N1.getOperand(0).getOperand(0), N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
}
if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
}
}
// fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
if (Result.getNode()) return Result;
}
if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
if (Result.getNode()) return Result;
}
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL &&
N1.getOperand(0).getOpcode() == ISD::SUB)
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
if (C->getAPIntValue() == 0)
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL &&
N0.getOperand(0).getOpcode() == ISD::SUB)
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
if (C->getAPIntValue() == 0)
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
unsigned DestBits = VT.getScalarType().getSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
DebugLoc DL = N->getDebugLoc();
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
}
// add (sext i1), X -> sub X, (zext i1)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getValueType() == MVT::i1 &&
!TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
DebugLoc DL = N->getDebugLoc();
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
return SDValue();
}
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
// canonicalize constant to RHS.
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
}
return SDValue();
}
SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
N1, N0, CarryIn);
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
return SDValue();
}
// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG, bool LegalOperations) {
if (!VT.isVector()) {
return DAG.getConstant(0, VT);
}
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
// Produce a vector of zeros.
SDValue El = DAG.getConstant(0, VT.getVectorElementType());
std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
&Ops[0], Ops.size());
}
return SDValue();
}
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c1-c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
if (N1C)
return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), VT));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(1));
// fold ((A+(C+B))-B) -> A+C
if (N0.getOpcode() == ISD::ADD &&
N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(0));
// fold ((A-(B-C))-C) -> A-B
if (N0.getOpcode() == ISD::SUB &&
N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(0));
// If either operand of a sub is undef, the result is undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
// fold (sub Sym, c) -> Sym-c
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
GA->getOffset() -
(uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
VT);
}
return SDValue();
}
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, VT),
DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (N0C && N0C->isAllOnesValue())
return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
MVT::Glue));
return SDValue();
}
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (sube x, y, false) -> (subc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
return SDValue();
}
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (mul x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// fold (mul c1, c2) -> c1*c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1C && N1C->isNullValue())
return N1;
// fold (mul x, -1) -> 0-x
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (N1C && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
DAG.getConstant(0, VT),
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
DAG.getConstant(Log2Val,
getShiftAmountTy(N0.getValueType()))));
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N1C && N0.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(N0.getOperand(1))) {
SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
N1, N0.getOperand(1));
AddToWorkList(C3.getNode());
return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(0,0), Y(0,0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
Sh.getOperand(0), Y);
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
N0.getOperand(0), N1),
DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
N0.getOperand(1), N1));
// reassociate mul
SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
if (RMUL.getNode() != 0)
return RMUL;
return SDValue();
}
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (sdiv c1, c2) -> c1/c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
DAG.getConstant(0, VT), N0);
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
if (N1C && !N1C->isNullValue() &&
(N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2DivCheap())
return SDValue();
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getConstant(VT.getSizeInBits()-1,
getShiftAmountTy(N0.getValueType())));
AddToWorkList(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
DAG.getConstant(VT.getSizeInBits() - lg2,
getShiftAmountTy(SGN.getValueType())));
SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
AddToWorkList(SRL.getNode());
AddToWorkList(ADD.getNode()); // Divide by pow2
SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (N1C->getAPIntValue().isNonNegative())
return SRA;
AddToWorkList(SRA.getNode());
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
DAG.getConstant(0, VT), SRA);
}
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (udiv c1, c2) -> c1/c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
if (N1C && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
ADDVT));
AddToWorkList(Add.getNode());
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
}
}
}
// fold (udiv x, c) -> alternate
if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildUDIV(N);
if (Op.getNode()) return Op;
}
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
}
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
AddToWorkList(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
AddToWorkList(Mul.getNode());
return Sub;
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
DAG.getConstant(N1C->getAPIntValue()-1,VT));
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
SDValue Add =
DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
VT));
AddToWorkList(Add.getNode());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
}
}
}
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
AddToWorkList(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
AddToWorkList(Mul.getNode());
return Sub;
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
DebugLoc DL = N->getDebugLoc();
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
if (N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
DebugLoc DL = N->getDebugLoc();
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
return N1;
// fold (mulhu x, 1) -> 0
if (N1C && N1C->getAPIntValue() == 1)
return DAG.getConstant(0, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
/// compute two values. LoOp and HiOp give the opcodes for the two computations
/// that are being performed. Return true if a simplification was made.
///
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
bool HiExists = N->hasAnyUseOfValue(1);
if (!HiExists &&
(!LegalOperations ||
TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
N->op_begin(), N->getNumOperands());
return CombineTo(N, Res, Res);
}
// If the low half is not needed, just compute the high half.
bool LoExists = N->hasAnyUseOfValue(0);
if (!LoExists &&
(!LegalOperations ||
TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
N->op_begin(), N->getNumOperands());
return CombineTo(N, Res, Res);
}
// If both halves are used, return as it is.
if (LoExists && HiExists)
return SDValue();
// If the two computed results can be simplified separately, separate them.
if (LoExists) {
SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
N->op_begin(), N->getNumOperands());
AddToWorkList(Lo.getNode());
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
return CombineTo(N, LoOpt, LoOpt);
}
if (HiExists) {
SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
N->op_begin(), N->getNumOperands());
AddToWorkList(Hi.getNode());
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
return CombineTo(N, HiOpt, HiOpt);
}
return SDValue();
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
if (Res.getNode()) return Res;
EVT VT = N->getValueType(0);
DebugLoc DL = N->getDebugLoc();
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
if (Res.getNode()) return Res;
EVT VT = N->getValueType(0);
DebugLoc DL = N->getDebugLoc();
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitSMULO(SDNode *N) {
// (smulo x, 2) -> (saddo x, x)
if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (C2->getAPIntValue() == 2)
return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
N->getOperand(0), N->getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitUMULO(SDNode *N) {
// (umulo x, 2) -> (uaddo x, x)
if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (C2->getAPIntValue() == 2)
return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
N->getOperand(0), N->getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
if (Res.getNode()) return Res;
return SDValue();
}
SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
if (Res.getNode()) return Res;