lib/Analysis/CostModel.cpp - llvm - Git at Google

 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines the cost model analysis. It provides a very basic cost
 // estimation for LLVM-IR. This analysis uses the services of the codegen
 // to approximate the cost of any IR instruction when lowered to machine
 // instructions. The cost results are unit-less and the cost number represents
 // the throughput of the machine assuming that all loads hit the cache, all
 // branches are predicted, etc. The cost numbers can be added in order to
 // compare two or more transformation alternatives.
 //
 //===----------------------------------------------------------------------===//

 #define CM_NAME "cost-model"
 #define DEBUG_TYPE CM_NAME
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;

 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                      cl::Hidden,
                                      cl::desc("Recognize reduction patterns."));

 namespace {
   class CostModelAnalysis : public FunctionPass {

   public:
     static char ID; // Class identification, replacement for typeinfo
     CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) {
       initializeCostModelAnalysisPass(
         *PassRegistry::getPassRegistry());
     }

     /// Returns the expected cost of the instruction.
     /// Returns -1 if the cost is unknown.
     /// Note, this method does not cache the cost calculation and it
     /// can be expensive in some cases.
     unsigned getInstructionCost(const Instruction *I) const;

   private:
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     virtual bool runOnFunction(Function &F);
     virtual void print(raw_ostream &OS, const Module*) const;

     /// The function that we analyze.
     Function *F;
     /// Target information.
     const TargetTransformInfo *TTI;
   };
 }  // End of anonymous namespace

 // Register this pass.
 char CostModelAnalysis::ID = 0;
 static const char cm_name[] = "Cost Model Analysis";
 INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
 INITIALIZE_PASS_END  (CostModelAnalysis, CM_NAME, cm_name, false, true)

 FunctionPass *llvm::createCostModelAnalysisPass() {
   return new CostModelAnalysis();
 }

 void
 CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
 }

 bool
 CostModelAnalysis::runOnFunction(Function &F) {
  this->F = &F;
  TTI = getAnalysisIfAvailable<TargetTransformInfo>();

  return false;
 }

 static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
   for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
     if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
       return false;
   return true;
 }

 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
   TargetTransformInfo::OperandValueKind OpInfo =
     TargetTransformInfo::OK_AnyValue;

   // Check for a splat of a constant.
   ConstantDataVector *CDV = 0;
   if ((CDV = dyn_cast<ConstantDataVector>(V)))
     if (CDV->getSplatValue() != NULL)
       OpInfo = TargetTransformInfo::OK_UniformConstantValue;
   ConstantVector *CV = 0;
   if ((CV = dyn_cast<ConstantVector>(V)))
     if (CV->getSplatValue() != NULL)
       OpInfo = TargetTransformInfo::OK_UniformConstantValue;

   return OpInfo;
 }

 static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
   if (M1.size() != M2.size())
     return false;

   for (unsigned i = 0, e = M1.size(); i != e; ++i)
     if (M1[i] != M2[i])
       return false;

   return true;
 }

 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                      unsigned Level) {
   // We don't need a shuffle if we just want to have element 0 in position 0 of
   // the vector.
   if (!SI && Level == 0 && IsLeft)
     return true;
   else if (!SI)
     return false;

   SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

   // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
   // we look at the left or right side.
   for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
     Mask[i] = val;

   SmallVector<int, 16> ActualMask = SI->getShuffleMask();
   if (!matchMask(Mask, ActualMask))
     return false;

   return true;
 }

 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
                                           unsigned Level, unsigned NumLevels) {
   // Match one level of pairwise operations.
   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
   if (BinOp == 0)
     return false;

   assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");

   unsigned Opcode = BinOp->getOpcode();
   Value *L = BinOp->getOperand(0);
   Value *R = BinOp->getOperand(1);

   ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
   if (!LS && Level)
     return false;
   ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
   if (!RS && Level)
     return false;

   // On level 0 we can omit one shufflevector instruction.
   if (!Level && !RS && !LS)
     return false;

   // Shuffle inputs must match.
   Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
   Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
   Value *NextLevelOp = 0;
   if (NextLevelOpR && NextLevelOpL) {
     // If we have two shuffles their operands must match.
     if (NextLevelOpL != NextLevelOpR)
       return false;

     NextLevelOp = NextLevelOpL;
   } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
     // On the first level we can omit the shufflevector <0, undef,...>. So the
     // input to the other shufflevector <1, undef> must match with one of the
     // inputs to the current binary operation.
     // Example:
     //  %NextLevelOpL = shufflevector %R, <1, undef ...>
     //  %BinOp        = fadd          %NextLevelOpL, %R
     if (NextLevelOpL && NextLevelOpL != R)
       return false;
     else if (NextLevelOpR && NextLevelOpR != L)
       return false;

     NextLevelOp = NextLevelOpL ? R : L;
   } else
     return false;

   // Check that the next levels binary operation exists and matches with the
   // current one.
   BinaryOperator *NextLevelBinOp = 0;
   if (Level + 1 != NumLevels) {
     if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
       return false;
     else if (NextLevelBinOp->getOpcode() != Opcode)
       return false;
   }

   // Shuffle mask for pairwise operation must match.
   if (matchPairwiseShuffleMask(LS, true, Level)) {
     if (!matchPairwiseShuffleMask(RS, false, Level))
       return false;
   } else if (matchPairwiseShuffleMask(RS, true, Level)) {
     if (!matchPairwiseShuffleMask(LS, false, Level))
       return false;
   } else
     return false;

   if (++Level == NumLevels)
     return true;

   // Match next level.
   return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
 }

 static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                    unsigned &Opcode, Type *&Ty) {
   if (!EnableReduxCost)
     return false;

   // Need to extract the first element.
   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
   unsigned Idx = ~0u;
   if (CI)
     Idx = CI->getZExtValue();
   if (Idx != 0)
     return false;

   BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
   if (!RdxStart)
     return false;

   Type *VecTy = ReduxRoot->getOperand(0)->getType();
   unsigned NumVecElems = VecTy->getVectorNumElements();
   if (!isPowerOf2_32(NumVecElems))
     return false;

   // We look for a sequence of shuffle,shuffle,add triples like the following
   // that builds a pairwise reduction tree.
   //
   //  (X0, X1, X2, X3)
   //   (X0 + X1, X2 + X3, undef, undef)
   //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
   //
   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
   // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
   //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
   // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
   //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
   // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
   // %r = extractelement <4 x float> %bin.rdx8, i32 0
   if (!matchPairwiseReductionAtLevel(RdxStart, 0,  Log2_32(NumVecElems)))
     return false;

   Opcode = RdxStart->getOpcode();
   Ty = VecTy;

   return true;
 }

 static std::pair<Value *, ShuffleVectorInst *>
 getShuffleAndOtherOprd(BinaryOperator *B) {

   Value *L = B->getOperand(0);
   Value *R = B->getOperand(1);
   ShuffleVectorInst *S = 0;

   if ((S = dyn_cast<ShuffleVectorInst>(L)))
     return std::make_pair(R, S);

   S = dyn_cast<ShuffleVectorInst>(R);
   return std::make_pair(L, S);
 }

 static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                                           unsigned &Opcode, Type *&Ty) {
   if (!EnableReduxCost)
     return false;

   // Need to extract the first element.
   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
   unsigned Idx = ~0u;
   if (CI)
     Idx = CI->getZExtValue();
   if (Idx != 0)
     return false;

   BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
   if (!RdxStart)
     return false;
   unsigned RdxOpcode = RdxStart->getOpcode();

   Type *VecTy = ReduxRoot->getOperand(0)->getType();
   unsigned NumVecElems = VecTy->getVectorNumElements();
   if (!isPowerOf2_32(NumVecElems))
     return false;

   // We look for a sequence of shuffles and adds like the following matching one
   // fadd, shuffle vector pair at a time.
   //
   // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
   //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
   // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
   // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
   //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
   // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
   // %r = extractelement <4 x float> %bin.rdx8, i32 0

   unsigned MaskStart = 1;
   Value *RdxOp = RdxStart;
   SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
   unsigned NumVecElemsRemain = NumVecElems;
   while (NumVecElemsRemain - 1) {
     // Check for the right reduction operation.
     BinaryOperator *BinOp;
     if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
       return false;
     if (BinOp->getOpcode() != RdxOpcode)
       return false;

     Value *NextRdxOp;
     ShuffleVectorInst *Shuffle;
     tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);

     // Check the current reduction operation and the shuffle use the same value.
     if (Shuffle == 0)
       return false;
     if (Shuffle->getOperand(0) != NextRdxOp)
       return false;

     // Check that shuffle masks matches.
     for (unsigned j = 0; j != MaskStart; ++j)
       ShuffleMask[j] = MaskStart + j;
     // Fill the rest of the mask with -1 for undef.
     std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
     if (!matchMask(ShuffleMask, Mask))
       return false;

     RdxOp = NextRdxOp;
     NumVecElemsRemain /= 2;
     MaskStart *= 2;
   }

   Opcode = RdxOpcode;
   Ty = VecTy;
   return true;
 }

 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   if (!TTI)
     return -1;

   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:{
     Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
     return TTI->getAddressComputationCost(ValTy);
   }

   case Instruction::Ret:
   case Instruction::PHI:
   case Instruction::Br: {
     return TTI->getCFInstrCost(I->getOpcode());
   }
   case Instruction::Add:
   case Instruction::FAdd:
   case Instruction::Sub:
   case Instruction::FSub:
   case Instruction::Mul:
   case Instruction::FMul:
   case Instruction::UDiv:
   case Instruction::SDiv:
   case Instruction::FDiv:
   case Instruction::URem:
   case Instruction::SRem:
   case Instruction::FRem:
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
     TargetTransformInfo::OperandValueKind Op1VK =
       getOperandInfo(I->getOperand(0));
     TargetTransformInfo::OperandValueKind Op2VK =
       getOperandInfo(I->getOperand(1));
     return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
                                        Op2VK);
   }
   case Instruction::Select: {
     const SelectInst *SI = cast<SelectInst>(I);
     Type *CondTy = SI->getCondition()->getType();
     return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
     return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
   }
   case Instruction::Store: {
     const StoreInst *SI = cast<StoreInst>(I);
     Type *ValTy = SI->getValueOperand()->getType();
     return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
                                  SI->getAlignment(),
                                  SI->getPointerAddressSpace());
   }
   case Instruction::Load: {
     const LoadInst *LI = cast<LoadInst>(I);
     return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
                                  LI->getAlignment(),
                                  LI->getPointerAddressSpace());
   }
   case Instruction::ZExt:
   case Instruction::SExt:
   case Instruction::FPToUI:
   case Instruction::FPToSI:
   case Instruction::FPExt:
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
   case Instruction::SIToFP:
   case Instruction::UIToFP:
   case Instruction::Trunc:
   case Instruction::FPTrunc:
   case Instruction::BitCast: {
     Type *SrcTy = I->getOperand(0)->getType();
     return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
   }
   case Instruction::ExtractElement: {
     const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
     unsigned Idx = -1;
     if (CI)
       Idx = CI->getZExtValue();

     // Try to match a reduction sequence (series of shufflevector and vector
     // adds followed by a extractelement).
     unsigned ReduxOpCode;
     Type *ReduxType;

     if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
       return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
     else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
       return TTI->getReductionCost(ReduxOpCode, ReduxType, true);

     return TTI->getVectorInstrCost(I->getOpcode(),
                                    EEI->getOperand(0)->getType(), Idx);
   }
   case Instruction::InsertElement: {
     const InsertElementInst * IE = cast<InsertElementInst>(I);
     ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
     unsigned Idx = -1;
     if (CI)
       Idx = CI->getZExtValue();
     return TTI->getVectorInstrCost(I->getOpcode(),
                                    IE->getType(), Idx);
   }
   case Instruction::ShuffleVector: {
     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
     Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
     unsigned NumVecElems = VecTypOp0->getVectorNumElements();
     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

     if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
       return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
                                  0);
     return -1;
   }
   case Instruction::Call:
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
       SmallVector<Type*, 4> Tys;
       for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
         Tys.push_back(II->getArgOperand(J)->getType());

       return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                         Tys);
     }
     return -1;
   default:
     // We don't have any information on this instruction.
     return -1;
   }
 }

 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
   if (!F)
     return;

   for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
     for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
       Instruction *Inst = it;
       unsigned Cost = getInstructionCost(Inst);
       if (Cost != (unsigned)-1)
         OS << "Cost Model: Found an estimated cost of " << Cost;
       else
         OS << "Cost Model: Unknown cost";

       OS << " for instruction: "<< *Inst << "\n";
     }
   }
 }
	//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	// This file defines the cost model analysis. It provides a very basic cost
	// estimation for LLVM-IR. This analysis uses the services of the codegen
	// to approximate the cost of any IR instruction when lowered to machine
	// instructions. The cost results are unit-less and the cost number represents
	// the throughput of the machine assuming that all loads hit the cache, all
	// branches are predicted, etc. The cost numbers can be added in order to
	// compare two or more transformation alternatives.
	//
	//===----------------------------------------------------------------------===//

	#define CM_NAME "cost-model"
	#define DEBUG_TYPE CM_NAME
	#include "llvm/ADT/STLExtras.h"
	#include "llvm/Analysis/Passes.h"
	#include "llvm/Analysis/TargetTransformInfo.h"
	#include "llvm/IR/Function.h"
	#include "llvm/IR/Instructions.h"
	#include "llvm/IR/IntrinsicInst.h"
	#include "llvm/IR/Value.h"
	#include "llvm/Pass.h"
	#include "llvm/Support/CommandLine.h"
	#include "llvm/Support/Debug.h"
	#include "llvm/Support/raw_ostream.h"
	using namespace llvm;

	static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
	cl::Hidden,
	cl::desc("Recognize reduction patterns."));

	namespace {
	class CostModelAnalysis : public FunctionPass {

	public:
	static char ID; // Class identification, replacement for typeinfo
	CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) {
	initializeCostModelAnalysisPass(
	*PassRegistry::getPassRegistry());
	}

	/// Returns the expected cost of the instruction.
	/// Returns -1 if the cost is unknown.
	/// Note, this method does not cache the cost calculation and it
	/// can be expensive in some cases.
	unsigned getInstructionCost(const Instruction *I) const;

	private:
	virtual void getAnalysisUsage(AnalysisUsage &AU) const;
	virtual bool runOnFunction(Function &F);
	virtual void print(raw_ostream &OS, const Module*) const;

	/// The function that we analyze.
	Function *F;
	/// Target information.
	const TargetTransformInfo *TTI;
	};
	} // End of anonymous namespace

	// Register this pass.
	char CostModelAnalysis::ID = 0;
	static const char cm_name[] = "Cost Model Analysis";
	INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
	INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true)

	FunctionPass *llvm::createCostModelAnalysisPass() {
	return new CostModelAnalysis();
	}

	void
	CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
	AU.setPreservesAll();
	}

	bool
	CostModelAnalysis::runOnFunction(Function &F) {
	this->F = &F;
	TTI = getAnalysisIfAvailable<TargetTransformInfo>();

	return false;
	}

	static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
	for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
	if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
	return false;
	return true;
	}

	static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
	TargetTransformInfo::OperandValueKind OpInfo =
	TargetTransformInfo::OK_AnyValue;

	// Check for a splat of a constant.
	ConstantDataVector *CDV = 0;
	if ((CDV = dyn_cast<ConstantDataVector>(V)))
	if (CDV->getSplatValue() != NULL)
	OpInfo = TargetTransformInfo::OK_UniformConstantValue;
	ConstantVector *CV = 0;
	if ((CV = dyn_cast<ConstantVector>(V)))
	if (CV->getSplatValue() != NULL)
	OpInfo = TargetTransformInfo::OK_UniformConstantValue;

	return OpInfo;
	}

	static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
	if (M1.size() != M2.size())
	return false;

	for (unsigned i = 0, e = M1.size(); i != e; ++i)
	if (M1[i] != M2[i])
	return false;

	return true;
	}

	static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
	unsigned Level) {
	// We don't need a shuffle if we just want to have element 0 in position 0 of
	// the vector.
	if (!SI && Level == 0 && IsLeft)
	return true;
	else if (!SI)
	return false;

	SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

	// Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
	// we look at the left or right side.
	for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
	Mask[i] = val;

	SmallVector<int, 16> ActualMask = SI->getShuffleMask();
	if (!matchMask(Mask, ActualMask))
	return false;

	return true;
	}

	static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
	unsigned Level, unsigned NumLevels) {
	// Match one level of pairwise operations.
	// %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
	// <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
	// %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
	// <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
	// %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
	if (BinOp == 0)
	return false;

	assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");

	unsigned Opcode = BinOp->getOpcode();
	Value *L = BinOp->getOperand(0);
	Value *R = BinOp->getOperand(1);

	ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
	if (!LS && Level)
	return false;
	ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
	if (!RS && Level)
	return false;

	// On level 0 we can omit one shufflevector instruction.
	if (!Level && !RS && !LS)
	return false;

	// Shuffle inputs must match.
	Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
	Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
	Value *NextLevelOp = 0;
	if (NextLevelOpR && NextLevelOpL) {
	// If we have two shuffles their operands must match.
	if (NextLevelOpL != NextLevelOpR)
	return false;

	NextLevelOp = NextLevelOpL;
	} else if (Level == 0 && (NextLevelOpR \|\| NextLevelOpL)) {
	// On the first level we can omit the shufflevector <0, undef,...>. So the
	// input to the other shufflevector <1, undef> must match with one of the
	// inputs to the current binary operation.
	// Example:
	// %NextLevelOpL = shufflevector %R, <1, undef ...>
	// %BinOp = fadd %NextLevelOpL, %R
	if (NextLevelOpL && NextLevelOpL != R)
	return false;
	else if (NextLevelOpR && NextLevelOpR != L)
	return false;

	NextLevelOp = NextLevelOpL ? R : L;
	} else
	return false;

	// Check that the next levels binary operation exists and matches with the
	// current one.
	BinaryOperator *NextLevelBinOp = 0;
	if (Level + 1 != NumLevels) {
	if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
	return false;
	else if (NextLevelBinOp->getOpcode() != Opcode)
	return false;
	}

	// Shuffle mask for pairwise operation must match.
	if (matchPairwiseShuffleMask(LS, true, Level)) {
	if (!matchPairwiseShuffleMask(RS, false, Level))
	return false;
	} else if (matchPairwiseShuffleMask(RS, true, Level)) {
	if (!matchPairwiseShuffleMask(LS, false, Level))
	return false;
	} else
	return false;

	if (++Level == NumLevels)
	return true;

	// Match next level.
	return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
	}

	static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
	unsigned &Opcode, Type *&Ty) {
	if (!EnableReduxCost)
	return false;

	// Need to extract the first element.
	ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
	unsigned Idx = ~0u;
	if (CI)
	Idx = CI->getZExtValue();
	if (Idx != 0)
	return false;

	BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
	if (!RdxStart)
	return false;

	Type *VecTy = ReduxRoot->getOperand(0)->getType();
	unsigned NumVecElems = VecTy->getVectorNumElements();
	if (!isPowerOf2_32(NumVecElems))
	return false;

	// We look for a sequence of shuffle,shuffle,add triples like the following
	// that builds a pairwise reduction tree.
	//
	// (X0, X1, X2, X3)
	// (X0 + X1, X2 + X3, undef, undef)
	// ((X0 + X1) + (X2 + X3), undef, undef, undef)
	//
	// %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
	// <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
	// %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
	// <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
	// %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
	// %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
	// <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
	// %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
	// <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
	// %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
	// %r = extractelement <4 x float> %bin.rdx8, i32 0
	if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)))
	return false;

	Opcode = RdxStart->getOpcode();
	Ty = VecTy;

	return true;
	}

	static std::pair<Value , ShuffleVectorInst >
	getShuffleAndOtherOprd(BinaryOperator *B) {

	Value *L = B->getOperand(0);
	Value *R = B->getOperand(1);
	ShuffleVectorInst *S = 0;

	if ((S = dyn_cast<ShuffleVectorInst>(L)))
	return std::make_pair(R, S);

	S = dyn_cast<ShuffleVectorInst>(R);
	return std::make_pair(L, S);
	}

	static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
	unsigned &Opcode, Type *&Ty) {
	if (!EnableReduxCost)
	return false;

	// Need to extract the first element.
	ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
	unsigned Idx = ~0u;
	if (CI)
	Idx = CI->getZExtValue();
	if (Idx != 0)
	return false;

	BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
	if (!RdxStart)
	return false;
	unsigned RdxOpcode = RdxStart->getOpcode();

	Type *VecTy = ReduxRoot->getOperand(0)->getType();
	unsigned NumVecElems = VecTy->getVectorNumElements();
	if (!isPowerOf2_32(NumVecElems))
	return false;

	// We look for a sequence of shuffles and adds like the following matching one
	// fadd, shuffle vector pair at a time.
	//
	// %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
	// <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
	// %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
	// %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
	// <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
	// %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
	// %r = extractelement <4 x float> %bin.rdx8, i32 0

	unsigned MaskStart = 1;
	Value *RdxOp = RdxStart;
	SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
	unsigned NumVecElemsRemain = NumVecElems;
	while (NumVecElemsRemain - 1) {
	// Check for the right reduction operation.
	BinaryOperator *BinOp;
	if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
	return false;
	if (BinOp->getOpcode() != RdxOpcode)
	return false;

	Value *NextRdxOp;
	ShuffleVectorInst *Shuffle;
	tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);

	// Check the current reduction operation and the shuffle use the same value.
	if (Shuffle == 0)
	return false;
	if (Shuffle->getOperand(0) != NextRdxOp)
	return false;

	// Check that shuffle masks matches.
	for (unsigned j = 0; j != MaskStart; ++j)
	ShuffleMask[j] = MaskStart + j;
	// Fill the rest of the mask with -1 for undef.
	std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

	SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
	if (!matchMask(ShuffleMask, Mask))
	return false;

	RdxOp = NextRdxOp;
	NumVecElemsRemain /= 2;
	MaskStart *= 2;
	}

	Opcode = RdxOpcode;
	Ty = VecTy;
	return true;
	}

	unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
	if (!TTI)
	return -1;

	switch (I->getOpcode()) {
	case Instruction::GetElementPtr:{
	Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
	return TTI->getAddressComputationCost(ValTy);
	}

	case Instruction::Ret:
	case Instruction::PHI:
	case Instruction::Br: {
	return TTI->getCFInstrCost(I->getOpcode());
	}
	case Instruction::Add:
	case Instruction::FAdd:
	case Instruction::Sub:
	case Instruction::FSub:
	case Instruction::Mul:
	case Instruction::FMul:
	case Instruction::UDiv:
	case Instruction::SDiv:
	case Instruction::FDiv:
	case Instruction::URem:
	case Instruction::SRem:
	case Instruction::FRem:
	case Instruction::Shl:
	case Instruction::LShr:
	case Instruction::AShr:
	case Instruction::And:
	case Instruction::Or:
	case Instruction::Xor: {
	TargetTransformInfo::OperandValueKind Op1VK =
	getOperandInfo(I->getOperand(0));
	TargetTransformInfo::OperandValueKind Op2VK =
	getOperandInfo(I->getOperand(1));
	return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
	Op2VK);
	}
	case Instruction::Select: {
	const SelectInst *SI = cast<SelectInst>(I);
	Type *CondTy = SI->getCondition()->getType();
	return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
	}
	case Instruction::ICmp:
	case Instruction::FCmp: {
	Type *ValTy = I->getOperand(0)->getType();
	return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
	}
	case Instruction::Store: {
	const StoreInst *SI = cast<StoreInst>(I);
	Type *ValTy = SI->getValueOperand()->getType();
	return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
	SI->getAlignment(),
	SI->getPointerAddressSpace());
	}
	case Instruction::Load: {
	const LoadInst *LI = cast<LoadInst>(I);
	return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
	LI->getAlignment(),
	LI->getPointerAddressSpace());
	}
	case Instruction::ZExt:
	case Instruction::SExt:
	case Instruction::FPToUI:
	case Instruction::FPToSI:
	case Instruction::FPExt:
	case Instruction::PtrToInt:
	case Instruction::IntToPtr:
	case Instruction::SIToFP:
	case Instruction::UIToFP:
	case Instruction::Trunc:
	case Instruction::FPTrunc:
	case Instruction::BitCast: {
	Type *SrcTy = I->getOperand(0)->getType();
	return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
	}
	case Instruction::ExtractElement: {
	const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
	ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
	unsigned Idx = -1;
	if (CI)
	Idx = CI->getZExtValue();

	// Try to match a reduction sequence (series of shufflevector and vector
	// adds followed by a extractelement).
	unsigned ReduxOpCode;
	Type *ReduxType;

	if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
	return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
	else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
	return TTI->getReductionCost(ReduxOpCode, ReduxType, true);

	return TTI->getVectorInstrCost(I->getOpcode(),
	EEI->getOperand(0)->getType(), Idx);
	}
	case Instruction::InsertElement: {
	const InsertElementInst * IE = cast<InsertElementInst>(I);
	ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
	unsigned Idx = -1;
	if (CI)
	Idx = CI->getZExtValue();
	return TTI->getVectorInstrCost(I->getOpcode(),
	IE->getType(), Idx);
	}
	case Instruction::ShuffleVector: {
	const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
	Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
	unsigned NumVecElems = VecTypOp0->getVectorNumElements();
	SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

	if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
	return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
	0);
	return -1;
	}
	case Instruction::Call:
	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
	SmallVector<Type*, 4> Tys;
	for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
	Tys.push_back(II->getArgOperand(J)->getType());

	return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
	Tys);
	}
	return -1;
	default:
	// We don't have any information on this instruction.
	return -1;
	}
	}

	void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
	if (!F)
	return;

	for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
	for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
	Instruction *Inst = it;
	unsigned Cost = getInstructionCost(Inst);
	if (Cost != (unsigned)-1)
	OS << "Cost Model: Found an estimated cost of " << Cost;
	else
	OS << "Cost Model: Unknown cost";

	OS << " for instruction: "<< *Inst << "\n";
	}
	}
	}