// include/llvm/Analysis/VectorUtils.h - llvm - Git at Google

 //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines some vectorizer utilities.
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_ANALYSIS_VECTORUTILS_H
 #define LLVM_ANALYSIS_VECTORUTILS_H

 #include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/IRBuilder.h"

 namespace llvm {

 // Forward declarations of types that the function declarations below refer
 // to.
 template <typename T> class ArrayRef;
 class DemandedBits;
 class GetElementPtrInst;
 class Loop;
 class ScalarEvolution;
 class TargetTransformInfo;
 class Type;
 class Value;

 namespace Intrinsic {
 // Opaque declaration of the intrinsic ID enum; its underlying type is fixed
 // to unsigned so that it can be named here without the enumerator list.
 enum ID : unsigned;
 }

 /// \brief Identify if the intrinsic is trivially vectorizable.
 ///
 /// \param ID  The intrinsic to query.
 /// \returns true if the intrinsic's argument types are all scalars for the
 /// scalar form of the intrinsic and all vectors for the vector form of the
 /// intrinsic.
 bool isTriviallyVectorizable(Intrinsic::ID ID);

 /// \brief Identifies if the intrinsic has a scalar operand.
 ///
 /// It checks for the ctlz, cttz and powi special intrinsics, whose argument
 /// is scalar.
 ///
 /// \param ID            The intrinsic to query.
 /// \param ScalarOpdIdx  The operand index to test (presumably the position of
 ///                      the candidate scalar operand -- confirm against the
 ///                      implementation).
 ///
 /// NOTE: "Instrinsic" in the function name is a misspelling of "Intrinsic";
 /// it is part of the public name, so it is left unchanged here.
 bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

 /// \brief Returns the intrinsic ID for a call.
 ///
 /// For the input call instruction \p CI, this finds the intrinsic that the
 /// call maps to and returns its intrinsic ID. If no mapping is found, it
 /// returns not_intrinsic.
 ///
 /// NOTE(review): \p TLI is presumably consulted to recognize library calls
 /// that have an intrinsic equivalent -- confirm against the implementation.
 Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                           const TargetLibraryInfo *TLI);

 /// \brief Find the operand of the GEP that should be checked for consecutive
 /// stores.
 ///
 /// Trailing indices that have no effect on the final pointer are ignored.
 ///
 /// \param Gep  The GEP instruction to inspect.
 /// \returns the selected operand as an unsigned value (presumably an operand
 /// number of \p Gep -- confirm against the implementation).
 unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

 /// \brief If \p Ptr is a GEP, returns the operand identified by
 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
 /// operand, it returns that instead.
 ///
 /// NOTE(review): the result for a non-GEP \p Ptr is not documented here, and
 /// loop invariance is presumably evaluated with \p SE relative to \p Lp --
 /// confirm both against the implementation.
 Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

 /// \brief If a value has only one user that is a CastInst, return it.
 ///
 /// NOTE(review): the roles of \p Lp and \p Ty, and the result when there is
 /// no such user, are not documented here -- check the implementation before
 /// relying on them.
 Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

 /// \brief Get the stride of a pointer access in a loop.
 ///
 /// Looks for symbolic strides of the form "a[i*stride]".
 ///
 /// \returns the symbolic stride, or null otherwise.
 Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

 /// \brief Given a vector and an element number, see if the scalar value is
 /// already around as a register, for example if it was inserted then
 /// extracted from the vector.
 ///
 /// \param V      The vector value to look into.
 /// \param EltNo  The element number within \p V.
 ///
 /// NOTE(review): presumably returns null when no such scalar is found --
 /// confirm against the implementation.
 Value *findScalarElement(Value *V, unsigned EltNo);

 /// \brief Get the splat value if the input is a splat vector.
 ///
 /// The value may be extracted from a splat constant vector or from a
 /// sequence of instructions that broadcast a single value into a vector.
 ///
 /// \returns the splat value, or nullptr if \p V is not a splat vector.
 const Value *getSplatValue(const Value *V);

 /// \brief Compute a map of integer instructions to their minimum legal type
 /// size.
 ///
 /// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
 /// type (e.g. i32) whenever arithmetic is performed on them.
 ///
 /// For targets with native i8 or i16 operations, usually InstCombine can shrink
 /// the arithmetic type down again. However, InstCombine refuses to create
 /// illegal types, so for targets without i8 or i16 registers, the lengthening
 /// and shrinking remain.
 ///
 /// Most SIMD ISAs (e.g. NEON), however, support vectors of i8 or i16 even when
 /// their scalar equivalents do not, so during vectorization it is important to
 /// remove these lengthens and truncates when deciding the profitability of
 /// vectorization.
 ///
 /// This function analyzes the given range of instructions and determines the
 /// minimum type size each can be converted to. It attempts to remove or
 /// minimize type size changes across each def-use chain, so for example in the
 /// following code:
 ///
 ///   %1 = load i8, i8*
 ///   %2 = add i8 %1, 2
 ///   %3 = load i16, i16*
 ///   %4 = zext i8 %2 to i32
 ///   %5 = zext i16 %3 to i32
 ///   %6 = add i32 %4, %5
 ///   %7 = trunc i32 %6 to i16
 ///
 /// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
 /// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
 ///
 /// If the optional TargetTransformInfo is provided, this function tries harder
 /// to do less work by only looking at illegal types.
 ///
 /// \param Blocks  The basic blocks whose instructions are analyzed.
 /// \param DB      The DemandedBits analysis to use (NOTE(review): how it is
 ///                consulted is not visible here -- see the implementation).
 /// \param TTI     Optional target information; defaults to nullptr.
 /// \returns a map from each instruction to its minimum type size (e.g. 16
 /// for i16, as in the example above).
 MapVector<Instruction*, uint64_t>
 computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                          DemandedBits &DB,
                          const TargetTransformInfo *TTI=nullptr);

 /// \brief Set the metadata of \p I to the intersection of the metadata found
 /// on the elements of \p VL.
 ///
 /// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
 /// MD_nontemporal].  For K in Kinds, we get the MDNode for K from each of the
 /// elements of VL, compute their "intersection" (i.e., the most generic
 /// metadata value that covers all of the individual values), and set I's
 /// metadata for K equal to the intersection value.
 ///
 /// This function always sets a (possibly null) value for each K in Kinds.
 ///
 /// NOTE(review): the returned instruction is presumably \p I itself --
 /// confirm against the implementation.
 Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

 /// \brief Create an interleave shuffle mask.
 ///
 /// This function creates a shuffle mask for interleaving \p NumVecs vectors of
 /// vectorization factor \p VF into a single wide vector. The mask is of the
 /// form:
 ///
 ///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
 ///
 /// For example, the mask for VF = 4 and NumVecs = 2 is:
 ///
 ///   <0, 4, 1, 5, 2, 6, 3, 7>.
 ///
 /// \param Builder  The IR builder (presumably used to create the mask
 ///                 elements -- confirm against the implementation).
 /// \param VF       The vectorization factor of each input vector.
 /// \param NumVecs  The number of vectors being interleaved.
 /// \returns the created mask.
 Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                unsigned NumVecs);

 /// \brief Create a stride shuffle mask.
 ///
 /// This function creates a shuffle mask whose elements begin at \p Start and
 /// are incremented by \p Stride. The mask can be used to deinterleave an
 /// interleaved vector into separate vectors of vectorization factor \p VF. The
 /// mask is of the form:
 ///
 ///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
 ///
 /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
 ///
 ///   <0, 2, 4, 6>
 ///
 /// \param Builder  The IR builder (presumably used to create the mask
 ///                 elements -- confirm against the implementation).
 /// \param Start    The first index in the mask.
 /// \param Stride   The increment between consecutive mask elements.
 /// \param VF       The number of elements in the mask.
 /// \returns the created mask.
 Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                            unsigned Stride, unsigned VF);

 /// \brief Create a sequential shuffle mask.
 ///
 /// This function creates a shuffle mask whose elements are sequential and
 /// begin at \p Start.  The mask contains \p NumInts integers and is padded
 /// with \p NumUndefs undef values. The mask is of the form:
 ///
 ///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
 ///
 /// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
 ///
 ///   <0, 1, 2, 3, undef, undef, undef, undef>
 ///
 /// \param Builder    The IR builder (presumably used to create the mask
 ///                   elements -- confirm against the implementation).
 /// \param Start      The first index in the mask.
 /// \param NumInts    The number of sequential integer elements.
 /// \param NumUndefs  The number of trailing undef elements.
 /// \returns the created mask.
 Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                                unsigned NumInts, unsigned NumUndefs);

 /// \brief Concatenate a list of vectors.
 ///
 /// This function generates code that concatenates the vectors in \p Vecs into
 /// a single large vector. The number of vectors should be greater than one,
 /// and their element types should be the same. The number of elements in the
 /// vectors should also be the same; however, if the last vector has fewer
 /// elements, it will be padded with undefs.
 ///
 /// \param Builder  The IR builder used to generate the code.
 /// \param Vecs     The vectors to concatenate, in order.
 /// \returns the single large vector produced by the concatenation.
 Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

 } // llvm namespace

 #endif
	//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	// This file defines some vectorizer utilities.
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_ANALYSIS_VECTORUTILS_H
	#define LLVM_ANALYSIS_VECTORUTILS_H

	#include "llvm/ADT/MapVector.h"
	#include "llvm/Analysis/TargetLibraryInfo.h"
	#include "llvm/IR/IRBuilder.h"

	namespace llvm {

	// Forward declarations of types that the function declarations below refer
	// to.
	template <typename T> class ArrayRef;
	class DemandedBits;
	class GetElementPtrInst;
	class Loop;
	class ScalarEvolution;
	class TargetTransformInfo;
	class Type;
	class Value;

	namespace Intrinsic {
	// Opaque declaration of the intrinsic ID enum; its underlying type is fixed
	// to unsigned so that it can be named here without the enumerator list.
	enum ID : unsigned;
	}

	/// \brief Identify if the intrinsic is trivially vectorizable.
	///
	/// \param ID  The intrinsic to query.
	/// \returns true if the intrinsic's argument types are all scalars for the
	/// scalar form of the intrinsic and all vectors for the vector form of the
	/// intrinsic.
	bool isTriviallyVectorizable(Intrinsic::ID ID);

	/// \brief Identifies if the intrinsic has a scalar operand.
	///
	/// It checks for the ctlz, cttz and powi special intrinsics, whose argument
	/// is scalar.
	///
	/// \param ID            The intrinsic to query.
	/// \param ScalarOpdIdx  The operand index to test (presumably the position of
	///                      the candidate scalar operand -- confirm against the
	///                      implementation).
	///
	/// NOTE: "Instrinsic" in the function name is a misspelling of "Intrinsic";
	/// it is part of the public name, so it is left unchanged here.
	bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);

	/// \brief Look up the intrinsic ID that corresponds to a call.
	///
	/// Given the call instruction \p CI, find the intrinsic it maps to and
	/// return that intrinsic's ID. When no mapping is found, not_intrinsic is
	/// returned.
	///
	/// NOTE(review): \p TLI is presumably consulted to recognize library calls
	/// that have an intrinsic equivalent -- confirm against the implementation.
	Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
	                                          const TargetLibraryInfo *TLI);

	/// \brief Find the operand of the GEP that should be checked for consecutive
	/// stores.
	///
	/// Trailing indices that have no effect on the final pointer are ignored.
	///
	/// \param Gep  The GEP instruction to inspect.
	/// \returns the selected operand as an unsigned value (presumably an operand
	/// number of \p Gep -- confirm against the implementation).
	unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

	/// \brief If \p Ptr is a GEP, returns the operand identified by
	/// getGEPInductionOperand. However, if there is some other non-loop-invariant
	/// operand, it returns that instead.
	///
	/// All IR objects are passed and returned by pointer.
	///
	/// NOTE(review): the result for a non-GEP \p Ptr is not documented here, and
	/// loop invariance is presumably evaluated with \p SE relative to \p Lp --
	/// confirm both against the implementation.
	Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

	/// \brief If a value has only one user that is a CastInst, return it.
	///
	/// All IR objects are passed and returned by pointer.
	///
	/// NOTE(review): the roles of \p Lp and \p Ty, and the result when there is
	/// no such user, are not documented here -- check the implementation before
	/// relying on them.
	Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

	/// \brief Get the stride of a pointer access in a loop.
	///
	/// Looks for symbolic strides of the form "a[i*stride]".
	///
	/// \returns the symbolic stride, or null otherwise (hence the pointer return
	/// type).
	Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

	/// \brief Given a vector and an element number, see if the scalar value is
	/// already around as a register, for example if it was inserted then
	/// extracted from the vector.
	///
	/// \param V      The vector value to look into.
	/// \param EltNo  The element number within \p V.
	///
	/// NOTE(review): presumably returns null when no such scalar is found --
	/// confirm against the implementation.
	Value *findScalarElement(Value *V, unsigned EltNo);

	/// \brief Get the splat value if the input is a splat vector.
	///
	/// The value may be extracted from a splat constant vector or from a
	/// sequence of instructions that broadcast a single value into a vector.
	///
	/// \returns the splat value, or nullptr if \p V is not a splat vector.
	const Value *getSplatValue(const Value *V);

	/// \brief Map integer instructions to the smallest legal type size each one
	/// can be computed in.
	///
	/// Under C semantics, sub-int-sized values (e.g. i8, i16) are promoted to int
	/// (e.g. i32) whenever arithmetic is performed on them.
	///
	/// On targets with native i8 or i16 operations, InstCombine can usually
	/// shrink the arithmetic type down again. InstCombine refuses to create
	/// illegal types, though, so on targets without i8 or i16 registers the
	/// lengthening and shrinking remain.
	///
	/// Most SIMD ISAs (e.g. NEON) nevertheless support vectors of i8 or i16 even
	/// when the scalar equivalents are unsupported, so when deciding whether
	/// vectorization is profitable it is important to remove these lengthens
	/// and truncates.
	///
	/// This function analyzes the given range of instructions and determines
	/// the minimum type size each can be converted to. It attempts to remove or
	/// minimize type size changes across each def-use chain. For example, given:
	///
	///   %1 = load i8, i8*
	///   %2 = add i8 %1, 2
	///   %3 = load i16, i16*
	///   %4 = zext i8 %2 to i32
	///   %5 = zext i16 %3 to i32
	///   %6 = add i32 %4, %5
	///   %7 = trunc i32 %6 to i16
	///
	/// instruction %6 must be done at least in i16, so computeMinimumValueSizes
	/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
	///
	/// If the optional TargetTransformInfo is provided, this function tries
	/// harder to do less work by only looking at illegal types.
	///
	/// \param Blocks  The basic blocks whose instructions are analyzed.
	/// \param DB      The DemandedBits analysis to use (NOTE(review): how it is
	///                consulted is not visible here -- see the implementation).
	/// \param TTI     Optional target information; defaults to nullptr.
	MapVector<Instruction *, uint64_t>
	computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks,
	                         DemandedBits &DB,
	                         const TargetTransformInfo *TTI = nullptr);

	/// \brief Set the metadata of \p I to the intersection of the metadata found
	/// on the elements of \p VL.
	///
	/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
	/// MD_nontemporal]. For K in Kinds, we get the MDNode for K from each of the
	/// elements of VL, compute their "intersection" (i.e., the most generic
	/// metadata value that covers all of the individual values), and set I's
	/// metadata for K equal to the intersection value.
	///
	/// This function always sets a (possibly null) value for each K in Kinds.
	///
	/// The instruction is passed by pointer because it is modified in place.
	/// NOTE(review): the returned instruction is presumably \p I itself --
	/// confirm against the implementation.
	Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

	/// \brief Build the shuffle mask that interleaves several vectors.
	///
	/// The mask interleaves \p NumVecs vectors of vectorization factor \p VF
	/// into a single wide vector. It has the form:
	///
	///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
	///
	/// For instance, with VF = 4 and NumVecs = 2 the mask is:
	///
	///   <0, 4, 1, 5, 2, 6, 3, 7>.
	Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
	                               unsigned NumVecs);

	/// \brief Build a strided shuffle mask.
	///
	/// The mask's elements begin at \p Start and are incremented by \p Stride.
	/// It can be used to deinterleave an interleaved vector into separate
	/// vectors of vectorization factor \p VF. It has the form:
	///
	///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
	///
	/// For instance, with Start = 0, Stride = 2, and VF = 4 the mask is:
	///
	///   <0, 2, 4, 6>
	Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
	                           unsigned Stride, unsigned VF);

	/// \brief Build a sequential shuffle mask.
	///
	/// The mask's elements are sequential and begin at \p Start. It contains
	/// \p NumInts integers and is padded with \p NumUndefs undef values. It has
	/// the form:
	///
	///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
	///
	/// For instance, with Start = 0, NumInts = 4, and NumUndefs = 4 the mask is:
	///
	///   <0, 1, 2, 3, undef, undef, undef, undef>
	Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
	                               unsigned NumInts, unsigned NumUndefs);

	/// \brief Concatenate a list of vectors.
	///
	/// This function generates code that concatenates the vectors in \p Vecs into
	/// a single large vector. The number of vectors should be greater than one,
	/// and their element types should be the same. The number of elements in the
	/// vectors should also be the same; however, if the last vector has fewer
	/// elements, it will be padded with undefs.
	///
	/// \param Builder  The IR builder used to generate the code.
	/// \param Vecs     The vectors to concatenate, in order, as Value pointers.
	/// \returns the single large vector produced by the concatenation.
	Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);

	} // llvm namespace

	#endif