llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp - llvm-project - Git at Google

 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// \brief This pass propagates attributes from kernels to the non-entry
 /// functions. Most of the library functions were not compiled for specific ABI,
 /// yet will be correctly compiled if proper attributes are propagated from the
 /// caller.
 ///
 /// The pass analyzes call graph and propagates ABI target features through the
 /// call graph.
 ///
 /// It can run in two modes: as a function or module pass. A function pass
 /// simply propagates attributes. A module pass clones functions if there are
 /// callers with different ABI. If a function is cloned all call sites will
 /// be updated to use a correct clone.
 ///
 /// A function pass is limited in functionality but can run early in the
 /// pipeline. A module pass is more powerful but has to run late, so misses
 /// library folding opportunities.
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/Cloning.h"

 #define DEBUG_TYPE "amdgpu-propagate-attributes"

 using namespace llvm;

 namespace llvm {
 // Table of AMDGPU subtarget feature descriptors, defined outside this file.
 // It is walked by getFeatureString() below to turn a FeatureBitset back into
 // a "target-features" attribute string.
 // NOTE(review): the array bound has to agree with the definition -- confirm
 // against wherever the table is actually emitted.
 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
 }

 namespace {

 // Subtarget feature bits that are pushed from a caller onto its callees:
 // the wavefront-size selectors.
 static constexpr FeatureBitset TargetFeatures{AMDGPU::FeatureWavefrontSize16,
                                               AMDGPU::FeatureWavefrontSize32,
                                               AMDGPU::FeatureWavefrontSize64};

 // Names of the string function attributes that are pushed from a caller onto
 // its callees.
 // TODO: Support conservative min/max merging instead of cloning.
 static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};

 // Number of entries in AttributeNames.
 static constexpr unsigned NumAttr =
     sizeof(AttributeNames) / sizeof(*AttributeNames);

 class AMDGPUPropagateAttributes {

   class FnProperties {
   private:
     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}

   public:
     explicit FnProperties(const TargetMachine &TM, const Function &F) {
       Features = TM.getSubtargetImpl(F)->getFeatureBits();

       for (unsigned I = 0; I < NumAttr; ++I)
         if (F.hasFnAttribute(AttributeNames[I]))
           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
     }

     bool operator == (const FnProperties &Other) const {
       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
         return false;
       for (unsigned I = 0; I < NumAttr; ++I)
         if (Attributes[I] != Other.Attributes[I])
           return false;
       return true;
     }

     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
       for (unsigned I = 0; I < NumAttr; ++I)
         New.Attributes[I] = CallerProps.Attributes[I];
       return New;
     }

     FeatureBitset Features;
     Optional<Attribute> Attributes[NumAttr];
   };

   class Clone {
   public:
     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
       Properties(Props), OrigF(OrigF), NewF(NewF) {}

     FnProperties Properties;
     Function *OrigF;
     Function *NewF;
   };

   const TargetMachine *TM;

   // Clone functions as needed or just set attributes.
   bool AllowClone;

   // Option propagation roots.
   SmallSet<Function *, 32> Roots;

   // Clones of functions with their attributes.
   SmallVector<Clone, 32> Clones;

   // Find a clone with required features.
   Function *findFunction(const FnProperties &PropsNeeded,
                          Function *OrigF);

   // Clone function \p F and set \p NewProps on the clone.
   // Cole takes the name of original function.
   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);

   // Set new function's features in place.
   void setFeatures(Function &F, const FeatureBitset &NewFeatures);

   // Set new function's attributes in place.
   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);

   std::string getFeatureString(const FeatureBitset &Features) const;

   // Propagate attributes from Roots.
   bool process();

 public:
   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
     TM(TM), AllowClone(AllowClone) {}

   // Use F as a root and propagate its attributes.
   bool process(Function &F);

   // Propagate attributes starting from kernel functions.
   bool process(Module &M);
 };

 // Allows to propagate attributes early, but no cloning is allowed as it must
 // be a function pass to run before any optimizations.
 // TODO: We shall only need a one instance of module pass, but that needs to be
 // in the linker pipeline which is currently not possible.
 class AMDGPUPropagateAttributesEarly : public FunctionPass {
   const TargetMachine *TM;

 public:
   static char ID; // Pass identification

   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
     FunctionPass(ID), TM(TM) {
     initializeAMDGPUPropagateAttributesEarlyPass(
       *PassRegistry::getPassRegistry());
   }

   bool runOnFunction(Function &F) override;
 };

 // Allows to propagate attributes with cloning but does that late in the
 // pipeline.
 class AMDGPUPropagateAttributesLate : public ModulePass {
   const TargetMachine *TM;

 public:
   static char ID; // Pass identification

   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
     ModulePass(ID), TM(TM) {
     initializeAMDGPUPropagateAttributesLatePass(
       *PassRegistry::getPassRegistry());
   }

   bool runOnModule(Module &M) override;
 };

 }  // end anonymous namespace.

 // Storage for the legacy pass-manager IDs declared in the two pass classes.
 char AMDGPUPropagateAttributesEarly::ID = 0;
 char AMDGPUPropagateAttributesLate::ID = 0;

 // Register both legacy passes under their command-line names. The trailing
 // "false, false" arguments are the macro's two boolean flags.
 // NOTE(review): presumably "CFG only" and "is analysis" -- confirm against
 // the INITIALIZE_PASS definition.
 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
                 "amdgpu-propagate-attributes-early",
                 "Early propagate attributes from kernels to functions",
                 false, false)
 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                 "amdgpu-propagate-attributes-late",
                 "Late propagate attributes from kernels to functions",
                 false, false)

 /// Look through the recorded clones for one that was made from \p OrigF for
 /// properties equal to \p PropsNeeded.
 /// \returns the matching clone, or nullptr when no such clone exists.
 Function *
 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
                                         Function *OrigF) {
   // TODO: search for clone's clones.
   for (const Clone &Candidate : Clones) {
     if (Candidate.OrigF != OrigF)
       continue;
     if (PropsNeeded == Candidate.Properties)
       return Candidate.NewF;
   }

   return nullptr;
 }

 /// Seed the root set with every function of \p M that has a kernel calling
 /// convention and run the propagation.
 /// \returns false without doing anything when \p M has no kernels, otherwise
 /// whatever the propagation reports.
 bool AMDGPUPropagateAttributes::process(Module &M) {
   for (Function &Fn : M.functions()) {
     if (!AMDGPU::isKernel(Fn.getCallingConv()))
       continue;
     Roots.insert(&Fn);
   }

   if (Roots.empty())
     return false;
   return process();
 }

 /// Add \p F to the root set and run the propagation from there.
 /// \returns the result of the root-driven process().
 bool AMDGPUPropagateAttributes::process(Function &F) {
   Function *Root = &F;
   Roots.insert(Root);
   const bool Changed = process();
   return Changed;
 }

 /// Propagate properties from the functions currently in Roots along direct
 /// calls, sweeping the module repeatedly until a sweep discovers no new
 /// roots. Roots must not be empty. Roots and Clones are cleared on exit.
 /// \returns true if any function's attributes or any call site was changed.
 bool AMDGPUPropagateAttributes::process() {
   bool Changed = false;
   // Functions reached in the current sweep; they become roots for the next.
   SmallSet<Function *, 32> NewRoots;
   // Original functions that had at least one call redirected to a clone.
   SmallSet<Function *, 32> Replaced;

   assert(!Roots.empty());
   // The module is taken from the first root.
   Module &M = *(*Roots.begin())->getParent();

   do {
     Roots.insert(NewRoots.begin(), NewRoots.end());
     NewRoots.clear();

     for (auto &F : M.functions()) {
       if (F.isDeclaration())
         continue;

       const FnProperties CalleeProps(*TM, F);
       // Call sites to retarget are only collected during the user walk and
       // rewritten afterwards (presumably so F's use list is not changed
       // while it is being iterated).
       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
       // Call instructions already handled for this F.
       SmallSet<CallBase *, 32> Visited;

       for (User *U : F.users()) {
         Instruction *I = dyn_cast<Instruction>(U);
         if (!I)
           continue;
         CallBase *CI = dyn_cast<CallBase>(I);
         // Only propagate attributes if F is the called function. Specifically,
         // do not propagate attributes if F is passed as an argument.
         // FIXME: handle bitcasted callee, e.g.
         // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
         if (!CI || CI->getCalledOperand() != &F)
           continue;
         Function *Caller = CI->getCaller();
         if (!Caller || !Visited.insert(CI).second)
           continue;
         // Only calls made from a root (old or newly found) propagate anything.
         if (!Roots.count(Caller) && !NewRoots.count(Caller))
           continue;

         const FnProperties CallerProps(*TM, *Caller);

         // Callee already agrees with this caller: nothing to rewrite, but
         // make sure propagation continues from the callee.
         if (CalleeProps == CallerProps) {
           if (!Roots.count(&F))
             NewRoots.insert(&F);
           continue;
         }

         // Reuse a clone of F already made for these caller properties.
         Function *NewF = findFunction(CallerProps, &F);
         if (!NewF) {
           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
           if (!AllowClone) {
             // This may set different features on different iterations if
             // there is a contradiction in callers' attributes. In this case
             // we rely on a second pass running on Module, which is allowed
             // to clone.
             setFeatures(F, NewProps.Features);
             setAttributes(F, NewProps.Attributes);
             NewRoots.insert(&F);
             Changed = true;
             break;
           }

           // Record the clone under the caller's properties so that
           // findFunction() can match it for later call sites.
           NewF = cloneWithProperties(F, NewProps);
           Clones.push_back(Clone(CallerProps, &F, NewF));
           NewRoots.insert(NewF);
         }

         ToReplace.push_back(std::make_pair(CI, NewF));
         Replaced.insert(&F);

         Changed = true;
       }

       // Now redirect the collected call sites to their clones.
       while (!ToReplace.empty()) {
         auto R = ToReplace.pop_back_val();
         R.first->setCalledFunction(R.second);
       }
     }
   } while (!NewRoots.empty());

   // Erase originals that are left without any use after the redirection.
   for (Function *F : Replaced) {
     if (F->use_empty())
       F->eraseFromParent();
   }

   // Reset per-run state.
   Roots.clear();
   Clones.clear();

   return Changed;
 }

 /// Make a copy of \p F carrying the features and attributes of \p NewProps.
 /// The copy gets default visibility and internal linkage. If \p F is named
 /// and has local linkage, the copy and \p F exchange names.
 /// \returns the newly created function.
 Function *
 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
                                                const FnProperties &NewProps) {
   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');

   ValueToValueMapTy VMap;
   Function *Copy = CloneFunction(&F, VMap);
   setFeatures(*Copy, NewProps.Features);
   setAttributes(*Copy, NewProps.Attributes);
   Copy->setVisibility(GlobalValue::DefaultVisibility);
   Copy->setLinkage(GlobalValue::InternalLinkage);

   // Externally visible (or unnamed) functions keep their original name.
   if (!F.hasName() || !F.hasLocalLinkage())
     return Copy;

   // Swap names. If that is the only clone it will retain the name of now
   // dead value.
   std::string CloneName = std::string(Copy->getName());
   Copy->takeName(&F);
   F.setName(CloneName);

   return Copy;
 }

 /// Replace the "target-features" attribute of \p F with the string form of
 /// \p NewFeatures.
 void AMDGPUPropagateAttributes::setFeatures(Function &F,
                                             const FeatureBitset &NewFeatures) {
   LLVM_DEBUG(dbgs() << "Set features "
                     << getFeatureString(NewFeatures & TargetFeatures)
                     << " on " << F.getName() << '\n');

   const std::string FeatureStr = getFeatureString(NewFeatures);
   F.removeFnAttr("target-features");
   F.addFnAttr("target-features", FeatureStr);
 }

 /// For every tracked attribute name, drop it from \p F and, if the matching
 /// entry of \p NewAttrs holds a value, add that value back.
 void AMDGPUPropagateAttributes::setAttributes(Function &F,
     const ArrayRef<Optional<Attribute>> NewAttrs) {
   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
   for (unsigned Idx = 0; Idx != NumAttr; ++Idx) {
     const Optional<Attribute> &Attr = NewAttrs[Idx];
     F.removeFnAttr(AttributeNames[Idx]);
     if (!Attr)
       continue;
     LLVM_DEBUG(dbgs() << '\t' << Attr->getAsString() << '\n');
     F.addFnAttr(*Attr);
   }
 }

 std::string
 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
 {
   std::string Ret;
   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
     if (Features[KV.Value])
       Ret += (StringRef("+") + KV.Key + ",").str();
     else if (TargetFeatures[KV.Value])
       Ret += (StringRef("-") + KV.Key + ",").str();
   }
   Ret.pop_back(); // Remove last comma.
   return Ret;
 }

 /// Legacy-PM entry point of the early pass. Resolves the target machine from
 /// TargetPassConfig if none was supplied, then propagates in place (no
 /// cloning) starting from \p F if it has a kernel calling convention.
 /// \returns true if anything was changed.
 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
   if (TM == nullptr) {
     auto *PassConfig = getAnalysisIfAvailable<TargetPassConfig>();
     if (PassConfig == nullptr)
       return false;

     TM = &PassConfig->getTM<TargetMachine>();
   }

   const bool IsKernel = AMDGPU::isKernel(F.getCallingConv());
   if (!IsKernel)
     return false;

   AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/false);
   return Propagator.process(F);
 }

 /// Legacy-PM entry point of the late pass. Resolves the target machine from
 /// TargetPassConfig if none was supplied, then propagates with cloning
 /// enabled across \p M.
 /// \returns true if anything was changed.
 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
   if (TM == nullptr) {
     auto *PassConfig = getAnalysisIfAvailable<TargetPassConfig>();
     if (PassConfig == nullptr)
       return false;

     TM = &PassConfig->getTM<TargetMachine>();
   }

   AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/true);
   return Propagator.process(M);
 }

 /// Factory for the legacy early (function) pass. The returned pass is
 /// heap-allocated; \p TM may be null.
 FunctionPass *
 llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
   auto *Pass = new AMDGPUPropagateAttributesEarly(TM);
   return Pass;
 }

 /// Factory for the legacy late (module) pass. The returned pass is
 /// heap-allocated; \p TM may be null.
 ModulePass *
 llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
   auto *Pass = new AMDGPUPropagateAttributesLate(TM);
   return Pass;
 }

 /// New-PM entry point of the early pass: propagate in place (no cloning)
 /// starting from \p F when it has an entry-function calling convention.
 /// \returns PreservedAnalyses::none() if anything changed, all() otherwise.
 PreservedAnalyses
 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
   // NOTE(review): this tests isEntryFunctionCC while the legacy
   // runOnFunction() and process(Module &) test isKernel -- confirm the
   // difference is intended.
   if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
     AMDGPUPropagateAttributes Propagator(&TM, /*AllowClone=*/false);
     if (Propagator.process(F))
       return PreservedAnalyses::none();
   }

   return PreservedAnalyses::all();
 }

 /// New-PM entry point of the late pass: propagate across \p M with cloning
 /// enabled.
 /// \returns PreservedAnalyses::none() if anything changed, all() otherwise.
 PreservedAnalyses
 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
   AMDGPUPropagateAttributes Propagator(&TM, /*AllowClone=*/true);
   if (Propagator.process(M))
     return PreservedAnalyses::none();

   return PreservedAnalyses::all();
 }
	//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file
	/// \brief This pass propagates attributes from kernels to the non-entry
	/// functions. Most of the library functions were not compiled for specific ABI,
	/// yet will be correctly compiled if proper attributes are propagated from the
	/// caller.
	///
	/// The pass analyzes call graph and propagates ABI target features through the
	/// call graph.
	///
	/// It can run in two modes: as a function or module pass. A function pass
	/// simply propagates attributes. A module pass clones functions if there are
	/// callers with different ABI. If a function is cloned all call sites will
	/// be updated to use a correct clone.
	///
	/// A function pass is limited in functionality but can run early in the
	/// pipeline. A module pass is more powerful but has to run late, so misses
	/// library folding opportunities.
	//
	//===----------------------------------------------------------------------===//

	#include "AMDGPU.h"
	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
	#include "Utils/AMDGPUBaseInfo.h"
	#include "llvm/ADT/SmallSet.h"
	#include "llvm/CodeGen/TargetPassConfig.h"
	#include "llvm/CodeGen/TargetSubtargetInfo.h"
	#include "llvm/IR/InstrTypes.h"
	#include "llvm/Target/TargetMachine.h"
	#include "llvm/Transforms/Utils/Cloning.h"

	#define DEBUG_TYPE "amdgpu-propagate-attributes"

	using namespace llvm;

	namespace llvm {
	// Table of AMDGPU subtarget feature descriptors, defined outside this file.
	// It is walked by getFeatureString() below to turn a FeatureBitset back into
	// a "target-features" attribute string.
	// NOTE(review): the array bound has to agree with the definition -- confirm
	// against wherever the table is actually emitted.
	extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
	}

	namespace {

	// Subtarget feature bits that are pushed from a caller onto its callees:
	// the wavefront-size selectors.
	static constexpr FeatureBitset TargetFeatures{AMDGPU::FeatureWavefrontSize16,
	                                              AMDGPU::FeatureWavefrontSize32,
	                                              AMDGPU::FeatureWavefrontSize64};

	// Names of the string function attributes that are pushed from a caller onto
	// its callees.
	// TODO: Support conservative min/max merging instead of cloning.
	static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};

	// Number of entries in AttributeNames.
	static constexpr unsigned NumAttr =
	    sizeof(AttributeNames) / sizeof(*AttributeNames);

	class AMDGPUPropagateAttributes {

	class FnProperties {
	private:
	explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}

	public:
	explicit FnProperties(const TargetMachine &TM, const Function &F) {
	Features = TM.getSubtargetImpl(F)->getFeatureBits();

	for (unsigned I = 0; I < NumAttr; ++I)
	if (F.hasFnAttribute(AttributeNames[I]))
	Attributes[I] = F.getFnAttribute(AttributeNames[I]);
	}

	bool operator == (const FnProperties &Other) const {
	if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
	return false;
	for (unsigned I = 0; I < NumAttr; ++I)
	if (Attributes[I] != Other.Attributes[I])
	return false;
	return true;
	}

	FnProperties adjustToCaller(const FnProperties &CallerProps) const {
	FnProperties New((Features & ~TargetFeatures) \| CallerProps.Features);
	for (unsigned I = 0; I < NumAttr; ++I)
	New.Attributes[I] = CallerProps.Attributes[I];
	return New;
	}

	FeatureBitset Features;
	Optional<Attribute> Attributes[NumAttr];
	};

	class Clone {
	public:
	Clone(const FnProperties &Props, Function OrigF, Function NewF) :
	Properties(Props), OrigF(OrigF), NewF(NewF) {}

	FnProperties Properties;
	Function *OrigF;
	Function *NewF;
	};

	const TargetMachine *TM;

	// Clone functions as needed or just set attributes.
	bool AllowClone;

	// Option propagation roots.
	SmallSet<Function *, 32> Roots;

	// Clones of functions with their attributes.
	SmallVector<Clone, 32> Clones;

	// Find a clone with required features.
	Function *findFunction(const FnProperties &PropsNeeded,
	Function *OrigF);

	// Clone function \p F and set \p NewProps on the clone.
	// Cole takes the name of original function.
	Function *cloneWithProperties(Function &F, const FnProperties &NewProps);

	// Set new function's features in place.
	void setFeatures(Function &F, const FeatureBitset &NewFeatures);

	// Set new function's attributes in place.
	void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);

	std::string getFeatureString(const FeatureBitset &Features) const;

	// Propagate attributes from Roots.
	bool process();

	public:
	AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
	TM(TM), AllowClone(AllowClone) {}

	// Use F as a root and propagate its attributes.
	bool process(Function &F);

	// Propagate attributes starting from kernel functions.
	bool process(Module &M);
	};

	// Allows to propagate attributes early, but no cloning is allowed as it must
	// be a function pass to run before any optimizations.
	// TODO: We shall only need a one instance of module pass, but that needs to be
	// in the linker pipeline which is currently not possible.
	class AMDGPUPropagateAttributesEarly : public FunctionPass {
	const TargetMachine *TM;

	public:
	static char ID; // Pass identification

	AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
	FunctionPass(ID), TM(TM) {
	initializeAMDGPUPropagateAttributesEarlyPass(
	*PassRegistry::getPassRegistry());
	}

	bool runOnFunction(Function &F) override;
	};

	// Allows to propagate attributes with cloning but does that late in the
	// pipeline.
	class AMDGPUPropagateAttributesLate : public ModulePass {
	const TargetMachine *TM;

	public:
	static char ID; // Pass identification

	AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
	ModulePass(ID), TM(TM) {
	initializeAMDGPUPropagateAttributesLatePass(
	*PassRegistry::getPassRegistry());
	}

	bool runOnModule(Module &M) override;
	};

	} // end anonymous namespace.

	// Storage for the legacy pass-manager IDs declared in the two pass classes.
	char AMDGPUPropagateAttributesEarly::ID = 0;
	char AMDGPUPropagateAttributesLate::ID = 0;

	// Register both legacy passes under their command-line names. The trailing
	// "false, false" arguments are the macro's two boolean flags.
	// NOTE(review): presumably "CFG only" and "is analysis" -- confirm against
	// the INITIALIZE_PASS definition.
	INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
	"amdgpu-propagate-attributes-early",
	"Early propagate attributes from kernels to functions",
	false, false)
	INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
	"amdgpu-propagate-attributes-late",
	"Late propagate attributes from kernels to functions",
	false, false)

	/// Look through the recorded clones for one that was made from \p OrigF for
	/// properties equal to \p PropsNeeded.
	/// \returns the matching clone, or nullptr when no such clone exists.
	Function *
	AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
	                                        Function *OrigF) {
	  // TODO: search for clone's clones.
	  for (const Clone &Candidate : Clones) {
	    if (Candidate.OrigF != OrigF)
	      continue;
	    if (PropsNeeded == Candidate.Properties)
	      return Candidate.NewF;
	  }

	  return nullptr;
	}

	/// Seed the root set with every function of \p M that has a kernel calling
	/// convention and run the propagation.
	/// \returns false without doing anything when \p M has no kernels, otherwise
	/// whatever the propagation reports.
	bool AMDGPUPropagateAttributes::process(Module &M) {
	  for (Function &Fn : M.functions()) {
	    if (!AMDGPU::isKernel(Fn.getCallingConv()))
	      continue;
	    Roots.insert(&Fn);
	  }

	  if (Roots.empty())
	    return false;
	  return process();
	}

	/// Add \p F to the root set and run the propagation from there.
	/// \returns the result of the root-driven process().
	bool AMDGPUPropagateAttributes::process(Function &F) {
	  Function *Root = &F;
	  Roots.insert(Root);
	  const bool Changed = process();
	  return Changed;
	}

	/// Propagate properties from the functions currently in Roots along direct
	/// calls, sweeping the module repeatedly until a sweep discovers no new
	/// roots. Roots must not be empty. Roots and Clones are cleared on exit.
	/// \returns true if any function's attributes or any call site was changed.
	bool AMDGPUPropagateAttributes::process() {
	  bool Changed = false;
	  // Functions reached in the current sweep; they become roots for the next.
	  SmallSet<Function *, 32> NewRoots;
	  // Original functions that had at least one call redirected to a clone.
	  SmallSet<Function *, 32> Replaced;

	  assert(!Roots.empty());
	  // The module is taken from the first root.
	  Module &M = *(*Roots.begin())->getParent();

	  do {
	    Roots.insert(NewRoots.begin(), NewRoots.end());
	    NewRoots.clear();

	    for (auto &F : M.functions()) {
	      if (F.isDeclaration())
	        continue;

	      const FnProperties CalleeProps(*TM, F);
	      // Call sites to retarget are only collected during the user walk and
	      // rewritten afterwards (presumably so F's use list is not changed
	      // while it is being iterated).
	      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
	      // Call instructions already handled for this F.
	      SmallSet<CallBase *, 32> Visited;

	      for (User *U : F.users()) {
	        Instruction *I = dyn_cast<Instruction>(U);
	        if (!I)
	          continue;
	        CallBase *CI = dyn_cast<CallBase>(I);
	        // Only propagate attributes if F is the called function. Specifically,
	        // do not propagate attributes if F is passed as an argument.
	        // FIXME: handle bitcasted callee, e.g.
	        // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
	        if (!CI || CI->getCalledOperand() != &F)
	          continue;
	        Function *Caller = CI->getCaller();
	        if (!Caller || !Visited.insert(CI).second)
	          continue;
	        // Only calls made from a root (old or newly found) propagate anything.
	        if (!Roots.count(Caller) && !NewRoots.count(Caller))
	          continue;

	        const FnProperties CallerProps(*TM, *Caller);

	        // Callee already agrees with this caller: nothing to rewrite, but
	        // make sure propagation continues from the callee.
	        if (CalleeProps == CallerProps) {
	          if (!Roots.count(&F))
	            NewRoots.insert(&F);
	          continue;
	        }

	        // Reuse a clone of F already made for these caller properties.
	        Function *NewF = findFunction(CallerProps, &F);
	        if (!NewF) {
	          const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
	          if (!AllowClone) {
	            // This may set different features on different iterations if
	            // there is a contradiction in callers' attributes. In this case
	            // we rely on a second pass running on Module, which is allowed
	            // to clone.
	            setFeatures(F, NewProps.Features);
	            setAttributes(F, NewProps.Attributes);
	            NewRoots.insert(&F);
	            Changed = true;
	            break;
	          }

	          // Record the clone under the caller's properties so that
	          // findFunction() can match it for later call sites.
	          NewF = cloneWithProperties(F, NewProps);
	          Clones.push_back(Clone(CallerProps, &F, NewF));
	          NewRoots.insert(NewF);
	        }

	        ToReplace.push_back(std::make_pair(CI, NewF));
	        Replaced.insert(&F);

	        Changed = true;
	      }

	      // Now redirect the collected call sites to their clones.
	      while (!ToReplace.empty()) {
	        auto R = ToReplace.pop_back_val();
	        R.first->setCalledFunction(R.second);
	      }
	    }
	  } while (!NewRoots.empty());

	  // Erase originals that are left without any use after the redirection.
	  for (Function *F : Replaced) {
	    if (F->use_empty())
	      F->eraseFromParent();
	  }

	  // Reset per-run state.
	  Roots.clear();
	  Clones.clear();

	  return Changed;
	}

	/// Make a copy of \p F carrying the features and attributes of \p NewProps.
	/// The copy gets default visibility and internal linkage. If \p F is named
	/// and has local linkage, the copy and \p F exchange names.
	/// \returns the newly created function.
	Function *
	AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
	                                               const FnProperties &NewProps) {
	  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');

	  ValueToValueMapTy VMap;
	  Function *Copy = CloneFunction(&F, VMap);
	  setFeatures(*Copy, NewProps.Features);
	  setAttributes(*Copy, NewProps.Attributes);
	  Copy->setVisibility(GlobalValue::DefaultVisibility);
	  Copy->setLinkage(GlobalValue::InternalLinkage);

	  // Externally visible (or unnamed) functions keep their original name.
	  if (!F.hasName() || !F.hasLocalLinkage())
	    return Copy;

	  // Swap names. If that is the only clone it will retain the name of now
	  // dead value.
	  std::string CloneName = std::string(Copy->getName());
	  Copy->takeName(&F);
	  F.setName(CloneName);

	  return Copy;
	}

	/// Replace the "target-features" attribute of \p F with the string form of
	/// \p NewFeatures.
	void AMDGPUPropagateAttributes::setFeatures(Function &F,
	                                            const FeatureBitset &NewFeatures) {
	  LLVM_DEBUG(dbgs() << "Set features "
	                    << getFeatureString(NewFeatures & TargetFeatures)
	                    << " on " << F.getName() << '\n');

	  const std::string FeatureStr = getFeatureString(NewFeatures);
	  F.removeFnAttr("target-features");
	  F.addFnAttr("target-features", FeatureStr);
	}

	/// For every tracked attribute name, drop it from \p F and, if the matching
	/// entry of \p NewAttrs holds a value, add that value back.
	void AMDGPUPropagateAttributes::setAttributes(Function &F,
	    const ArrayRef<Optional<Attribute>> NewAttrs) {
	  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
	  for (unsigned Idx = 0; Idx != NumAttr; ++Idx) {
	    const Optional<Attribute> &Attr = NewAttrs[Idx];
	    F.removeFnAttr(AttributeNames[Idx]);
	    if (!Attr)
	      continue;
	    LLVM_DEBUG(dbgs() << '\t' << Attr->getAsString() << '\n');
	    F.addFnAttr(*Attr);
	  }
	}

	std::string
	AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
	{
	std::string Ret;
	for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
	if (Features[KV.Value])
	Ret += (StringRef("+") + KV.Key + ",").str();
	else if (TargetFeatures[KV.Value])
	Ret += (StringRef("-") + KV.Key + ",").str();
	}
	Ret.pop_back(); // Remove last comma.
	return Ret;
	}

	/// Legacy-PM entry point of the early pass. Resolves the target machine from
	/// TargetPassConfig if none was supplied, then propagates in place (no
	/// cloning) starting from \p F if it has a kernel calling convention.
	/// \returns true if anything was changed.
	bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
	  if (TM == nullptr) {
	    auto *PassConfig = getAnalysisIfAvailable<TargetPassConfig>();
	    if (PassConfig == nullptr)
	      return false;

	    TM = &PassConfig->getTM<TargetMachine>();
	  }

	  const bool IsKernel = AMDGPU::isKernel(F.getCallingConv());
	  if (!IsKernel)
	    return false;

	  AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/false);
	  return Propagator.process(F);
	}

	/// Legacy-PM entry point of the late pass. Resolves the target machine from
	/// TargetPassConfig if none was supplied, then propagates with cloning
	/// enabled across \p M.
	/// \returns true if anything was changed.
	bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
	  if (TM == nullptr) {
	    auto *PassConfig = getAnalysisIfAvailable<TargetPassConfig>();
	    if (PassConfig == nullptr)
	      return false;

	    TM = &PassConfig->getTM<TargetMachine>();
	  }

	  AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/true);
	  return Propagator.process(M);
	}

	/// Factory for the legacy early (function) pass. The returned pass is
	/// heap-allocated; \p TM may be null.
	FunctionPass *
	llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
	  return new AMDGPUPropagateAttributesEarly(TM);
	}

	/// Factory for the legacy late (module) pass. The returned pass is
	/// heap-allocated; \p TM may be null.
	ModulePass *
	llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
	  return new AMDGPUPropagateAttributesLate(TM);
	}

	/// New-PM entry point of the early pass: propagate in place (no cloning)
	/// starting from \p F when it has an entry-function calling convention.
	/// \returns PreservedAnalyses::none() if anything changed, all() otherwise.
	PreservedAnalyses
	AMDGPUPropagateAttributesEarlyPass::run(Function &F,
	                                        FunctionAnalysisManager &AM) {
	  // NOTE(review): this tests isEntryFunctionCC while the legacy
	  // runOnFunction() and process(Module &) test isKernel -- confirm the
	  // difference is intended.
	  if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
	    AMDGPUPropagateAttributes Propagator(&TM, /*AllowClone=*/false);
	    if (Propagator.process(F))
	      return PreservedAnalyses::none();
	  }

	  return PreservedAnalyses::all();
	}

	/// New-PM entry point of the late pass: propagate across \p M with cloning
	/// enabled.
	/// \returns PreservedAnalyses::none() if anything changed, all() otherwise.
	PreservedAnalyses
	AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
	  AMDGPUPropagateAttributes Propagator(&TM, /*AllowClone=*/true);
	  if (Propagator.process(M))
	    return PreservedAnalyses::none();

	  return PreservedAnalyses::all();
	}