llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp - llvm-project - Git at Google

 //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file This pass recursively promotes generic pointer arguments of a kernel
 /// into the global address space.
 ///
 /// The pass walks kernel's pointer arguments, then loads from them. If a loaded
 /// value is a pointer and loaded pointer is unmodified in the kernel before the
 /// load, then promote loaded pointer to global. Then recursively continue.
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/InitializePasses.h"

 #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

 using namespace llvm;

 namespace {

 class AMDGPUPromoteKernelArguments : public FunctionPass {
   MemorySSA *MSSA;

   Instruction *ArgCastInsertPt;

   SmallVector<Value *> Ptrs;

   void enqueueUsers(Value *Ptr);

   bool promotePointer(Value *Ptr);

 public:
   static char ID;

   AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

   bool run(Function &F, MemorySSA &MSSA);

   bool runOnFunction(Function &F) override;

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MemorySSAWrapperPass>();
     AU.setPreservesAll();
   }
 };

 } // end anonymous namespace

 void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
   SmallVector<User *> PtrUsers(Ptr->users());

   while (!PtrUsers.empty()) {
     Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
     if (!U)
       continue;

     switch (U->getOpcode()) {
     default:
       break;
     case Instruction::Load: {
       LoadInst *LD = cast<LoadInst>(U);
       PointerType *PT = dyn_cast<PointerType>(LD->getType());
       if (!PT ||
           (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
            PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
            PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) ||
           LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
         break;
       const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD);
       // TODO: This load poprobably can be promoted to constant address space.
       if (MSSA->isLiveOnEntryDef(MA))
         Ptrs.push_back(LD);
       break;
     }
     case Instruction::GetElementPtr:
     case Instruction::AddrSpaceCast:
     case Instruction::BitCast:
       if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
         PtrUsers.append(U->user_begin(), U->user_end());
       break;
     }
   }
 }

 bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
   enqueueUsers(Ptr);

   PointerType *PT = cast<PointerType>(Ptr->getType());
   if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
     return false;

   bool IsArg = isa<Argument>(Ptr);
   IRBuilder<> B(IsArg ? ArgCastInsertPt
                       : &*std::next(cast<Instruction>(Ptr)->getIterator()));

   // Cast pointer to global address space and back to flat and let
   // Infer Address Spaces pass to do all necessary rewriting.
   PointerType *NewPT =
       PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
   Value *Cast =
       B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
   Value *CastBack =
       B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
   Ptr->replaceUsesWithIf(CastBack,
                          [Cast](Use &U) { return U.getUser() != Cast; });

   return true;
 }

 // skip allocas
 static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
   BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
   for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
     AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

     // If this is a dynamic alloca, the value may depend on the loaded kernargs,
     // so loads will need to be inserted before it.
     if (!AI || !AI->isStaticAlloca())
       break;
   }

   return InsPt;
 }

 bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
   if (skipFunction(F))
     return false;

   CallingConv::ID CC = F.getCallingConv();
   if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
     return false;

   ArgCastInsertPt = &*getInsertPt(*F.begin());
   this->MSSA = &MSSA;

   for (Argument &Arg : F.args()) {
     if (Arg.use_empty())
       continue;

     PointerType *PT = dyn_cast<PointerType>(Arg.getType());
     if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
       continue;

     Ptrs.push_back(&Arg);
   }

   bool Changed = false;
   while (!Ptrs.empty()) {
     Value *Ptr = Ptrs.pop_back_val();
     Changed |= promotePointer(Ptr);
   }

   return Changed;
 }

 bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
   return run(F, MSSA);
 }

 INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                       "AMDGPU Promote Kernel Arguments", false, false)
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                     "AMDGPU Promote Kernel Arguments", false, false)

 char AMDGPUPromoteKernelArguments::ID = 0;

 FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
   return new AMDGPUPromoteKernelArguments();
 }

 PreservedAnalyses
 AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                       FunctionAnalysisManager &AM) {
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   if (AMDGPUPromoteKernelArguments().run(F, MSSA)) {
     PreservedAnalyses PA;
     PA.preserveSet<CFGAnalyses>();
     PA.preserve<MemorySSAAnalysis>();
     return PA;
   }
   return PreservedAnalyses::all();
 }
	//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file This pass recursively promotes generic pointer arguments of a kernel
	/// into the global address space.
	///
	/// The pass walks kernel's pointer arguments, then loads from them. If a loaded
	/// value is a pointer and loaded pointer is unmodified in the kernel before the
	/// load, then promote loaded pointer to global. Then recursively continue.
	//
	//===----------------------------------------------------------------------===//

	#include "AMDGPU.h"
	#include "llvm/ADT/SmallVector.h"
	#include "llvm/Analysis/MemorySSA.h"
	#include "llvm/IR/IRBuilder.h"
	#include "llvm/InitializePasses.h"

	#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

	using namespace llvm;

	namespace {

	class AMDGPUPromoteKernelArguments : public FunctionPass {
	MemorySSA *MSSA;

	Instruction *ArgCastInsertPt;

	SmallVector<Value *> Ptrs;

	void enqueueUsers(Value *Ptr);

	bool promotePointer(Value *Ptr);

	public:
	static char ID;

	AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

	bool run(Function &F, MemorySSA &MSSA);

	bool runOnFunction(Function &F) override;

	void getAnalysisUsage(AnalysisUsage &AU) const override {
	AU.addRequired<MemorySSAWrapperPass>();
	AU.setPreservesAll();
	}
	};

	} // end anonymous namespace

	void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
	SmallVector<User *> PtrUsers(Ptr->users());

	while (!PtrUsers.empty()) {
	Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
	if (!U)
	continue;

	switch (U->getOpcode()) {
	default:
	break;
	case Instruction::Load: {
	LoadInst *LD = cast<LoadInst>(U);
	PointerType *PT = dyn_cast<PointerType>(LD->getType());
	if (!PT \|\|
	(PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
	PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
	PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) \|\|
	LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
	break;
	const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD);
	// TODO: This load poprobably can be promoted to constant address space.
	if (MSSA->isLiveOnEntryDef(MA))
	Ptrs.push_back(LD);
	break;
	}
	case Instruction::GetElementPtr:
	case Instruction::AddrSpaceCast:
	case Instruction::BitCast:
	if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
	PtrUsers.append(U->user_begin(), U->user_end());
	break;
	}
	}
	}

	bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
	enqueueUsers(Ptr);

	PointerType *PT = cast<PointerType>(Ptr->getType());
	if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
	return false;

	bool IsArg = isa<Argument>(Ptr);
	IRBuilder<> B(IsArg ? ArgCastInsertPt
	: &*std::next(cast<Instruction>(Ptr)->getIterator()));

	// Cast pointer to global address space and back to flat and let
	// Infer Address Spaces pass to do all necessary rewriting.
	PointerType *NewPT =
	PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
	Value *Cast =
	B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
	Value *CastBack =
	B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
	Ptr->replaceUsesWithIf(CastBack,
	[Cast](Use &U) { return U.getUser() != Cast; });

	return true;
	}

	// skip allocas
	static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
	BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
	for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
	AllocaInst AI = dyn_cast<AllocaInst>(&InsPt);

	// If this is a dynamic alloca, the value may depend on the loaded kernargs,
	// so loads will need to be inserted before it.
	if (!AI \|\| !AI->isStaticAlloca())
	break;
	}

	return InsPt;
	}

	bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
	if (skipFunction(F))
	return false;

	CallingConv::ID CC = F.getCallingConv();
	if (CC != CallingConv::AMDGPU_KERNEL \|\| F.arg_empty())
	return false;

	ArgCastInsertPt = &getInsertPt(F.begin());
	this->MSSA = &MSSA;

	for (Argument &Arg : F.args()) {
	if (Arg.use_empty())
	continue;

	PointerType *PT = dyn_cast<PointerType>(Arg.getType());
	if (!PT \|\| (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
	PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
	PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
	continue;

	Ptrs.push_back(&Arg);
	}

	bool Changed = false;
	while (!Ptrs.empty()) {
	Value *Ptr = Ptrs.pop_back_val();
	Changed \|= promotePointer(Ptr);
	}

	return Changed;
	}

	bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
	MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
	return run(F, MSSA);
	}

	INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
	"AMDGPU Promote Kernel Arguments", false, false)
	INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
	INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
	"AMDGPU Promote Kernel Arguments", false, false)

	char AMDGPUPromoteKernelArguments::ID = 0;

	FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
	return new AMDGPUPromoteKernelArguments();
	}

	PreservedAnalyses
	AMDGPUPromoteKernelArgumentsPass::run(Function &F,
	FunctionAnalysisManager &AM) {
	MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
	if (AMDGPUPromoteKernelArguments().run(F, MSSA)) {
	PreservedAnalyses PA;
	PA.preserveSet<CFGAnalyses>();
	PA.preserve<MemorySSAAnalysis>();
	return PA;
	}
	return PreservedAnalyses::all();
	}