| //===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // This file implements two passes that enable HIP C++ Standard Parallelism |
| // Support: |
| // |
| // 1. AcceleratorCodeSelection (required): Given that only algorithms are |
| // accelerated, and that the accelerated implementation exists in the form of |
| // a compute kernel, we assume that only the kernel, and all functions |
| // reachable from it, constitute code that the user expects the accelerator |
| // to execute. Thus, we identify the set of all functions reachable from |
| // kernels, and then remove all unreachable ones. This last part is necessary |
| // because it is possible for code that the user did not expect to execute on |
| // an accelerator to contain constructs that cannot be handled by the target |
| // BE, which cannot be provably demonstrated to be dead code in general, and |
| // thus can lead to mis-compilation. The degenerate case of this is when a |
| // Module contains no kernels (the parent TU had no algorithm invocations fit |
| // for acceleration), which we handle by completely emptying said module. |
| // **NOTE**: The above does not handle indirectly reachable functions i.e. |
| // it is possible to obtain a case where the target of an indirect |
| // call is otherwise unreachable and thus is removed; this |
| // restriction is aligned with the current `-hipstdpar` limitations |
| // and will be relaxed in the future. |
| // |
| // 2. AllocationInterposition (required only when on-demand paging is |
| // unsupported): Some accelerators or operating systems might not support |
| // transparent on-demand paging. Thus, they would only be able to access |
| // memory that is allocated by an accelerator-aware mechanism. For such cases |
| // the user can opt into enabling allocation / deallocation interposition, |
| // whereby we replace calls to known allocation / deallocation functions with |
| // calls to runtime implemented equivalents that forward the requests to |
| // accelerator-aware interfaces. We also support freeing system allocated |
| // memory that ends up in one of the runtime equivalents, since this can |
| // happen if e.g. a library that was compiled without interposition returns |
| // an allocation that can be validly passed to `free`. |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Transforms/HipStdPar/HipStdPar.h" |
| |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/Analysis/CallGraph.h" |
| #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DebugInfoMetadata.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/Transforms/Utils/ModuleUtils.h" |
| |
| #include <cassert> |
| #include <string> |
| #include <utility> |
| |
| using namespace llvm; |
| |
| template<typename T> |
| static inline void eraseFromModule(T &ToErase) { |
| ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType())); |
| ToErase.eraseFromParent(); |
| } |
| |
| static inline bool checkIfSupported(GlobalVariable &G) { |
| if (!G.isThreadLocal()) |
| return true; |
| |
| G.dropDroppableUses(); |
| |
| if (!G.isConstantUsed()) |
| return true; |
| |
| std::string W; |
| raw_string_ostream OS(W); |
| |
| OS << "Accelerator does not support the thread_local variable " |
| << G.getName(); |
| |
| Instruction *I = nullptr; |
| SmallVector<User *> Tmp(G.user_begin(), G.user_end()); |
| SmallPtrSet<User *, 5> Visited; |
| do { |
| auto U = std::move(Tmp.back()); |
| Tmp.pop_back(); |
| |
| if (Visited.contains(U)) |
| continue; |
| |
| if (isa<Instruction>(U)) |
| I = cast<Instruction>(U); |
| else |
| Tmp.insert(Tmp.end(), U->user_begin(), U->user_end()); |
| |
| Visited.insert(U); |
| } while (!I && !Tmp.empty()); |
| |
| assert(I && "thread_local global should have at least one non-constant use."); |
| |
| G.getContext().diagnose( |
| DiagnosticInfoUnsupported(*I->getParent()->getParent(), W, |
| I->getDebugLoc(), DS_Error)); |
| |
| return false; |
| } |
| |
| static inline void clearModule(Module &M) { // TODO: simplify. |
| while (!M.functions().empty()) |
| eraseFromModule(*M.begin()); |
| while (!M.globals().empty()) |
| eraseFromModule(*M.globals().begin()); |
| while (!M.aliases().empty()) |
| eraseFromModule(*M.aliases().begin()); |
| while (!M.ifuncs().empty()) |
| eraseFromModule(*M.ifuncs().begin()); |
| } |
| |
| static inline void maybeHandleGlobals(Module &M) { |
| unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace(); |
| for (auto &&G : M.globals()) { // TODO: should we handle these in the FE? |
| if (!checkIfSupported(G)) |
| return clearModule(M); |
| |
| if (G.isThreadLocal()) |
| continue; |
| if (G.isConstant()) |
| continue; |
| if (G.getAddressSpace() != GlobAS) |
| continue; |
| if (G.getLinkage() != GlobalVariable::ExternalLinkage) |
| continue; |
| |
| G.setLinkage(GlobalVariable::ExternalWeakLinkage); |
| G.setInitializer(nullptr); |
| G.setExternallyInitialized(true); |
| } |
| } |
| |
| template<unsigned N> |
| static inline void removeUnreachableFunctions( |
| const SmallPtrSet<const Function *, N>& Reachable, Module &M) { |
| removeFromUsedLists(M, [&](Constant *C) { |
| if (auto F = dyn_cast<Function>(C)) |
| return !Reachable.contains(F); |
| |
| return false; |
| }); |
| |
| SmallVector<std::reference_wrapper<Function>> ToRemove; |
| copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) { |
| return !F.isIntrinsic() && !Reachable.contains(&F); |
| }); |
| |
| for_each(ToRemove, eraseFromModule<Function>); |
| } |
| |
| static inline bool isAcceleratorExecutionRoot(const Function *F) { |
| if (!F) |
| return false; |
| |
| return F->getCallingConv() == CallingConv::AMDGPU_KERNEL; |
| } |
| |
| static inline bool checkIfSupported(const Function *F, const CallBase *CB) { |
| const auto Dx = F->getName().rfind("__hipstdpar_unsupported"); |
| |
| if (Dx == StringRef::npos) |
| return true; |
| |
| const auto N = F->getName().substr(0, Dx); |
| |
| std::string W; |
| raw_string_ostream OS(W); |
| |
| if (N == "__ASM") |
| OS << "Accelerator does not support the ASM block:\n" |
| << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString(); |
| else |
| OS << "Accelerator does not support the " << N << " function."; |
| |
| auto Caller = CB->getParent()->getParent(); |
| |
| Caller->getContext().diagnose( |
| DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error)); |
| |
| return false; |
| } |
| |
| PreservedAnalyses |
| HipStdParAcceleratorCodeSelectionPass::run(Module &M, |
| ModuleAnalysisManager &MAM) { |
| auto &CGA = MAM.getResult<CallGraphAnalysis>(M); |
| |
| SmallPtrSet<const Function *, 32> Reachable; |
| for (auto &&CGN : CGA) { |
| if (!isAcceleratorExecutionRoot(CGN.first)) |
| continue; |
| |
| Reachable.insert(CGN.first); |
| |
| SmallVector<const Function *> Tmp({CGN.first}); |
| do { |
| auto F = std::move(Tmp.back()); |
| Tmp.pop_back(); |
| |
| for (auto &&N : *CGA[F]) { |
| if (!N.second) |
| continue; |
| if (!N.second->getFunction()) |
| continue; |
| if (Reachable.contains(N.second->getFunction())) |
| continue; |
| |
| if (!checkIfSupported(N.second->getFunction(), |
| dyn_cast<CallBase>(*N.first))) |
| return PreservedAnalyses::none(); |
| |
| Reachable.insert(N.second->getFunction()); |
| Tmp.push_back(N.second->getFunction()); |
| } |
| } while (!std::empty(Tmp)); |
| } |
| |
| if (std::empty(Reachable)) |
| clearModule(M); |
| else |
| removeUnreachableFunctions(Reachable, M); |
| |
| maybeHandleGlobals(M); |
| |
| return PreservedAnalyses::none(); |
| } |
| |
| static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{ |
| {"aligned_alloc", "__hipstdpar_aligned_alloc"}, |
| {"calloc", "__hipstdpar_calloc"}, |
| {"free", "__hipstdpar_free"}, |
| {"malloc", "__hipstdpar_malloc"}, |
| {"memalign", "__hipstdpar_aligned_alloc"}, |
| {"posix_memalign", "__hipstdpar_posix_aligned_alloc"}, |
| {"realloc", "__hipstdpar_realloc"}, |
| {"reallocarray", "__hipstdpar_realloc_array"}, |
| {"_ZdaPv", "__hipstdpar_operator_delete"}, |
| {"_ZdaPvm", "__hipstdpar_operator_delete_sized"}, |
| {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"}, |
| {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"}, |
| {"_ZdlPv", "__hipstdpar_operator_delete"}, |
| {"_ZdlPvm", "__hipstdpar_operator_delete_sized"}, |
| {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"}, |
| {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"}, |
| {"_Znam", "__hipstdpar_operator_new"}, |
| {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"}, |
| {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"}, |
| {"_ZnamSt11align_val_tRKSt9nothrow_t", |
| "__hipstdpar_operator_new_aligned_nothrow"}, |
| |
| {"_Znwm", "__hipstdpar_operator_new"}, |
| {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"}, |
| {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"}, |
| {"_ZnwmSt11align_val_tRKSt9nothrow_t", |
| "__hipstdpar_operator_new_aligned_nothrow"}, |
| {"__builtin_calloc", "__hipstdpar_calloc"}, |
| {"__builtin_free", "__hipstdpar_free"}, |
| {"__builtin_malloc", "__hipstdpar_malloc"}, |
| {"__builtin_operator_delete", "__hipstdpar_operator_delete"}, |
| {"__builtin_operator_new", "__hipstdpar_operator_new"}, |
| {"__builtin_realloc", "__hipstdpar_realloc"}, |
| {"__libc_calloc", "__hipstdpar_calloc"}, |
| {"__libc_free", "__hipstdpar_free"}, |
| {"__libc_malloc", "__hipstdpar_malloc"}, |
| {"__libc_memalign", "__hipstdpar_aligned_alloc"}, |
| {"__libc_realloc", "__hipstdpar_realloc"} |
| }; |
| |
| PreservedAnalyses |
| HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) { |
| SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap), |
| std::cend(ReplaceMap)); |
| |
| for (auto &&F : M) { |
| if (!F.hasName()) |
| continue; |
| if (!AllocReplacements.contains(F.getName())) |
| continue; |
| |
| if (auto R = M.getFunction(AllocReplacements[F.getName()])) { |
| F.replaceAllUsesWith(R); |
| } else { |
| std::string W; |
| raw_string_ostream OS(W); |
| |
| OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()] |
| << ". Tried to run the allocation interposition pass without the " |
| << "replacement functions available."; |
| |
| F.getContext().diagnose(DiagnosticInfoUnsupported(F, W, |
| F.getSubprogram(), |
| DS_Warning)); |
| } |
| } |
| |
| if (auto F = M.getFunction("__hipstdpar_hidden_free")) { |
| auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(), |
| F->getAttributes()); |
| F->replaceAllUsesWith(LibcFree.getCallee()); |
| |
| eraseFromModule(*F); |
| } |
| |
| return PreservedAnalyses::none(); |
| } |