blob: 93dd7cecb32e13bc873cda7e3b85db3277ecd55d [file] [log] [blame]
//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the KernelInfoPrinter class used to emit remarks about
// function properties from a GPU kernel.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/KernelInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;
#define DEBUG_TYPE "kernel-info"
namespace {
/// Data structure holding function info for kernels.
///
/// emitKernelInfo() creates one instance per function, fills it in by calling
/// updateForBB() on every basic block, and then reports each member as a
/// remark.
class KernelInfo {
  /// Update the counters below for the instructions in \p BB, emitting the
  /// per-instruction remarks through \p ORE.
  void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);

public:
  /// Collect the properties of \p F and emit them as optimization remarks.
  static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
                             TargetMachine *TM);

  /// Whether the function has external linkage and is not a kernel function.
  bool ExternalNotKernel = false;

  /// Launch bounds, as (name, value) pairs.
  SmallVector<std::pair<StringRef, int64_t>> LaunchBounds;

  /// The number of alloca instructions inside the function, the number of those
  /// with allocation sizes that cannot be determined at compile time, and the
  /// sum of the sizes that can be.
  ///
  /// With the current implementation for at least some GPU archs,
  /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
  /// case the implementation changes.
  int64_t Allocas = 0;
  int64_t AllocasDyn = 0;
  int64_t AllocasStaticSizeSum = 0;

  /// Number of direct/indirect calls (anything derived from CallBase).
  int64_t DirectCalls = 0;
  int64_t IndirectCalls = 0;

  /// Number of direct calls made from this function to other functions
  /// defined in this module.
  int64_t DirectCallsToDefinedFunctions = 0;

  /// Number of direct calls to inline assembly.
  int64_t InlineAssemblyCalls = 0;

  /// Number of calls of type InvokeInst.
  int64_t Invokes = 0;

  /// Target-specific flat address space.
  ///
  /// Given a default so that it is never read while indeterminate;
  /// emitKernelInfo() overwrites it with the target's value before any
  /// instruction is examined.
  /// NOTE(review): ~0U is presumed to match the "no flat address space" value
  /// of TargetTransformInfo::getFlatAddressSpace() -- confirm.
  unsigned FlatAddrspace = ~0U;

  /// Number of flat address space memory accesses (via load, store, etc.).
  int64_t FlatAddrspaceAccesses = 0;
};
} // end anonymous namespace
/// Append a description of \p V to \p R in the form "[artificial ][Kind ]'name'".
///
/// When \p V is a function with a debug-info subprogram, the subprogram's name
/// is used (and "artificial " is emitted first if the subprogram is marked
/// artificial).  If that yields no name, \p V is printed as an IR operand
/// (without its type) in the context of \p M.  \p Kind, when non-empty, is
/// emitted followed by a space just before the quoted name.
static void identifyCallee(OptimizationRemark &R, const Module *M,
                           const Value *V, StringRef Kind = "") {
  // Ends up holding either a function name or a printed operand (for example
  // an asm expression).
  SmallString<100> Label;

  const auto *Fn = dyn_cast<Function>(V);
  if (auto *SP = Fn ? Fn->getSubprogram() : nullptr) {
    if (SP->isArtificial())
      R << "artificial ";
    Label = SP->getName();
  }

  // No usable debug-info name: fall back to the IR spelling of the value.
  if (Label.empty()) {
    raw_svector_ostream Stream(Label);
    V->printAsOperand(Stream, /*PrintType=*/false, M);
  }

  if (!Kind.empty())
    R << Kind << " ";
  R << "'" << Label << "'";
}
/// Append a description of \p F to \p R, labelled with the kind "function".
/// Delegates to identifyCallee() using the module that contains \p F.
static void identifyFunction(OptimizationRemark &R, const Function &F) {
  const Module *Parent = F.getParent();
  identifyCallee(R, Parent, &F, /*Kind=*/"function");
}
/// Emit an "Alloca" remark describing \p Alloca, which lives in \p Caller.
///
/// The remark names the enclosing function, the alloca's IR value, the source
/// variable attached to it through a debug declare record (if any), and
/// either its static size in bytes or the fact that its size is dynamic.
///
/// \param ORE        Emitter the remark is sent to.
/// \param Caller     Function containing \p Alloca.
/// \param Alloca     The alloca being reported.
/// \param StaticSize Allocation size in bytes when it is statically known.
///                   The caller passes 0 for dynamically sized allocas, so 0
///                   alone cannot tell "dynamic" apart from "static, empty".
static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
                         const AllocaInst &Alloca,
                         TypeSize::ScalarTy StaticSize) {
  ORE.emit([&] {
    StringRef DbgName;
    DebugLoc Loc;
    bool Artificial = false;
    // Take the variable name, location and artificial flag from the first
    // declare record attached to the alloca, if there is one.
    auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
    if (!DVRs.empty()) {
      const DbgVariableRecord &DVR = **DVRs.begin();
      const auto *Var = DVR.getVariable();
      DbgName = Var->getName();
      Loc = DVR.getDebugLoc();
      Artificial = Var->isArtificial();
    }

    // Ask the alloca itself whether its size is a compile-time constant rather
    // than testing StaticSize against 0: a zero-sized static alloca would
    // otherwise be misreported as dynamic.  Scalable sizes are not
    // compile-time constants and are reported as dynamic.
    const DataLayout &DL = Caller.getParent()->getDataLayout();
    std::optional<TypeSize> Size = Alloca.getAllocationSize(DL);
    const bool HasStaticSize = Size && !Size->isScalable();

    OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
                         Alloca.getParent());
    R << "in ";
    identifyFunction(R, Caller);
    R << ", ";
    if (Artificial)
      R << "artificial ";
    SmallString<20> ValName;
    raw_svector_ostream OS(ValName);
    Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
    R << "alloca ('" << ValName << "') ";
    if (!DbgName.empty())
      R << "for '" << DbgName << "' ";
    else
      R << "without debug info ";
    R << "with ";
    if (HasStaticSize)
      // StaticSize is unsigned, so format it as such.
      R << "static size of " << utostr(StaticSize) << " bytes";
    else
      R << "dynamic size";
    return R;
  });
}
/// Emit a remark named \p RemarkKind for the call site \p Call in \p Caller.
///
/// The message reads "in <caller>, <CallKind>, callee is <callee>", where the
/// callee is described from the call's called operand.
static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
                       const CallBase &Call, StringRef CallKind,
                       StringRef RemarkKind) {
  ORE.emit([&] {
    OptimizationRemark Remark(DEBUG_TYPE, RemarkKind, &Call);

    Remark << "in ";
    identifyFunction(Remark, Caller);

    Remark << ", ";
    Remark << CallKind;
    Remark << ", callee is ";

    const Value *Target = Call.getCalledOperand();
    identifyCallee(Remark, Caller.getParent(), Target);
    return Remark;
  });
}
/// Emit a "FlatAddrspaceAccess" remark for \p Inst, located in \p Caller.
///
/// Intrinsic calls are described by the called function's name, all other
/// instructions by their opcode name.  For instructions that produce a value,
/// the IR name of that value is appended in parentheses.
static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
                                      const Function &Caller,
                                      const Instruction &Inst) {
  ORE.emit([&] {
    OptimizationRemark Remark(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
    Remark << "in ";
    identifyFunction(Remark, Caller);

    Remark << ", '";
    if (const auto *Intrinsic = dyn_cast<IntrinsicInst>(&Inst))
      Remark << Intrinsic->getCalledFunction()->getName() << "' call";
    else
      Remark << Inst.getOpcodeName() << "' instruction";

    // Only value-producing instructions have a result to name.
    const bool ProducesValue = !Inst.getType()->isVoidTy();
    if (ProducesValue) {
      SmallString<20> ResultName;
      raw_svector_ostream Stream(ResultName);
      Inst.printAsOperand(Stream, /*PrintType=*/false, Caller.getParent());
      Remark << " ('" << ResultName << "')";
    }

    Remark << " accesses memory in flat address space";
    return Remark;
  });
}
void KernelInfo::updateForBB(const BasicBlock &BB,
OptimizationRemarkEmitter &ORE) {
const Function &F = *BB.getParent();
const Module &M = *F.getParent();
const DataLayout &DL = M.getDataLayout();
for (const Instruction &I : BB.instructionsWithoutDebug()) {
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
++Allocas;
TypeSize::ScalarTy StaticSize = 0;
if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
StaticSize = Size->getFixedValue();
assert(StaticSize <=
(TypeSize::ScalarTy)std::numeric_limits<int64_t>::max());
AllocasStaticSizeSum += StaticSize;
} else {
++AllocasDyn;
}
remarkAlloca(ORE, F, *Alloca, StaticSize);
} else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
SmallString<40> CallKind;
SmallString<40> RemarkKind;
if (Call->isIndirectCall()) {
++IndirectCalls;
CallKind += "indirect";
RemarkKind += "Indirect";
} else {
++DirectCalls;
CallKind += "direct";
RemarkKind += "Direct";
}
if (isa<InvokeInst>(Call)) {
++Invokes;
CallKind += " invoke";
RemarkKind += "Invoke";
} else {
CallKind += " call";
RemarkKind += "Call";
}
if (!Call->isIndirectCall()) {
if (const Function *Callee = Call->getCalledFunction()) {
if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
++DirectCallsToDefinedFunctions;
CallKind += " to defined function";
RemarkKind += "ToDefinedFunction";
}
} else if (Call->isInlineAsm()) {
++InlineAssemblyCalls;
CallKind += " to inline assembly";
RemarkKind += "ToInlineAssembly";
}
}
remarkCall(ORE, F, *Call, CallKind, RemarkKind);
if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
if (MI->getDestAddressSpace() == FlatAddrspace) {
++FlatAddrspaceAccesses;
remarkFlatAddrspaceAccess(ORE, F, I);
} else if (const AnyMemTransferInst *MT =
dyn_cast<AnyMemTransferInst>(MI)) {
if (MT->getSourceAddressSpace() == FlatAddrspace) {
++FlatAddrspaceAccesses;
remarkFlatAddrspaceAccess(ORE, F, I);
}
}
}
} else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
if (Load->getPointerAddressSpace() == FlatAddrspace) {
++FlatAddrspaceAccesses;
remarkFlatAddrspaceAccess(ORE, F, I);
}
} else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
if (Store->getPointerAddressSpace() == FlatAddrspace) {
++FlatAddrspaceAccesses;
remarkFlatAddrspaceAccess(ORE, F, I);
}
} else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
if (At->getPointerAddressSpace() == FlatAddrspace) {
++FlatAddrspaceAccesses;
remarkFlatAddrspaceAccess(ORE, F, I);
}
} else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
if (At->getPointerAddressSpace() == FlatAddrspace) {
++FlatAddrspaceAccesses;
remarkFlatAddrspaceAccess(ORE, F, I);
}
}
}
}
/// Emit a remark named \p Name on \p F whose message is
/// "in <function>, <Name> = <Value>".
static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
                           StringRef Name, int64_t Value) {
  ORE.emit([&] {
    OptimizationRemark Remark(DEBUG_TYPE, Name, &F);
    Remark << "in ";
    identifyFunction(Remark, F);
    Remark << ", ";
    Remark << Name;
    Remark << " = ";
    Remark << itostr(Value);
    return Remark;
  });
}
/// Return the function attribute \p Name of \p F parsed as an integer, or
/// std::nullopt when \p F does not carry that attribute.
static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
                                                   StringRef Name) {
  if (F.hasFnAttribute(Name))
    return F.getFnAttributeAsParsedInteger(Name);
  return std::nullopt;
}
/// Gather the properties of \p F into a fresh KernelInfo and emit them.
///
/// Per-instruction remarks are emitted while the basic blocks are scanned;
/// afterwards one "<property> = <value>" remark is emitted per property, with
/// the launch bounds placed right after ExternalNotKernel.
void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
                                TargetMachine *TM) {
  KernelInfo Info;
  TargetTransformInfo &TargetInfo = FAM.getResult<TargetIRAnalysis>(F);
  Info.FlatAddrspace = TargetInfo.getFlatAddressSpace();

  // Record function properties.
  Info.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();

  // Launch bounds given as function attributes come first, followed by
  // whatever the target contributes.
  static constexpr const char *OmpBoundAttrs[] = {"omp_target_num_teams",
                                                  "omp_target_thread_limit"};
  for (StringRef AttrName : OmpBoundAttrs) {
    std::optional<int64_t> Bound = parseFnAttrAsInteger(F, AttrName);
    if (Bound)
      Info.LaunchBounds.emplace_back(AttrName, *Bound);
  }
  TargetInfo.collectKernelLaunchBounds(F, Info.LaunchBounds);

  OptimizationRemarkEmitter &ORE =
      FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  for (const BasicBlock &Block : F)
    Info.updateForBB(Block, ORE);

  // Summary remarks, in a fixed order.
  remarkProperty(ORE, F, "ExternalNotKernel", Info.ExternalNotKernel);
  for (const auto &[BoundName, BoundValue] : Info.LaunchBounds)
    remarkProperty(ORE, F, BoundName, BoundValue);

  const std::pair<StringRef, int64_t> Counters[] = {
      {"Allocas", Info.Allocas},
      {"AllocasStaticSizeSum", Info.AllocasStaticSizeSum},
      {"AllocasDyn", Info.AllocasDyn},
      {"DirectCalls", Info.DirectCalls},
      {"IndirectCalls", Info.IndirectCalls},
      {"DirectCallsToDefinedFunctions", Info.DirectCallsToDefinedFunctions},
      {"InlineAssemblyCalls", Info.InlineAssemblyCalls},
      {"Invokes", Info.Invokes},
      {"FlatAddrspaceAccesses", Info.FlatAddrspaceAccesses},
  };
  for (const auto &[CounterName, CounterValue] : Counters)
    remarkProperty(ORE, F, CounterName, CounterValue);
}
/// Emit kernel info remarks for \p F when remarks for this pass are enabled.
/// Always reports all analyses as preserved.
PreservedAnalyses KernelInfoPrinter::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  // Skip it if remarks are not enabled as it will do nothing useful.
  const auto *Handler = F.getContext().getDiagHandlerPtr();
  const bool RemarksEnabled = Handler->isPassedOptRemarkEnabled(DEBUG_TYPE);
  if (!RemarksEnabled)
    return PreservedAnalyses::all();

  KernelInfo::emitKernelInfo(F, AM, TM);
  return PreservedAnalyses::all();
}