#include "ThinLtoInstrumentationLayer.h"

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"

#include <cstdlib>
| |
| #define DEBUG_TYPE "thinltojit" |
| |
| namespace llvm { |
| namespace orc { |
| |
| // TODO: Fixed set of flags may not always be enough. Make this expandable. |
| void ThinLtoInstrumentationLayer::allocateDiscoveryFlags(unsigned MinFlags) { |
| // Round up to full memory pages. |
| unsigned PageSize = sys::Process::getPageSizeEstimate(); |
| unsigned NumPagesEach = (MinFlags + (PageSize - 1)) / PageSize; |
| unsigned NumPagesTotal = 2 * NumPagesEach; |
| assert(isPowerOf2_64(PageSize) && "Adjust aligned memory alloc below"); |
| |
| // Allocate one more page to make up for size loss due to alignment. |
| void *Storage = std::calloc(NumPagesTotal + 1, PageSize); |
| uint64_t StorageAddr = reinterpret_cast<uint64_t>(Storage); |
| uint64_t PageSizeDecr = PageSize - 1; |
| uint64_t AlignedAddr = ((StorageAddr + PageSizeDecr) & ~PageSizeDecr); |
| uint64_t Diff = AlignedAddr - StorageAddr; |
| |
| // For each flag we allocate one byte in each location: Incoming and Handled. |
| // TODO: 'Handled' could be a bitset, but size must be dynamic |
| NumFlagsUsed.store(0); |
| NumFlagsAllocated = NumPagesEach * PageSize; |
| FlagsStorage = static_cast<uint8_t *>(Storage); |
| FlagsIncoming = reinterpret_cast<Flag *>(FlagsStorage + Diff); |
| FlagsHandled = FlagsIncoming + NumFlagsAllocated; |
| |
| static_assert(sizeof(FlagsIncoming[0]) == sizeof(uint8_t), "Flags are bytes"); |
| assert(reinterpret_cast<uint64_t>(FlagsIncoming) % PageSize == 0); |
| assert(reinterpret_cast<uint64_t>(FlagsHandled) % PageSize == 0); |
| assert(NumFlagsAllocated >= MinFlags); |
| } |
| |
| // Reserve a new set of discovery flags and return the index of the first one. |
| unsigned ThinLtoInstrumentationLayer::reserveDiscoveryFlags(unsigned Count) { |
| #ifndef NDEBUG |
| for (unsigned i = NumFlagsUsed.load(), e = i + Count; i < e; i++) { |
| assert(FlagsIncoming[i] == Clear); |
| } |
| #endif |
| |
| assert(Count > 0); |
| return NumFlagsUsed.fetch_add(Count); |
| } |
| |
| void ThinLtoInstrumentationLayer::registerDiscoveryFlagOwners( |
| std::vector<GlobalValue::GUID> Guids, unsigned FirstIdx) { |
| unsigned Count = Guids.size(); |
| |
| std::lock_guard<std::mutex> Lock(DiscoveryFlagsInfoLock); |
| for (unsigned i = 0; i < Count; i++) { |
| assert(!FlagOwnersMap.count(FirstIdx + i) && |
| "Flag should not have an owner at this point"); |
| FlagOwnersMap[FirstIdx + i] = Guids[i]; |
| } |
| } |
| |
| std::vector<unsigned> ThinLtoInstrumentationLayer::takeFlagsThatFired() { |
| // This is only effective with the respective Release. |
| FlagsSync.load(std::memory_order_acquire); |
| |
| std::vector<unsigned> Indexes; |
| unsigned NumIndexesUsed = NumFlagsUsed.load(); |
| for (unsigned i = 0; i < NumIndexesUsed; i++) { |
| if (FlagsIncoming[i] == Fired && FlagsHandled[i] == Clear) { |
| FlagsHandled[i] = Fired; |
| Indexes.push_back(i); |
| } |
| } |
| |
| return Indexes; |
| } |
| |
| std::vector<GlobalValue::GUID> |
| ThinLtoInstrumentationLayer::takeFlagOwners(std::vector<unsigned> Indexes) { |
| std::vector<GlobalValue::GUID> ReachedFunctions; |
| std::lock_guard<std::mutex> Lock(DiscoveryFlagsInfoLock); |
| |
| for (unsigned i : Indexes) { |
| auto KV = FlagOwnersMap.find(i); |
| assert(KV != FlagOwnersMap.end()); |
| ReachedFunctions.push_back(KV->second); |
| FlagOwnersMap.erase(KV); |
| } |
| |
| return ReachedFunctions; |
| } |
| |
| void ThinLtoInstrumentationLayer::nudgeIntoDiscovery( |
| std::vector<GlobalValue::GUID> Functions) { |
| unsigned Count = Functions.size(); |
| |
| // Registering synthetic flags in advance. We expect them to get processed |
| // before the respective functions get emitted. If not, the emit() function |
| unsigned FirstFlagIdx = reserveDiscoveryFlags(Functions.size()); |
| registerDiscoveryFlagOwners(std::move(Functions), FirstFlagIdx); |
| |
| // Initialize the flags as fired and force a cache sync, so discovery will |
| // pick them up as soon as possible. |
| for (unsigned i = FirstFlagIdx; i < FirstFlagIdx + Count; i++) { |
| FlagsIncoming[i] = Fired; |
| } |
| if (MemFence & ThinLtoJIT::FenceStaticCode) { |
| FlagsSync.store(0, std::memory_order_release); |
| } |
| |
| LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n"); |
| } |
| |
| void ThinLtoInstrumentationLayer::emit(MaterializationResponsibility R, |
| ThreadSafeModule TSM) { |
| TSM.withModuleDo([this](Module &M) { |
| std::vector<Function *> FunctionsToInstrument; |
| |
| // We may have discovered ahead of some functions already, but we still |
| // instrument them all. Their notifications steer the future direction of |
| // discovery. |
| for (Function &F : M.getFunctionList()) |
| if (!F.isDeclaration()) |
| FunctionsToInstrument.push_back(&F); |
| |
| if (!FunctionsToInstrument.empty()) { |
| IRBuilder<> B(M.getContext()); |
| std::vector<GlobalValue::GUID> NewDiscoveryRoots; |
| |
| // Flags that fire must have owners registered. We will do it below and |
| // that's fine, because they can only be reached once the code is emitted. |
| unsigned FirstFlagIdx = |
| reserveDiscoveryFlags(FunctionsToInstrument.size()); |
| |
| unsigned NextFlagIdx = FirstFlagIdx; |
| for (Function *F : FunctionsToInstrument) { |
| // TODO: Emitting the write operation into an indirection stub would |
| // allow to skip it once we got the notification. |
| BasicBlock *E = &F->getEntryBlock(); |
| B.SetInsertPoint(BasicBlock::Create( |
| M.getContext(), "NotifyFunctionReachedProlog", F, E)); |
| compileFunctionReachedFlagSetter(B, FlagsIncoming + NextFlagIdx); |
| B.CreateBr(E); |
| |
| std::string GlobalName = GlobalValue::getGlobalIdentifier( |
| F->getName(), F->getLinkage(), M.getSourceFileName()); |
| NewDiscoveryRoots.push_back(GlobalValue::getGUID(GlobalName)); |
| ++NextFlagIdx; |
| } |
| |
| LLVM_DEBUG(dbgs() << "Instrumented " << NewDiscoveryRoots.size() |
| << " new functions in module " << M.getName() << "\n"); |
| |
| // Submit owner info, so the DiscoveryThread can evaluate the flags. |
| registerDiscoveryFlagOwners(std::move(NewDiscoveryRoots), FirstFlagIdx); |
| } |
| }); |
| |
| BaseLayer.emit(std::move(R), std::move(TSM)); |
| } |
| |
| void ThinLtoInstrumentationLayer::compileFunctionReachedFlagSetter( |
| IRBuilder<> &B, Flag *F) { |
| assert(*F == Clear); |
| Type *Int64Ty = Type::getInt64Ty(B.getContext()); |
| |
| // Write one immediate 8bit value to a fixed location in memory. |
| auto FlagAddr = pointerToJITTargetAddress(F); |
| Type *FlagTy = Type::getInt8Ty(B.getContext()); |
| B.CreateStore(ConstantInt::get(FlagTy, Fired), |
| B.CreateIntToPtr(ConstantInt::get(Int64Ty, FlagAddr), |
| FlagTy->getPointerTo())); |
| |
| if (MemFence & ThinLtoJIT::FenceJITedCode) { |
| // Overwrite the sync value with Release ordering. The discovery thread |
| // reads it with Acquire ordering. The actual value doesn't matter. |
| static constexpr bool IsVolatile = true; |
| static constexpr Instruction *NoInsertBefore = nullptr; |
| auto SyncFlagAddr = pointerToJITTargetAddress(&FlagsSync); |
| |
| B.Insert( |
| new StoreInst(ConstantInt::get(Int64Ty, 0), |
| B.CreateIntToPtr(ConstantInt::get(Int64Ty, SyncFlagAddr), |
| Int64Ty->getPointerTo()), |
| IsVolatile, Align(64), AtomicOrdering::Release, |
| SyncScope::System, NoInsertBefore)); |
| } |
| } |
| |
| void ThinLtoInstrumentationLayer::dump(raw_ostream &OS) { |
| OS << "Discovery flags stats\n"; |
| |
| unsigned NumFlagsFired = 0; |
| for (unsigned i = 0; i < NumFlagsAllocated; i++) { |
| if (FlagsIncoming[i] == Fired) |
| ++NumFlagsFired; |
| } |
| OS << "Alloc: " << format("%6.d", NumFlagsAllocated) << "\n"; |
| OS << "Issued: " << format("%6.d", NumFlagsUsed.load()) << "\n"; |
| OS << "Fired: " << format("%6.d", NumFlagsFired) << "\n"; |
| |
| unsigned RemainingFlagOwners = 0; |
| for (const auto &_ : FlagOwnersMap) { |
| ++RemainingFlagOwners; |
| (void)_; |
| } |
| OS << "\nFlagOwnersMap has " << RemainingFlagOwners |
| << " remaining entries.\n"; |
| } |
| |
| ThinLtoInstrumentationLayer::~ThinLtoInstrumentationLayer() { |
| std::free(FlagsStorage); |
| } |
| |
| } // namespace orc |
| } // namespace llvm |