blob: 65a861cc8a0cc6a8db7f23facd987067e604a66e [file]
//===- RecordReplay.h - Target independent kernel record replay interface -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_RECORDREPLAY_H
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_RECORDREPLAY_H
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <mutex>
#include <unordered_set>
#include "Shared/APITypes.h"
#include "Shared/EnvironmentVar.h"
#include "Shared/Utils.h"
#include "OffloadError.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace omp {
namespace target {
namespace plugin {
struct GenericKernelTy;
struct GenericDeviceTy;
struct RecordReplayTy {
protected:
struct InstanceTy;
public:
/// Describes the state of the record replay mechanism.
enum StatusTy { Deactivated = 0, Recording, Replaying };
/// Describes the format of the recording and replaying.
enum FormatTy { Native = 0 };
/// Describes the file types that can be recorded.
enum FileTy {
PrologueSnapshot = 0,
EpilogueSnapshot,
Descriptor,
Globals,
Program
};
struct HandleTy {
const InstanceTy *Instance = nullptr;
bool Active = false;
};
protected:
/// Address and size of record replay memory space.
void *StartAddr = nullptr;
uint64_t TotalSize = 0;
uint64_t CurrentSize = 0;
/// Status of the record or replay.
StatusTy Status;
/// The path where to store all recorded files.
std::filesystem::path OutputDirectory;
/// Whether a memory snapshot should be recorded a kernel execution.
bool SaveOutput;
/// Whether a report should be emitted afther the recording.
bool EmitReport;
/// Reference to the corresponding device.
GenericDeviceTy &Device;
/// The information for a global.
struct GlobalEntryTy {
std::string Name;
uint64_t Size;
void *Addr;
};
/// List of all globals mapped to the device.
SmallVector<GlobalEntryTy> GlobalEntries;
/// Mutex that protects dynamic allocations and globals.
std::mutex AllocationLock;
// An instance of a kernel record replay.
struct InstanceTy {
/// Reference to the kernel.
const GenericKernelTy &Kernel;
/// The launch configuration parameters.
uint32_t NumTeams = 0;
uint32_t NumThreads = 0;
uint32_t SharedMemorySize = 0;
/// The hashes representing the kernel and the launch configuration.
size_t KernelHash = 0;
size_t LaunchConfigHash = 0;
/// When replaying, the entity requesting the replay may also need further
/// information about the the kernel's replay, such as the snapshot file.
KernelReplayOutcomeTy *ReplayOutcome = nullptr;
/// The begin and end time points of the kernel execution.
using ClockTy = std::chrono::steady_clock;
mutable std::chrono::time_point<ClockTy> BeginTime, EndTime;
/// The number of occurrences during the execution.
mutable size_t Occurrences = 0;
InstanceTy(const GenericKernelTy &Kernel, uint32_t NumTeams,
uint32_t NumThreads, uint32_t SharedMemorySize,
KernelReplayOutcomeTy *ReplayOutcome);
bool operator==(const InstanceTy &Other) const {
return (KernelHash == Other.KernelHash &&
LaunchConfigHash == Other.LaunchConfigHash &&
NumTeams == Other.NumTeams && NumThreads == Other.NumThreads &&
SharedMemorySize == Other.SharedMemorySize);
}
/// Record the begin and ending of the kernel execution.
void recordBeginTime() const { BeginTime = ClockTy::now(); }
void recordEndTime() const { EndTime = ClockTy::now(); }
/// Get the kernel execution time in nanoseconds.
uint64_t getRecordedTimeNs() const {
using DurationNsTy = std::chrono::duration<uint64_t, std::nano>;
return std::chrono::duration_cast<DurationNsTy>(EndTime - BeginTime)
.count();
}
};
struct InstanceHasher {
std::size_t operator()(const InstanceTy &I) const {
llvm::stable_hash H =
llvm::stable_hash_combine(I.KernelHash, I.LaunchConfigHash);
return static_cast<std::size_t>(H);
}
};
/// Tracker of record replay instances.
std::unordered_set<InstanceTy, InstanceHasher> Instances;
std::mutex InstancesLock;
public:
RecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr, bool SaveOutput,
bool EmitReport, GenericDeviceTy &Device)
: Status(Status), SaveOutput(SaveOutput), EmitReport(EmitReport),
Device(Device) {
if (OutputDirectoryStr == "")
OutputDirectory = std::filesystem::current_path();
else
OutputDirectory = OutputDirectoryStr.data();
}
virtual ~RecordReplayTy() = default;
/// Initialize kernel record replay for the corresponding device.
Error init(uint64_t MemSize, void *VAddr);
Error deinit();
/// Emit the information of each registered instance.
Error emitInstanceReport();
bool isRecording() const { return Status == StatusTy::Recording; }
bool isReplaying() const { return Status == StatusTy::Replaying; }
bool isRecordingOrReplaying() const { return isRecording() || isReplaying(); }
bool shouldRecordPrologue() const { return isRecording(); }
bool shouldRecordEpilogue() const {
return isRecordingOrReplaying() && SaveOutput;
}
/// Add information about a global.
void addGlobal(const char *Name, uint64_t Size, void *Addr) {
std::lock_guard<std::mutex> Lock(AllocationLock);
GlobalEntries.emplace_back(GlobalEntryTy{Name, Size, Addr});
}
/// Register a record replay instance, record the prologue data if necessary,
/// and return the instance's handle. The prologue is the phase just before
/// executing the kernel. This phase can include the recording of memory
/// snapshot, the record descriptor and the globals. When replaying, only the
/// instance is registered.
Expected<HandleTy>
recordPrologue(const GenericKernelTy &Kernel, const KernelArgsTy &KernelArgs,
const KernelExtraArgsTy *KernelExtraArgs,
const KernelLaunchParamsTy &LaunchParams, uint32_t NumTeams[3],
uint32_t NumThreads[3], uint32_t SharedMemorySize);
/// Record the epilogue if necessary, which can include the memory snapshot
/// when recording or replaying.
Error recordEpilogue(const GenericKernelTy &Kernel, HandleTy Handle);
/// Get a string with the filename.
SmallString<128> getFilename(const InstanceTy &Instance, FileTy FileType,
bool IncludeDirectory = true) {
return getFilenameImpl(Instance, FileType, IncludeDirectory);
}
/// Allocates device memory from the record replay space.
Expected<void *> allocate(uint64_t Size);
Error deallocate(void *Ptr);
private:
/// Register an instance and return a reference and whether it was registered
/// as a new instance.
std::pair<const InstanceTy &, bool>
registerInstance(const GenericKernelTy &Kernel, uint32_t NumTeams,
uint32_t NumThreads, uint32_t SharedMemorySize,
KernelReplayOutcomeTy *ReplayOutcome);
/// Unregister an instance once it has been replayed. Instances during
/// recording cannot be unregistered. Accessing the instance beyond this point
/// is invalid.
Error unregisterInstance(const InstanceTy &Instance);
/// Populate the replay outcome struct to forward some replay information.
void populateReplayOutcome(const InstanceTy &Instance,
KernelReplayOutcomeTy &Outcome);
/// Record the prologue data.
virtual Error
recordPrologueImpl(const GenericKernelTy &Kernel, const InstanceTy &Instance,
const KernelArgsTy &KernelArgs,
const KernelLaunchParamsTy &LaunchParams) = 0;
/// Record the epilogue data.
virtual Error recordEpilogueImpl(const GenericKernelTy &Kernel,
const InstanceTy &Instance) = 0;
/// Record the descriptor of the kernel.
virtual Error recordDescImpl(const GenericKernelTy &Kernel,
const InstanceTy &Instance,
const KernelArgsTy &KernelArgs,
const KernelLaunchParamsTy &LaunchParams) = 0;
/// Get a string with the filename.
virtual SmallString<128> getFilenameImpl(const InstanceTy &Instance,
FileTy FileType,
bool IncludeDirectory) = 0;
};
/// The native kernel record replay support.
struct NativeRecordReplayTy : public RecordReplayTy {
NativeRecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr,
bool SaveOutput, bool EmitReport,
GenericDeviceTy &Device)
: RecordReplayTy(Status, OutputDirectoryStr, SaveOutput, EmitReport,
Device) {}
private:
Error recordPrologueImpl(const GenericKernelTy &Kernel,
const InstanceTy &Instance,
const KernelArgsTy &KernelArgs,
const KernelLaunchParamsTy &LaunchParams) override;
Error recordEpilogueImpl(const GenericKernelTy &Kernel,
const InstanceTy &Instance) override;
Error recordDescImpl(const GenericKernelTy &Kernel,
const InstanceTy &Instance,
const KernelArgsTy &KernelArgs,
const KernelLaunchParamsTy &LaunchParams) override;
/// Get a string with the filename.
SmallString<128> getFilenameImpl(const InstanceTy &Instance, FileTy FileType,
bool IncludeDirectory) override;
/// Get the extension for a recording file type.
StringRef getExtension(FileTy FileType);
/// Record a memory snapshot to a file.
Error recordSnapshot(StringRef Filename);
/// Record the globals to a file.
Error recordGlobals(StringRef Filename);
/// Record the device image to a file.
Error recordImage(const GenericKernelTy &Kernel, StringRef Filename);
};
} // namespace plugin
} // namespace target
} // namespace omp
} // namespace llvm
#endif // OPENMP_LIBOMPTARGET_PLUGINS_COMMON_RECORDREPLAY_H