blob: 9f9d59acba2140101fdd930f6593242fc6c2bf55 [file] [log] [blame] [edit]
//===-- Main entry into the loader interface ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This utility is used to launch standard programs onto the GPU in conjunction
// with the LLVM 'libc' project. It is designed to mimic a standard emulator
// workflow, allowing for unit tests to be run on the GPU directly.
//
//===----------------------------------------------------------------------===//
#include "llvm-gpu-loader.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/WithColor.h"
#include "llvm/TargetParser/Triple.h"
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
using namespace llvm;
// Category that groups every option defined by this tool. main() passes it to
// cl::HideUnrelatedOptions() before parsing the command line.
static cl::OptionCategory LoaderCategory("loader options");
// Hidden '-h' flag. When set, main() prints the help message and exits with
// EXIT_SUCCESS.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden,
cl::cat(LoaderCategory));
// Thread counts per dimension, each defaulting to 1. main() forwards them as
// the third member of the '_start' launch configuration.
static cl::opt<unsigned>
ThreadsX("threads-x", cl::desc("Number of threads in the 'x' dimension"),
cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
ThreadsY("threads-y", cl::desc("Number of threads in the 'y' dimension"),
cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
ThreadsZ("threads-z", cl::desc("Number of threads in the 'z' dimension"),
cl::init(1), cl::cat(LoaderCategory));
// '--threads' is an alias that sets ThreadsX.
static cl::alias threads("threads", cl::aliasopt(ThreadsX),
cl::desc("Alias for --threads-x"),
cl::cat(LoaderCategory));
// Block counts per dimension, each defaulting to 1. main() forwards them as
// the second member of the '_start' launch configuration and derives the
// launch dimensionality from them.
static cl::opt<unsigned>
BlocksX("blocks-x", cl::desc("Number of blocks in the 'x' dimension"),
cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
BlocksY("blocks-y", cl::desc("Number of blocks in the 'y' dimension"),
cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
BlocksZ("blocks-z", cl::desc("Number of blocks in the 'z' dimension"),
cl::init(1), cl::cat(LoaderCategory));
// '--blocks' is an alias that sets BlocksX.
static cl::alias Blocks("blocks", cl::aliasopt(BlocksX),
cl::desc("Alias for --blocks-x"),
cl::cat(LoaderCategory));
// Required positional argument naming the GPU executable. main() reads the
// image through MemoryBuffer::getFileOrSTDIN() and also uses this string as
// the first entry of the argument vector handed to the program.
static cl::opt<std::string> File(cl::Positional, cl::Required,
cl::desc("<gpu executable>"),
cl::cat(LoaderCategory));
// Everything after the executable name. main() appends these, in order, to
// the argument vector handed to the program.
static cl::list<std::string> Args(cl::ConsumeAfter,
cl::desc("<program arguments>..."),
cl::cat(LoaderCategory));
// The arguments to the '_begin' kernel.
// main() fills this from the argument count and the argument / environment
// vectors produced by copyArgumentVector() and copyEnvironment(), then passes
// it to launchKernel().
struct BeginArgs {
// Number of entries in Argv, not counting the terminating null pointer.
int Argc;
// Null-terminated argument vector returned by copyArgumentVector().
void *Argv;
// Null-terminated environment vector returned by copyEnvironment().
void *Envp;
};
// The arguments to the '_start' kernel.
// Carries the same three values as BeginArgs plus the buffer main() later
// reads the exit code from.
struct StartArgs {
// Number of entries in Argv, not counting the terminating null pointer.
int Argc;
// Null-terminated argument vector returned by copyArgumentVector().
void *Argv;
// Null-terminated environment vector returned by copyEnvironment().
void *Envp;
// sizeof(int) allocation made by main(); after the kernels have run main()
// copies one int back from it and uses that as the process exit code.
void *Ret;
};
// The arguments to the '_end' kernel.
// Intentionally empty: launchKernel() reports an argument size of zero for
// empty types such as this one.
struct EndArgs {};
/// Prints an llvm::Error as a "loader" diagnostic on stderr and terminates the
/// process with EXIT_FAILURE.
///
/// \param E The error to report; it is consumed by this call.
[[noreturn]] static void handleError(Error E) {
  // Flush stdout first so the diagnostic appears after any output that has
  // already been queued on outs().
  outs().flush();

  raw_ostream &Diag = WithColor::error(errs(), "loader");
  logAllUnhandledErrors(std::move(E), Diag);
  std::exit(EXIT_FAILURE);
}
/// Reports a failed Offload API call on stderr and terminates the process
/// with EXIT_FAILURE.
///
/// \param Err  Non-null result returned by the failing call; its Details
///             string is printed as the message.
/// \param Line Source line of the failing call (OFFLOAD_ERR passes __LINE__).
[[noreturn]] static void handleError(ol_result_t Err, unsigned Line) {
  // Passing a null pointer to '%s' is undefined behavior, so fall back to a
  // placeholder should the runtime ever leave the details string unset.
  const char *Details = Err->Details ? Err->Details : "unknown error";
  // 'Line' is unsigned, hence '%u' rather than '%d'.
  fprintf(stderr, "%s:%u %s\n", __FILE__, Line, Details);
  exit(EXIT_FAILURE);
}
// Evaluates the Offload API call X exactly once and, if it yields a non-null
// result, reports it through handleError() together with the current source
// line. The body is wrapped in do { } while (false) so that the macro expands
// to a single statement: 'OFFLOAD_ERR(x);' is then safe inside an unbraced
// if/else and no stray empty statement is left behind. Call sites must supply
// the trailing semicolon.
#define OFFLOAD_ERR(X)                                                         \
  do {                                                                         \
    if (ol_result_t Err = (X))                                                 \
      handleError(Err, __LINE__);                                              \
  } while (false)
/// Copies an argument vector into a single allocation obtained from
/// olMemAlloc() with OL_ALLOC_TYPE_HOST on \p Device.
///
/// The buffer starts with a table of Argc + 1 pointers (the last one null),
/// followed immediately by the string data the table entries point into.
///
/// \param Argc   Number of strings in \p Argv.
/// \param Argv   The strings to copy; each must be null terminated.
/// \param Device Device handle forwarded to olMemAlloc().
/// \returns the start of the allocation, i.e. the pointer table.
static void *copyArgumentVector(int Argc, const char **Argv,
                                ol_device_handle_t Device) {
  // Size of the pointer table, including the terminating null entry.
  size_t TableBytes = sizeof(char *) * (Argc + 1);

  // Total size of all strings, each with its terminating NUL.
  size_t PayloadBytes = 0;
  for (int Idx = 0; Idx < Argc; ++Idx)
    PayloadBytes += strlen(Argv[Idx]) + 1;

  // One allocation holds both the null terminated table and all the strings.
  void *DevArgv;
  OFFLOAD_ERR(olMemAlloc(Device, OL_ALLOC_TYPE_HOST,
                         TableBytes + PayloadBytes, &DevArgv));
  if (!DevArgv)
    handleError(
        createStringError("Failed to allocate memory for environment."));

  // Store the strings linearly in the same buffer, right after the table.
  void **Table = static_cast<void **>(DevArgv);
  uint8_t *Cursor = static_cast<uint8_t *>(DevArgv) + TableBytes;
  for (int Idx = 0; Idx < Argc; ++Idx) {
    size_t Bytes = strlen(Argv[Idx]) + 1;
    std::memcpy(Cursor, Argv[Idx], Bytes);
    Table[Idx] = Cursor;
    Cursor += Bytes;
  }

  // Ensure the vector is null terminated.
  Table[Argc] = nullptr;
  return DevArgv;
}
/// Copies a null-terminated environment vector by counting its entries and
/// delegating to copyArgumentVector().
///
/// \param Envp   Null-terminated array of environment strings.
/// \param Device Device handle forwarded to copyArgumentVector().
/// \returns the buffer produced by copyArgumentVector().
void *copyEnvironment(const char **Envp, ol_device_handle_t Device) {
  // Count entries up to, but not including, the terminating null pointer.
  int Count = 0;
  while (Envp[Count] != nullptr)
    ++Count;
  return copyArgumentVector(Count, Envp, Device);
}
/// Searches the devices enumerated by olIterateDevices() for one that
/// olIsValidBinary() accepts for \p Binary.
///
/// \param Binary The executable image to check each device against.
/// \returns the first accepting device, or nullptr if none accepted it.
ol_device_handle_t findDevice(MemoryBufferRef Binary) {
  // State shared with the captureless callback through its user-data pointer.
  struct SearchState {
    ol_device_handle_t Match;
    const MemoryBufferRef *Image;
  };
  SearchState State{nullptr, &Binary};

  OFFLOAD_ERR(olIterateDevices(
      [](ol_device_handle_t Candidate, void *UserData) {
        auto *Search = static_cast<SearchState *>(UserData);
        bool IsValid = false;
        OFFLOAD_ERR(olIsValidBinary(Candidate, Search->Image->getBufferStart(),
                                    Search->Image->getBufferSize(), &IsValid));
        if (IsValid) {
          // Record the match and return false; no later device is wanted.
          Search->Match = Candidate;
          return false;
        }
        return true;
      },
      &State));
  return State.Match;
}
/// Finds the device whose platform reports the host backend.
///
/// Every Offload API call is checked through OFFLOAD_ERR, so a failing query
/// terminates the process instead of leaving Platform or Backend
/// uninitialized.
///
/// \returns the first device whose platform backend is
///          OL_PLATFORM_BACKEND_HOST, or nullptr if the iteration finds none.
ol_device_handle_t getHostDevice() {
  // Start from null so that "no host device" is reported as nullptr rather
  // than as an indeterminate handle.
  ol_device_handle_t Host = nullptr;
  OFFLOAD_ERR(olIterateDevices(
      [](ol_device_handle_t Candidate, void *UserData) {
        ol_platform_handle_t Platform;
        OFFLOAD_ERR(olGetDeviceInfo(Candidate, OL_DEVICE_INFO_PLATFORM,
                                    sizeof(Platform), &Platform));
        ol_platform_backend_t Backend;
        OFFLOAD_ERR(olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND,
                                      sizeof(Backend), &Backend));
        if (Backend != OL_PLATFORM_BACKEND_HOST)
          return true;
        // Record the match and return false; no later device is wanted.
        *static_cast<ol_device_handle_t *>(UserData) = Candidate;
        return false;
      },
      &Host));
  return Host;
}
/// Looks up the kernel called \p Name in \p Program and enqueues it on
/// \p Queue.
///
/// \tparam Args      Type of the kernel argument structure.
/// \param Queue      Queue the launch is submitted to.
/// \param Device     Device the kernel is launched on.
/// \param Program    Program the kernel symbol is looked up in.
/// \param Name       Name of the kernel symbol.
/// \param LaunchArgs Launch size configuration passed to olLaunchKernel().
/// \param KernelArgs Argument structure whose address is passed to the launch.
template <typename Args>
void launchKernel(ol_queue_handle_t Queue, ol_device_handle_t Device,
                  ol_program_handle_t Program, const char *Name,
                  ol_kernel_launch_size_args_t LaunchArgs, Args &KernelArgs) {
  // sizeof() of an empty struct is still 1, so report zero bytes explicitly
  // for kernels that take no arguments.
  constexpr size_t ArgsSize = std::is_empty_v<Args> ? 0 : sizeof(Args);

  ol_symbol_handle_t Kernel;
  OFFLOAD_ERR(olGetSymbol(Program, Name, OL_SYMBOL_KIND_KERNEL, &Kernel));
  OFFLOAD_ERR(olLaunchKernel(Queue, Device, Kernel, &KernelArgs, ArgsSize,
                             &LaunchArgs));
}
int main(int argc, const char **argv, const char **envp) {
sys::PrintStackTraceOnErrorSignal(argv[0]);
cl::HideUnrelatedOptions(LoaderCategory);
cl::ParseCommandLineOptions(
argc, argv,
"A utility used to launch unit tests built for a GPU target. This is\n"
"intended to provide an interface similar to cross-compiling "
"emulators\n");
if (Help) {
cl::PrintHelpMessage();
return EXIT_SUCCESS;
}
if (Error Err = loadLLVMOffload())
handleError(std::move(Err));
ErrorOr<std::unique_ptr<MemoryBuffer>> ImageOrErr =
MemoryBuffer::getFileOrSTDIN(File);
if (std::error_code EC = ImageOrErr.getError())
handleError(errorCodeToError(EC));
MemoryBufferRef Image = **ImageOrErr;
ol_platform_backend_t Backend;
ol_init_args_t InitArgs = OL_INIT_ARGS_INIT;
file_magic Magic = identify_magic(Image.getBuffer());
if (Magic >= file_magic::elf && Magic <= file_magic::elf_core) {
Expected<object::ELFFile<object::ELF64LE>> ElfOrErr =
object::ELFFile<object::ELF64LE>::create(Image.getBuffer());
if (!ElfOrErr)
handleError(ElfOrErr.takeError());
switch (ElfOrErr->getHeader().e_machine) {
case ELF::EM_AMDGPU:
Backend = OL_PLATFORM_BACKEND_AMDGPU;
break;
case ELF::EM_CUDA:
Backend = OL_PLATFORM_BACKEND_CUDA;
break;
default:
handleError(createStringError(
"unhandled ELF architecture: %s",
ELF::convertEMachineToArchName(ElfOrErr->getHeader().e_machine)
.data()));
}
InitArgs.NumPlatforms = 1;
InitArgs.Platforms = &Backend;
}
SmallVector<const char *> NewArgv = {File.c_str()};
llvm::transform(Args, std::back_inserter(NewArgv),
[](const std::string &Arg) { return Arg.c_str(); });
OFFLOAD_ERR(olInit(&InitArgs));
ol_device_handle_t Device = findDevice(Image);
if (!Device)
handleError(createStringError("No compatible device was found"));
ol_device_handle_t Host = getHostDevice();
assert(Host && "Host device should always be present");
ol_program_handle_t Program;
OFFLOAD_ERR(olCreateProgram(Device, Image.getBufferStart(),
Image.getBufferSize(), &Program));
ol_queue_handle_t Queue;
OFFLOAD_ERR(olCreateQueue(Device, &Queue));
int DevArgc = static_cast<int>(NewArgv.size());
void *DevArgv = copyArgumentVector(NewArgv.size(), NewArgv.begin(), Device);
void *DevEnvp = copyEnvironment(envp, Device);
void *DevRet;
OFFLOAD_ERR(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, sizeof(int), &DevRet));
ol_kernel_launch_size_args_t BeginLaunch{1, {1, 1, 1}, {1, 1, 1}, 0};
BeginArgs BeginArgs = {DevArgc, DevArgv, DevEnvp};
launchKernel(Queue, Device, Program, "_begin", BeginLaunch, BeginArgs);
OFFLOAD_ERR(olSyncQueue(Queue));
uint32_t Dims = (BlocksZ > 1) ? 3 : (BlocksY > 1) ? 2 : 1;
ol_kernel_launch_size_args_t StartLaunch{Dims,
{BlocksX, BlocksY, BlocksZ},
{ThreadsX, ThreadsY, ThreadsZ},
/*SharedMemBytes=*/0};
StartArgs StartArgs = {DevArgc, DevArgv, DevEnvp, DevRet};
launchKernel(Queue, Device, Program, "_start", StartLaunch, StartArgs);
ol_kernel_launch_size_args_t EndLaunch{1, {1, 1, 1}, {1, 1, 1}, 0};
EndArgs EndArgs = {};
launchKernel(Queue, Device, Program, "_end", EndLaunch, EndArgs);
int Ret;
OFFLOAD_ERR(olMemcpy(Queue, &Ret, Host, DevRet, Device, sizeof(int)));
OFFLOAD_ERR(olSyncQueue(Queue));
OFFLOAD_ERR(olMemFree(DevArgv));
OFFLOAD_ERR(olMemFree(DevEnvp));
OFFLOAD_ERR(olDestroyQueue(Queue));
OFFLOAD_ERR(olDestroyProgram(Program));
OFFLOAD_ERR(olShutDown());
return Ret;
}