blob: a1e58e4e57f47a66f24009fe3b9915172ea7f9b4 [file]
//===- Library.cpp - Library calls for llubi ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements common libcalls for llubi.
//
//===----------------------------------------------------------------------===//
#include "Library.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm::ubi {
/// Returns the alignment (in bytes) used for heap allocations made through the
/// emulated allocation functions.
///
/// The choice is keyed on the ABI alignment of pointers in address space 0:
/// 16 when that alignment is at least 8 bytes (i.e. 64-bit style targets),
/// and 8 otherwise.
static uint64_t getMaxAlign(const DataLayout &DL) {
  const bool HasWidePointerAlign = DL.getPointerABIAlignment(0).value() >= 8;
  if (HasWidePointerAlign)
    return 16;
  return 8;
}
/// Constructs the libcall emulation layer.
///
/// Initializes the Ctx, Handler, DL and Executor members from the arguments of
/// the same name; no other work is performed.
Library::Library(Context &Ctx, EventHandler &Handler, const DataLayout &DL,
ExecutorBase &Executor)
: Ctx(Ctx), Handler(Handler), DL(DL), Executor(Executor) {}
std::optional<std::string> Library::readStringFromMemory(const Pointer &Ptr) {
auto *MO = Ptr.getMemoryObject();
if (!MO) {
Executor.reportImmediateUB()
<< "Invalid memory access via a pointer with nullary provenance.";
return std::nullopt;
}
std::string Result;
const APInt &Address = Ptr.address();
uint64_t Offset = 0;
while (true) {
auto ValidOffset =
Executor.verifyMemAccess(*MO, Address + Offset, 1, Align(1), false);
if (!ValidOffset)
return std::nullopt;
Byte B = (*MO)[*ValidOffset];
if (B.ConcreteMask != 0xFF) {
Executor.reportImmediateUB()
<< "Read uninitialized or poison memory while "
"parsing C-string at offset "
<< Offset << ".";
return std::nullopt;
}
if (B.Value == 0)
break;
Result.push_back(static_cast<char>(B.Value));
++Offset;
}
return Result;
}
/// Emulates malloc(), operator new and operator new[].
///
/// \param Name      name passed to the allocator for the new memory object.
/// \param Type      type used to build the null result when malloc() fails.
/// \param Args      call arguments; Args[0] is the requested size in bytes.
/// \param AllocKind must be Malloc, New or NewArray.
///
/// \returns a pointer derived from the new, uninitialized memory object. When
/// the allocation fails, malloc() yields a null value, whereas new/new[]
/// report an error and yield poison.
AnyValue Library::executeMalloc(StringRef Name, Type *Type,
                                ArrayRef<AnyValue> Args,
                                MemAllocKind AllocKind) {
  assert((AllocKind == MemAllocKind::Malloc || AllocKind == MemAllocKind::New ||
          AllocKind == MemAllocKind::NewArray) &&
         "Unexpected MemAllocKind for malloc()/new/new[]");

  const uint64_t RequestedBytes = Args[0].asInteger().getZExtValue();
  const IntrusiveRefCntPtr<MemoryObject> Obj =
      Ctx.allocate(RequestedBytes, getMaxAlign(DL), Name, 0,
                   MemInitKind::Uninitialized, AllocKind);
  if (Obj)
    return Ctx.deriveFromMemoryObject(Obj);

  const bool IsCxxNew =
      AllocKind == MemAllocKind::New || AllocKind == MemAllocKind::NewArray;
  if (!IsCxxNew)
    return AnyValue::getNullValue(Ctx, Type);

  // FIXME: As llubi doesn't support stack unwinding yet, we report an error
  // when new/new[] fails.
  Executor.reportError() << "Insufficient heap space.";
  return AnyValue::poison();
}
/// Emulates calloc().
///
/// \param Name      name passed to the allocator for the new memory object.
/// \param Type      type used to build the null result on failure.
/// \param Args      call arguments; Args[0] is the element count and Args[1]
///                  the element size.
/// \param AllocKind must be Malloc.
///
/// \returns a pointer derived from the new zero-initialized memory object, or
/// a null value if count * size overflows or the allocation fails.
AnyValue Library::executeCalloc(StringRef Name, Type *Type,
                                ArrayRef<AnyValue> Args,
                                MemAllocKind AllocKind) {
  assert(AllocKind == MemAllocKind::Malloc &&
         "Unexpected MemAllocKind for calloc()");

  const APInt &NumElements = Args[0].asInteger();
  const APInt &ElementSize = Args[1].asInteger();

  bool Overflowed = false;
  const APInt TotalBytes = NumElements.umul_ov(ElementSize, Overflowed);
  // Only attempt the allocation when the byte count is representable.
  if (!Overflowed) {
    const IntrusiveRefCntPtr<MemoryObject> Obj =
        Ctx.allocate(TotalBytes.getLimitedValue(), getMaxAlign(DL), Name, 0,
                     MemInitKind::Zeroed, AllocKind);
    if (Obj)
      return Ctx.deriveFromMemoryObject(Obj);
  }
  return AnyValue::getNullValue(Ctx, Type);
}
/// Emulates free() and the operator delete forms that are routed here.
///
/// \param Args call arguments; Args[0] is the pointer being released.
///
/// \returns an empty AnyValue on success or when the pointer address is zero.
/// Returns poison after reporting immediate UB when the pointer has no
/// memory object, does not point at the start of its allocation, refers to an
/// already freed or non-heap object, or when the context refuses to free it.
AnyValue Library::executeFree(ArrayRef<AnyValue> Args) {
  auto &Ptr = Args[0].asPointer();

  // no-op when free is called with a null pointer.
  if (Ptr.address().isZero())
    return AnyValue();

  MemoryObject *Obj = Ptr.getMemoryObject();
  if (!Obj) {
    Executor.reportImmediateUB()
        << "freeing a pointer with nullary provenance.";
    return AnyValue::poison();
  }

  const uint64_t PtrAddr = Ptr.address().getZExtValue();
  const auto BaseAddr = Obj->getAddress();
  if (PtrAddr != BaseAddr) {
    Executor.reportImmediateUB()
        << "freeing a pointer that does not point to the start of an "
           "allocation. Pointer address: 0x"
        << Twine::utohexstr(PtrAddr) << ", allocation base: 0x"
        << Twine::utohexstr(BaseAddr) << ".";
    return AnyValue::poison();
  }

  if (Obj->getState() == MemoryObjectState::Freed) {
    Executor.reportImmediateUB()
        << "double-freeing a memory object allocated at 0x"
        << Twine::utohexstr(BaseAddr) << ".";
    return AnyValue::poison();
  }

  if (!Obj->isHeapAllocated()) {
    Executor.reportImmediateUB() << "freeing a non-heap allocation at 0x"
                                 << Twine::utohexstr(BaseAddr) << ".";
    return AnyValue::poison();
  }

  // Currently we don't check for the case where memory allocated by one
  // allocation family (malloc, calloc, etc.) is released by a deallocation
  // function that belongs to a different family (C++ delete, etc.).
  if (Ctx.free(*Obj))
    return AnyValue();

  Executor.reportImmediateUB()
      << "freeing an invalid pointer at 0x"
      << Twine::utohexstr(Ptr.address().getZExtValue()) << ".";
  return AnyValue::poison();
}
/// Emulates puts(): prints the C string referenced by Args[0] followed by a
/// newline through the event handler.
///
/// \returns an integer 1 of width Executor.getIntSize() on success, or poison
/// when the string cannot be read from memory.
AnyValue Library::executePuts(ArrayRef<AnyValue> Args) {
  const auto Text = readStringFromMemory(Args[0].asPointer());
  if (Text) {
    Handler.onPrint(*Text + "\n");
    return AnyValue(APInt(Executor.getIntSize(), 1));
  }
  return AnyValue::poison();
}
/// Emulates printf().
///
/// The format string is read from interpreter memory (Args[0]); each
/// conversion is rendered with the host formatter and the accumulated text is
/// forwarded to the event handler once the whole format string is processed.
///
/// A conversion has the shape
///   % [flags: - + space # 0] [width: digits] [. precision: digits]
///     [length: h l j z t] specifier
/// Length modifiers are dropped before the chunk is handed to the host
/// formatter because every integer is widened to (unsigned) long long and
/// every floating-point value is converted to double first. A '*' width or
/// precision is not supported and ends up rejected as an unknown specifier.
///
/// Supported specifiers: d i u o x X c f e E g G a A n p s, plus "%%".
///
/// \returns the number of characters produced as an integer of width
/// Executor.getIntSize(), or poison after reporting immediate UB for a
/// malformed format string, a missing or poison argument, an unsupported
/// specifier, or an unreadable string argument.
AnyValue Library::executePrintf(ArrayRef<AnyValue> Args) {
  const auto &FormatPtrVal = Args[0];
  const auto FormatStrOpt = readStringFromMemory(FormatPtrVal.asPointer());
  if (!FormatStrOpt)
    return AnyValue::poison();
  const std::string &FormatStr = *FormatStrOpt;
  const size_t End = FormatStr.size();

  std::string Output;
  raw_string_ostream OS(Output);
  unsigned ArgIndex = 1; // Start from 1 since 0 is the format string.
  size_t I = 0;

  // Advances I past every consecutive character that belongs to Set.
  auto SkipWhileIn = [&](StringRef Set) {
    while (I < End && Set.contains(FormatStr[I]))
      ++I;
  };

  while (I < End) {
    if (FormatStr[I] != '%') {
      OS << FormatStr[I++];
      continue;
    }

    const size_t Start = I++;
    if (I < End && FormatStr[I] == '%') {
      OS << '%';
      ++I;
      continue;
    }

    // Parse the pieces in the order required by the C grammar so that a
    // malformed chunk is never handed to the host formatter.
    SkipWhileIn("-+ #0");
    SkipWhileIn("0123456789");
    if (I < End && FormatStr[I] == '.') {
      ++I;
      SkipWhileIn("0123456789");
    }
    const size_t LengthStart = I;
    SkipWhileIn("hljzt");

    if (I >= End) {
      Executor.reportImmediateUB()
          << "Invalid format string in printf: missing conversion specifier.";
      return AnyValue::poison();
    }
    const char Specifier = FormatStr[I++];

    // '%' plus flags, width and precision; length modifiers and the specifier
    // are re-added per conversion below.
    const std::string CleanChunk =
        FormatStr.substr(Start, LengthStart - Start);

    if (ArgIndex >= Args.size()) {
      Executor.reportImmediateUB() << "Not enough arguments provided for the "
                                      "format string. Required argument for '"
                                   << Specifier << "'.";
      return AnyValue::poison();
    }

    // Remember the position of the consumed argument so diagnostics use the
    // same 0-based indexing into Args as executeLibcall does.
    const unsigned CurArgIndex = ArgIndex++;
    const auto &Arg = Args[CurArgIndex];
    if (Arg.isPoison()) {
      Executor.reportImmediateUB()
          << "Poison argument passed to printf for format specifier '"
          << Specifier << "' at argument index " << CurArgIndex << ".";
      return AnyValue::poison();
    }

    switch (Specifier) {
    case 'd':
    case 'i': {
      std::string HostFmt = CleanChunk + "ll" + Specifier;
      OS << format(HostFmt.c_str(),
                   static_cast<long long>(Arg.asInteger().getSExtValue()));
      break;
    }
    case 'u':
    case 'o':
    case 'x':
    case 'X': {
      // FIXME: The format specifiers "b" and "B" are not implemented here
      // since currently MSVC doesn't support it.
      std::string HostFmt = CleanChunk + "ll" + Specifier;
      OS << format(HostFmt.c_str(), static_cast<unsigned long long>(
                                        Arg.asInteger().getZExtValue()));
      break;
    }
    case 'c': {
      std::string HostFmt = CleanChunk + Specifier;
      OS << format(HostFmt.c_str(),
                   static_cast<int>(Arg.asInteger().getZExtValue()));
      break;
    }
    case 'f':
    case 'e':
    case 'E':
    case 'g':
    case 'G':
    case 'a':
    case 'A': {
      std::string HostFmt = CleanChunk + Specifier;
      OS << format(HostFmt.c_str(), Arg.asFloat().convertToDouble());
      break;
    }
    case 'n': {
      // Store the number of characters produced so far as a 32-bit integer.
      OS.flush();
      Executor.store(Arg, Align(4), AnyValue(APInt(32, Output.size())),
                     Type::getInt32Ty(Ctx.getContext()));
      break;
    }
    case 'p': {
      std::string HostFmt = CleanChunk + "llx";
      OS << "0x"
         << format(HostFmt.c_str(),
                   static_cast<unsigned long long>(
                       Arg.asPointer().address().getZExtValue()));
      break;
    }
    case 's': {
      auto StrOpt = readStringFromMemory(Arg.asPointer());
      if (!StrOpt)
        return AnyValue::poison();
      std::string HostFmt = CleanChunk + "s";
      OS << format(HostFmt.c_str(), StrOpt->c_str());
      break;
    }
    default:
      Executor.reportImmediateUB()
          << "Unknown or unsupported format specifier '" << Specifier
          << "' in printf.";
      return AnyValue::poison();
    }
  }

  OS.flush();
  Handler.onPrint(Output);
  return AnyValue(APInt(Executor.getIntSize(), Output.size()));
}
/// Emulates exit(): asks the executor to stop the program with kind Exited,
/// passing along the zero-extended value of Args[0] as the exit code.
///
/// \returns an empty AnyValue.
AnyValue Library::executeExit(ArrayRef<AnyValue> Args) {
  using ExitKind = ProgramExitInfo::ProgramExitKind;
  const auto ExitCode = Args[0].asInteger().getZExtValue();
  Executor.requestProgramExit(ExitKind::Exited, ExitCode);
  return AnyValue();
}
/// Emulates abort(): asks the executor to stop the program with kind Aborted.
///
/// \returns an empty AnyValue.
AnyValue Library::executeAbort() {
  using ExitKind = ProgramExitInfo::ProgramExitKind;
  Executor.requestProgramExit(ExitKind::Aborted);
  return AnyValue();
}
/// Emulates std::terminate(): asks the executor to stop the program with kind
/// Terminated.
///
/// \returns an empty AnyValue.
AnyValue Library::executeTerminate() {
  using ExitKind = ProgramExitInfo::ProgramExitKind;
  Executor.requestProgramExit(ExitKind::Terminated);
  return AnyValue();
}
/// Dispatches a recognized library call to its emulation.
///
/// \param LF   the library function identified for the callee.
/// \param Name callee name, forwarded to the allocation routines.
/// \param Type type forwarded to the allocation routines, which use it to
///             build a null result.
/// \param Args evaluated call arguments.
///
/// Every argument is checked for poison before dispatching; the first poison
/// argument is reported as immediate UB and poison is returned. Note that
/// this check runs even when \p LF turns out not to be handled here.
///
/// \returns the result of the emulated call, or std::nullopt when \p LF is
/// not implemented by this library.
std::optional<AnyValue> Library::executeLibcall(LibFunc LF, StringRef Name,
                                                Type *Type,
                                                ArrayRef<AnyValue> Args) {
  unsigned Index = 0;
  for (const AnyValue &Arg : Args) {
    if (Arg.isPoison()) {
      Executor.reportImmediateUB()
          << "Poison argument passed to a library call at argument index "
          << Index << ".";
      return AnyValue::poison();
    }
    ++Index;
  }

  switch (LF) {
  case LibFunc_malloc:
    return executeMalloc(Name, Type, Args, MemAllocKind::Malloc);
  // operator new(size_t): "_Znwm" when size_t is unsigned long and "_Znwj"
  // when it is unsigned int (32-bit targets).
  case LibFunc_Znwm:
  case LibFunc_Znwj:
    return executeMalloc(Name, Type, Args, MemAllocKind::New);
  // operator new[](size_t): "_Znam" / "_Znaj", same split as above.
  case LibFunc_Znam:
  case LibFunc_Znaj:
    return executeMalloc(Name, Type, Args, MemAllocKind::NewArray);
  case LibFunc_calloc:
    return executeCalloc(Name, Type, Args, MemAllocKind::Malloc);
  case LibFunc_free:
  case LibFunc_ZdaPv:
  case LibFunc_ZdlPv:
    return executeFree(Args);
  case LibFunc_puts:
    return executePuts(Args);
  case LibFunc_printf:
    return executePrintf(Args);
  case LibFunc_exit:
    return executeExit(Args);
  case LibFunc_abort:
    return executeAbort();
  case LibFunc_terminate:
    return executeTerminate();
  default:
    return std::nullopt;
  }
}
} // namespace llvm::ubi