//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements OnDiskGraphDB, an on-disk CAS node database that is
/// independent of any particular hashing algorithm. It only needs to be
/// configured with the hash size, and it controls the schema of the storage.
///
/// OnDiskGraphDB defines:
///
/// - How the data is stored inside the database, either as a standalone file
/// or allocated inside a data pool.
/// - How references to other objects inside the same database are stored. They
/// are stored as internal references, instead of full hash values, to save
/// space.
/// - How to chain databases together and import objects from upstream
/// databases.
///
/// Here's a top-level description of the current layout:
///
/// - db/index.<version>: a file for the "index" table, named by \a
/// IndexTableName and managed by \a TrieRawHashMap. Each entry is an 8-byte
/// value that is accessed atomically, describing the object kind and
/// where/how it's stored (including an optional file offset). See \a
/// TrieRecord for more details.
/// - db/data.<version>: a file for the "data" table, named by \a
/// DataPoolTableName and managed by \a DataStore. New objects within
/// TrieRecord::MaxEmbeddedSize are inserted here as \a
/// TrieRecord::StorageKind::DataPool.
/// - db/obj.<offset>.<version>: a file storing an object outside the main
/// "data" table, named by its offset into the "index" table, with the
/// format of \a TrieRecord::StorageKind::Standalone.
/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
/// main "data" table, named by its offset into the "index" table, with
/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
/// outside the main "data" table, named by its offset into the "index" table,
/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
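///
/// For illustration, assuming a version suffix of "v1" (the actual suffix
/// comes from \a CASFormatVersion), a database directory might contain:
///
/// \code
///   db/index.v1      // TrieRawHashMap-backed index table.
///   db/data.v1       // Data pool for objects within MaxEmbeddedSize.
///   db/obj.88.v1     // Standalone object whose index offset is 88.
///   db/leaf.152.v1   // Standalone leaf: raw data only.
///   db/leaf+0.216.v1 // Standalone leaf with a trailing '\0'.
/// \endcode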
//
//===----------------------------------------------------------------------===//
#include "llvm/CAS/OnDiskGraphDB.h"
#include "OnDiskCommon.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CAS/OnDiskDataAllocator.h"
#include "llvm/CAS/OnDiskTrieRawHashMap.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include <atomic>
#include <mutex>
#include <optional>
#define DEBUG_TYPE "on-disk-cas"
using namespace llvm;
using namespace llvm::cas;
using namespace llvm::cas::ondisk;
static constexpr StringLiteral IndexTableName = "llvm.cas.index";
static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";
static constexpr StringLiteral IndexFilePrefix = "index.";
static constexpr StringLiteral DataPoolFilePrefix = "data.";
static constexpr StringLiteral FilePrefixObject = "obj.";
static constexpr StringLiteral FilePrefixLeaf = "leaf.";
static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";
static Error createCorruptObjectError(Expected<ArrayRef<uint8_t>> ID) {
if (!ID)
return ID.takeError();
return createStringError(llvm::errc::invalid_argument,
"corrupt object '" + toHex(*ID) + "'");
}
namespace {
/// Trie record data: 8 bytes, atomic<uint64_t>
/// - 1-byte: StorageKind
/// - 7-bytes: DataStoreOffset (offset into referenced file)
class TrieRecord {
public:
enum class StorageKind : uint8_t {
/// Unknown object.
Unknown = 0,
/// data.vX: main pool, full DataStore record.
DataPool = 1,
/// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
Standalone = 10,
/// leaf.<TrieRecordOffset>.vX: standalone, just the data. The file contents
/// are exactly the data content, and the file size matches the data size. No
/// refs.
StandaloneLeaf = 11,
/// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an extra
/// null character ('\0'). The file size is one byte bigger than the data
/// size. No refs.
StandaloneLeaf0 = 12,
};
static StringRef getStandaloneFilePrefix(StorageKind SK) {
switch (SK) {
default:
llvm_unreachable("Expected standalone storage kind");
case TrieRecord::StorageKind::Standalone:
return FilePrefixObject;
case TrieRecord::StorageKind::StandaloneLeaf:
return FilePrefixLeaf;
case TrieRecord::StorageKind::StandaloneLeaf0:
return FilePrefixLeaf0;
}
}
enum Limits : int64_t {
/// Objects bigger than 64KB are saved in standalone files instead of being
/// embedded in the data pool.
MaxEmbeddedSize = 64LL * 1024LL - 1,
};
struct Data {
StorageKind SK = StorageKind::Unknown;
FileOffset Offset;
};
/// Pack StorageKind and Offset from Data into 8 byte TrieRecord.
static uint64_t pack(Data D) {
assert(D.Offset.get() < (int64_t)(1ULL << 56));
uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
assert(D.SK != StorageKind::Unknown || Packed == 0);
#ifndef NDEBUG
Data RoundTrip = unpack(Packed);
assert(D.SK == RoundTrip.SK);
assert(D.Offset.get() == RoundTrip.Offset.get());
#endif
return Packed;
}
/// Unpack TrieRecord into Data.
static Data unpack(uint64_t Packed) {
Data D;
if (!Packed)
return D;
D.SK = (StorageKind)(Packed >> 56);
D.Offset = FileOffset(Packed & (UINT64_MAX >> 8));
return D;
}
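/// For example (a minimal sketch of the scheme above): an object stored in
/// the data pool at offset 0x20 packs as
/// \code
///   Data D{StorageKind::DataPool, FileOffset(0x20)};
///   uint64_t Packed = pack(D); // == 0x0100000000000020
///   // Top byte: StorageKind::DataPool == 1; low 7 bytes: offset 0x20.
/// \endcode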
TrieRecord() : Storage(0) {}
Data load() const { return unpack(Storage); }
bool compare_exchange_strong(Data &Existing, Data New);
private:
std::atomic<uint64_t> Storage;
};
/// DataStore record data: 4B + size? + refs? + data + 0
/// - 4-bytes: Header
/// - {0,4,8}-bytes: DataSize (may be packed in Header)
/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
/// - <data>
/// - 1-byte: 0-term
struct DataRecordHandle {
/// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
/// convenience to avoid computing padding later.
enum class NumRefsFlags : uint8_t {
Uses0B = 0U,
Uses1B = 1U,
Uses2B = 2U,
Uses4B = 3U,
Uses8B = 4U,
Max = Uses8B,
};
/// DataSize storage: 8B, 4B, 2B, or 1B.
enum class DataSizeFlags {
Uses1B = 0U,
Uses2B = 1U,
Uses4B = 2U,
Uses8B = 3U,
Max = Uses8B,
};
/// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
enum class RefKindFlags {
InternalRef = 0U,
InternalRef4B = 1U,
Max = InternalRef4B,
};
enum Counts : int {
NumRefsShift = 0,
NumRefsBits = 3,
DataSizeShift = NumRefsShift + NumRefsBits,
DataSizeBits = 2,
RefKindShift = DataSizeShift + DataSizeBits,
RefKindBits = 1,
};
static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
0,
"Not enough bits");
static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
0,
"Not enough bits");
static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
0,
"Not enough bits");
/// Layout of the DataRecordHandle and how to decode it.
struct LayoutFlags {
NumRefsFlags NumRefs;
DataSizeFlags DataSize;
RefKindFlags RefKind;
static uint64_t pack(LayoutFlags LF) {
unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
((unsigned)LF.DataSize << DataSizeShift) |
((unsigned)LF.RefKind << RefKindShift);
#ifndef NDEBUG
LayoutFlags RoundTrip = unpack(Packed);
assert(LF.NumRefs == RoundTrip.NumRefs);
assert(LF.DataSize == RoundTrip.DataSize);
assert(LF.RefKind == RoundTrip.RefKind);
#endif
return Packed;
}
static LayoutFlags unpack(uint64_t Storage) {
assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
LayoutFlags LF;
LF.NumRefs =
(NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
((1U << DataSizeBits) - 1));
LF.RefKind =
(RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
return LF;
}
};
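/// For example, a record with a packed 2-byte ref count, a 4-byte size
/// field, and full 8-byte refs packs as
/// \code
///   LayoutFlags LF{NumRefsFlags::Uses2B, DataSizeFlags::Uses4B,
///                  RefKindFlags::InternalRef};
///   uint64_t Packed = LayoutFlags::pack(LF);
///   // == (2 << NumRefsShift) | (2 << DataSizeShift) | (0 << RefKindShift)
///   // == 0x12
/// \endcode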
/// Header layout:
/// - 1-byte: LayoutFlags
/// - 1-byte: 1B size field
/// - {0,2}-bytes: 2B size field
struct Header {
using PackTy = uint32_t;
PackTy Packed;
static constexpr unsigned LayoutFlagsShift =
(sizeof(PackTy) - 1) * CHAR_BIT;
};
struct Input {
InternalRefArrayRef Refs;
ArrayRef<char> Data;
};
LayoutFlags getLayoutFlags() const {
return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
}
uint64_t getDataSize() const;
void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
uint32_t getNumRefs() const;
void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
int64_t getRefsRelOffset() const;
int64_t getDataRelOffset() const;
static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
return DataRelOffset + DataSize + 1;
}
uint64_t getTotalSize() const {
return getDataRelOffset() + getDataSize() + 1;
}
/// Describe the layout of data stored and how to decode from
/// DataRecordHandle.
struct Layout {
explicit Layout(const Input &I);
LayoutFlags Flags;
uint64_t DataSize = 0;
uint32_t NumRefs = 0;
int64_t RefsRelOffset = 0;
int64_t DataRelOffset = 0;
uint64_t getTotalSize() const {
return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
}
};
InternalRefArrayRef getRefs() const {
assert(H && "Expected valid handle");
auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
size_t Size = getNumRefs();
if (!Size)
return InternalRefArrayRef();
if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size);
return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
}
ArrayRef<char> getData() const {
assert(H && "Expected valid handle");
return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
getDataSize());
}
static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
const Input &I);
static Expected<DataRecordHandle>
createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
const Input &I);
static DataRecordHandle construct(char *Mem, const Input &I);
static DataRecordHandle get(const char *Mem) {
return DataRecordHandle(
*reinterpret_cast<const DataRecordHandle::Header *>(Mem));
}
static Expected<DataRecordHandle>
getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);
explicit operator bool() const { return H; }
const Header &getHeader() const { return *H; }
DataRecordHandle() = default;
explicit DataRecordHandle(const Header &H) : H(&H) {}
private:
static DataRecordHandle constructImpl(char *Mem, const Input &I,
const Layout &L);
const Header *H = nullptr;
};
/// Proxy for any on-disk object or raw data.
struct OnDiskContent {
std::optional<DataRecordHandle> Record;
std::optional<ArrayRef<char>> Bytes;
};
/// Data loaded into memory from a standalone file.
class StandaloneDataInMemory {
public:
OnDiskContent getContent() const;
StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
TrieRecord::StorageKind SK)
: Region(std::move(Region)), SK(SK) {
#ifndef NDEBUG
bool IsStandalone = false;
switch (SK) {
case TrieRecord::StorageKind::Standalone:
case TrieRecord::StorageKind::StandaloneLeaf:
case TrieRecord::StorageKind::StandaloneLeaf0:
IsStandalone = true;
break;
default:
break;
}
assert(IsStandalone);
#endif
}
private:
std::unique_ptr<sys::fs::mapped_file_region> Region;
TrieRecord::StorageKind SK;
};
/// Container for looking up loaded standalone objects.
template <size_t NumShards> class StandaloneDataMap {
static_assert(isPowerOf2_64(NumShards), "Expected power of 2");
public:
uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
std::unique_ptr<sys::fs::mapped_file_region> Region);
const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }
private:
struct Shard {
/// Needs to store a std::unique_ptr for a stable address identity.
DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
mutable std::mutex Mutex;
};
Shard &getShard(ArrayRef<uint8_t> Hash) {
return const_cast<Shard &>(
const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
}
const Shard &getShard(ArrayRef<uint8_t> Hash) const {
static_assert(NumShards <= 256, "Expected only 8 bits of shard");
return Shards[Hash[0] % NumShards];
}
Shard Shards[NumShards];
};
using StandaloneDataMapTy = StandaloneDataMap<16>;
/// A vector of internal node references.
class InternalRefVector {
public:
void push_back(InternalRef Ref) {
if (NeedsFull)
return FullRefs.push_back(Ref);
if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
return SmallRefs.push_back(*Small);
NeedsFull = true;
assert(FullRefs.empty());
FullRefs.reserve(SmallRefs.size() + 1);
for (InternalRef4B Small : SmallRefs)
FullRefs.push_back(Small);
FullRefs.push_back(Ref);
SmallRefs.clear();
}
operator InternalRefArrayRef() const {
assert(SmallRefs.empty() || FullRefs.empty());
return NeedsFull ? InternalRefArrayRef(FullRefs)
: InternalRefArrayRef(SmallRefs);
}
private:
bool NeedsFull = false;
SmallVector<InternalRef4B> SmallRefs;
SmallVector<InternalRef> FullRefs;
};
} // namespace
Expected<DataRecordHandle> DataRecordHandle::createWithError(
function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
Layout L(I);
if (Expected<char *> Mem = Alloc(L.getTotalSize()))
return constructImpl(*Mem, I, L);
else
return Mem.takeError();
}
DataRecordHandle
DataRecordHandle::create(function_ref<char *(size_t Size)> Alloc,
const Input &I) {
Layout L(I);
return constructImpl(Alloc(L.getTotalSize()), I, L);
}
ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) {
// Store the file offset as it is.
assert(!(Offset.get() & 0x1));
return ObjectHandle(Offset.get());
}
ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) {
// Store the in-memory pointer with its lowest bit set.
assert(!(Ptr & 0x1));
return ObjectHandle(Ptr | 1);
}
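// The low bit thus discriminates the two cases when decoding: a cleared bit
// means the opaque data is a file offset into the data pool; a set bit means
// it is a pointer to a StandaloneDataInMemory (see getContentFromHandle()).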
/// Proxy for an on-disk index record.
struct OnDiskGraphDB::IndexProxy {
FileOffset Offset;
ArrayRef<uint8_t> Hash;
TrieRecord &Ref;
};
template <size_t N>
uintptr_t StandaloneDataMap<N>::insert(
ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
std::unique_ptr<sys::fs::mapped_file_region> Region) {
auto &S = getShard(Hash);
std::lock_guard<std::mutex> Lock(S.Mutex);
auto &V = S.Map[Hash.data()];
if (!V)
V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
return reinterpret_cast<uintptr_t>(V.get());
}
template <size_t N>
const StandaloneDataInMemory *
StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const {
auto &S = getShard(Hash);
std::lock_guard<std::mutex> Lock(S.Mutex);
auto I = S.Map.find(Hash.data());
if (I == S.Map.end())
return nullptr;
return &*I->second;
}
namespace {
/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
/// expensive to register/unregister at this rate.
///
/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
/// files and has a signal handler registered that removes them all.
class TempFile {
bool Done = false;
TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {}
public:
/// This creates a temporary file with createUniqueFile.
static Expected<TempFile> create(const Twine &Model);
TempFile(TempFile &&Other) { *this = std::move(Other); }
TempFile &operator=(TempFile &&Other) {
TmpName = std::move(Other.TmpName);
FD = Other.FD;
Other.Done = true;
Other.FD = -1;
return *this;
}
// Name of the temporary file.
std::string TmpName;
// The open file descriptor.
int FD = -1;
// Keep this with the given name.
Error keep(const Twine &Name);
Error discard();
// Discards the file in the destructor unless keep() was called.
~TempFile() { consumeError(discard()); }
};
class MappedTempFile {
public:
char *data() const { return Map.data(); }
size_t size() const { return Map.size(); }
Error discard() {
assert(Map && "Map already destroyed");
Map.unmap();
return Temp.discard();
}
Error keep(const Twine &Name) {
assert(Map && "Map already destroyed");
Map.unmap();
return Temp.keep(Name);
}
MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
: Temp(std::move(Temp)), Map(std::move(Map)) {}
private:
TempFile Temp;
sys::fs::mapped_file_region Map;
};
} // namespace
Error TempFile::discard() {
Done = true;
if (FD != -1) {
sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
if (std::error_code EC = sys::fs::closeFile(File))
return errorCodeToError(EC);
}
FD = -1;
// Always try to remove the file.
if (!TmpName.empty()) {
std::error_code EC = sys::fs::remove(TmpName);
if (EC)
return errorCodeToError(EC);
}
TmpName = "";
return Error::success();
}
Error TempFile::keep(const Twine &Name) {
assert(!Done);
Done = true;
// Always try to close and rename.
std::error_code RenameEC = sys::fs::rename(TmpName, Name);
if (!RenameEC)
TmpName = "";
sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
if (std::error_code EC = sys::fs::closeFile(File))
return errorCodeToError(EC);
FD = -1;
return errorCodeToError(RenameEC);
}
Expected<TempFile> TempFile::create(const Twine &Model) {
int FD;
SmallString<128> ResultPath;
if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
return errorCodeToError(EC);
TempFile Ret(ResultPath, FD);
return std::move(Ret);
}
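// Typical usage of TempFile (a minimal sketch; error handling elided):
//
//   Expected<TempFile> Tmp = TempFile::create(FinalPath + ".%%%%%%");
//   // ... write through Tmp->FD ...
//   if (Error E = Tmp->keep(FinalPath))
//     return E;
//   // Otherwise ~TempFile() discards the file via discard().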
bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
uint64_t ExistingPacked = pack(Existing);
uint64_t NewPacked = pack(New);
if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
return true;
Existing = unpack(ExistingPacked);
return false;
}
DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) {
return constructImpl(Mem, I, Layout(I));
}
Expected<DataRecordHandle>
DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
FileOffset Offset) {
auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
if (!HeaderData)
return HeaderData.takeError();
auto Record = DataRecordHandle::get(HeaderData->data());
if (Record.getTotalSize() + Offset.get() > Pool.size())
return createStringError(
make_error_code(std::errc::illegal_byte_sequence),
"data record span passed the end of the data pool");
return Record;
}
DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
const Layout &L) {
char *Next = Mem + sizeof(Header);
// Fill in Packed and set other data, then come back to construct the header.
Header::PackTy Packed = 0;
Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;
// Construct DataSize.
switch (L.Flags.DataSize) {
case DataSizeFlags::Uses1B:
assert(I.Data.size() <= UINT8_MAX);
Packed |= (Header::PackTy)I.Data.size()
<< ((sizeof(Packed) - 2) * CHAR_BIT);
break;
case DataSizeFlags::Uses2B:
assert(I.Data.size() <= UINT16_MAX);
Packed |= (Header::PackTy)I.Data.size()
<< ((sizeof(Packed) - 4) * CHAR_BIT);
break;
case DataSizeFlags::Uses4B:
support::endian::write32le(Next, I.Data.size());
Next += 4;
break;
case DataSizeFlags::Uses8B:
support::endian::write64le(Next, I.Data.size());
Next += 8;
break;
}
// Construct NumRefs.
//
// NOTE: May be writing NumRefs even if there are zero refs in order to fix
// alignment.
switch (L.Flags.NumRefs) {
case NumRefsFlags::Uses0B:
break;
case NumRefsFlags::Uses1B:
assert(I.Refs.size() <= UINT8_MAX);
Packed |= (Header::PackTy)I.Refs.size()
<< ((sizeof(Packed) - 2) * CHAR_BIT);
break;
case NumRefsFlags::Uses2B:
assert(I.Refs.size() <= UINT16_MAX);
Packed |= (Header::PackTy)I.Refs.size()
<< ((sizeof(Packed) - 4) * CHAR_BIT);
break;
case NumRefsFlags::Uses4B:
support::endian::write32le(Next, I.Refs.size());
Next += 4;
break;
case NumRefsFlags::Uses8B:
support::endian::write64le(Next, I.Refs.size());
Next += 8;
break;
}
// Construct Refs[].
if (!I.Refs.empty()) {
assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
llvm::copy(RefsBuffer, Next);
Next += RefsBuffer.size();
}
// Construct Data and the trailing null.
assert(isAddrAligned(Align(8), Next));
llvm::copy(I.Data, Next);
Next[I.Data.size()] = 0;
// Construct the header itself and return.
Header *H = new (Mem) Header{Packed};
DataRecordHandle Record(*H);
assert(Record.getData() == I.Data);
assert(Record.getNumRefs() == I.Refs.size());
assert(Record.getRefs() == I.Refs);
assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
return Record;
}
DataRecordHandle::Layout::Layout(const Input &I) {
// Start initial relative offsets right after the Header.
uint64_t RelOffset = sizeof(Header);
// Initialize the easy stuff.
DataSize = I.Data.size();
NumRefs = I.Refs.size();
// Check refs size.
Flags.RefKind =
I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;
// Find the smallest slot available for DataSize.
bool Has1B = true;
bool Has2B = true;
if (DataSize <= UINT8_MAX && Has1B) {
Flags.DataSize = DataSizeFlags::Uses1B;
Has1B = false;
} else if (DataSize <= UINT16_MAX && Has2B) {
Flags.DataSize = DataSizeFlags::Uses2B;
Has2B = false;
} else if (DataSize <= UINT32_MAX) {
Flags.DataSize = DataSizeFlags::Uses4B;
RelOffset += 4;
} else {
Flags.DataSize = DataSizeFlags::Uses8B;
RelOffset += 8;
}
// Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
if (!NumRefs) {
Flags.NumRefs = NumRefsFlags::Uses0B;
} else if (NumRefs <= UINT8_MAX && Has1B) {
Flags.NumRefs = NumRefsFlags::Uses1B;
Has1B = false;
} else if (NumRefs <= UINT16_MAX && Has2B) {
Flags.NumRefs = NumRefsFlags::Uses2B;
Has2B = false;
} else {
Flags.NumRefs = NumRefsFlags::Uses4B;
RelOffset += 4;
}
// Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
// padding rules when reading and writing. This also bumps RelOffset.
//
// The value for NumRefs is strictly limited to UINT32_MAX, but it can be
// stored as 8B. This means we can *always* find a size to grow.
//
// NOTE: Only call this once.
auto GrowSizeFieldsBy4B = [&]() {
assert(isAligned(Align(4), RelOffset));
RelOffset += 4;
assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
"Expected to be able to grow NumRefs8B");
// First try to grow DataSize. NumRefs will not (yet) be 8B, and if
// DataSize is upgraded to 8B it'll already be aligned.
//
// Failing that, grow NumRefs.
if (Flags.DataSize < DataSizeFlags::Uses4B)
Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
else if (Flags.DataSize < DataSizeFlags::Uses8B)
Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
else if (Flags.NumRefs < NumRefsFlags::Uses4B)
Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
else
Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
};
assert(isAligned(Align(4), RelOffset));
if (Flags.RefKind == RefKindFlags::InternalRef) {
// List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
// without padding.
if (!isAligned(Align(8), RelOffset))
GrowSizeFieldsBy4B();
assert(isAligned(Align(8), RelOffset));
RefsRelOffset = RelOffset;
RelOffset += 8 * NumRefs;
} else {
// The array of 4B refs doesn't need 8B alignment, but the data will need
// to be 8B-aligned. Detect this now, and, if necessary, shift everything
// by 4B by growing one of the sizes.
// If we removed the need for 8B-alignment of data, there would be <1% savings
// in disk storage for a clang build using MCCAS, but the 8B-alignment may be
// useful in the future, so keep it for now.
uint64_t RefListSize = 4 * NumRefs;
if (!isAligned(Align(8), RelOffset + RefListSize))
GrowSizeFieldsBy4B();
RefsRelOffset = RelOffset;
RelOffset += RefListSize;
}
assert(isAligned(Align(8), RelOffset));
DataRelOffset = RelOffset;
}
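// Worked example of the layout above: a record with three 4-byte refs and 10
// bytes of data ends up with DataSize packed in the 1B header slot and
// NumRefs packed in the 2B slot, so the refs start at relative offset 4 and
// span 12 bytes, the data starts at offset 16 (8B-aligned without growing any
// size field), and the total size is 16 + 10 + 1 = 27 bytes including the
// trailing null.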
uint64_t DataRecordHandle::getDataSize() const {
int64_t RelOffset = sizeof(Header);
auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
switch (getLayoutFlags().DataSize) {
case DataSizeFlags::Uses1B:
return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
case DataSizeFlags::Uses2B:
return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
UINT16_MAX;
case DataSizeFlags::Uses4B:
return support::endian::read32le(DataSizePtr);
case DataSizeFlags::Uses8B:
return support::endian::read64le(DataSizePtr);
}
llvm_unreachable("Unknown DataSizeFlags enum");
}
void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
if (LF.DataSize >= DataSizeFlags::Uses4B)
RelOffset += 4;
if (LF.DataSize >= DataSizeFlags::Uses8B)
RelOffset += 4;
}
uint32_t DataRecordHandle::getNumRefs() const {
LayoutFlags LF = getLayoutFlags();
int64_t RelOffset = sizeof(Header);
skipDataSize(LF, RelOffset);
auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
switch (LF.NumRefs) {
case NumRefsFlags::Uses0B:
return 0;
case NumRefsFlags::Uses1B:
return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
case NumRefsFlags::Uses2B:
return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
UINT16_MAX;
case NumRefsFlags::Uses4B:
return support::endian::read32le(NumRefsPtr);
case NumRefsFlags::Uses8B:
return support::endian::read64le(NumRefsPtr);
}
llvm_unreachable("Unknown NumRefsFlags enum");
}
void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
if (LF.NumRefs >= NumRefsFlags::Uses4B)
RelOffset += 4;
if (LF.NumRefs >= NumRefsFlags::Uses8B)
RelOffset += 4;
}
int64_t DataRecordHandle::getRefsRelOffset() const {
LayoutFlags LF = getLayoutFlags();
int64_t RelOffset = sizeof(Header);
skipDataSize(LF, RelOffset);
skipNumRefs(LF, RelOffset);
return RelOffset;
}
int64_t DataRecordHandle::getDataRelOffset() const {
LayoutFlags LF = getLayoutFlags();
int64_t RelOffset = sizeof(Header);
skipDataSize(LF, RelOffset);
skipNumRefs(LF, RelOffset);
uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
RelOffset += RefSize * getNumRefs();
return RelOffset;
}
Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
if (UpstreamDB) {
if (auto E = UpstreamDB->validate(Deep, Hasher))
return E;
}
return Index.validate([&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record)
-> Error {
auto formatError = [&](Twine Msg) {
return createStringError(
llvm::errc::illegal_byte_sequence,
"bad record at 0x" +
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
Msg.str());
};
if (Record.Data.size() != sizeof(TrieRecord))
return formatError("wrong data record size");
if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
return formatError("wrong data record alignment");
auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
TrieRecord::Data D = R->load();
std::unique_ptr<MemoryBuffer> FileBuffer;
if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
(uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
(uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
(uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
(uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
return formatError("invalid record kind value");
auto Ref = InternalRef::getFromOffset(Offset);
auto I = getIndexProxyFromRef(Ref);
if (!I)
return I.takeError();
switch (D.SK) {
case TrieRecord::StorageKind::Unknown:
// This could be an abandoned entry due to a termination before updating
// the record. It can be reused by later insertion so just skip this entry
// for now.
return Error::success();
case TrieRecord::StorageKind::DataPool:
// Check that the offset is a positive value, and large enough to hold the
// header for the data record.
if (D.Offset.get() <= 0 ||
(uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >=
DataPool.size())
return formatError("datapool record out of bound");
break;
case TrieRecord::StorageKind::Standalone:
case TrieRecord::StorageKind::StandaloneLeaf:
case TrieRecord::StorageKind::StandaloneLeaf0:
SmallString<256> Path;
getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
// If we need to validate the content of the file later, load the buffer
// here. Otherwise, just check for the existence of the file.
if (Deep) {
auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
/*RequiresNullTerminator=*/false);
if (!File || !*File)
return formatError("record file \'" + Path + "\' does not exist");
FileBuffer = std::move(*File);
} else if (!llvm::sys::fs::exists(Path))
return formatError("record file \'" + Path + "\' does not exist");
}
if (!Deep)
return Error::success();
auto dataError = [&](Twine Msg) {
return createStringError(llvm::errc::illegal_byte_sequence,
"bad data for digest \'" + toHex(I->Hash) +
"\': " + Msg.str());
};
SmallVector<ArrayRef<uint8_t>> Refs;
ArrayRef<char> StoredData;
switch (D.SK) {
case TrieRecord::StorageKind::Unknown:
llvm_unreachable("already handled");
case TrieRecord::StorageKind::DataPool: {
auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
if (!DataRecord)
return dataError(toString(DataRecord.takeError()));
for (auto InternRef : DataRecord->getRefs()) {
auto Index = getIndexProxyFromRef(InternRef);
if (!Index)
return Index.takeError();
Refs.push_back(Index->Hash);
}
StoredData = DataRecord->getData();
break;
}
case TrieRecord::StorageKind::Standalone: {
if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
return dataError("file is not big enough to contain the record header");
auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
if (DataRecord.getTotalSize() > FileBuffer->getBufferSize())
return dataError(
"data record spans past the end of the standalone file");
for (auto InternRef : DataRecord.getRefs()) {
auto Index = getIndexProxyFromRef(InternRef);
if (!Index)
return Index.takeError();
Refs.push_back(Index->Hash);
}
StoredData = DataRecord.getData();
break;
}
case TrieRecord::StorageKind::StandaloneLeaf:
case TrieRecord::StorageKind::StandaloneLeaf0: {
StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
if (!FileBuffer->getBuffer().ends_with('\0'))
return dataError("standalone file is not zero terminated");
StoredData = StoredData.drop_back(1);
}
break;
}
}
SmallVector<uint8_t> ComputedHash;
Hasher(Refs, StoredData, ComputedHash);
if (I->Hash != ArrayRef(ComputedHash))
return dataError("hash mismatch, got \'" + toHex(ComputedHash) +
"\' instead");
return Error::success();
});
}
void OnDiskGraphDB::print(raw_ostream &OS) const {
OS << "on-disk-root-path: " << RootPath << "\n";
struct PoolInfo {
uint64_t Offset;
};
SmallVector<PoolInfo> Pool;
OS << "\n";
OS << "index:\n";
Index.print(OS, [&](ArrayRef<char> Data) {
assert(Data.size() == sizeof(TrieRecord));
assert(isAligned(Align::Of<TrieRecord>(), Data.size()));
auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
TrieRecord::Data D = R->load();
OS << " SK=";
switch (D.SK) {
case TrieRecord::StorageKind::Unknown:
OS << "unknown ";
break;
case TrieRecord::StorageKind::DataPool:
OS << "datapool ";
Pool.push_back({D.Offset.get()});
break;
case TrieRecord::StorageKind::Standalone:
OS << "standalone-data ";
break;
case TrieRecord::StorageKind::StandaloneLeaf:
OS << "standalone-leaf ";
break;
case TrieRecord::StorageKind::StandaloneLeaf0:
OS << "standalone-leaf+0";
break;
}
OS << " Offset=" << (void *)D.Offset.get();
});
if (Pool.empty())
return;
OS << "\n";
OS << "pool:\n";
llvm::sort(
Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
for (PoolInfo PI : Pool) {
OS << "- addr=" << (void *)PI.Offset << " ";
auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
if (!D) {
OS << "error: " << toString(D.takeError());
return;
}
OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
<< " size=" << D->getTotalSize()
<< " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
}
}
Expected<OnDiskGraphDB::IndexProxy>
OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
auto P = Index.insertLazy(
Hash, [](FileOffset TentativeOffset,
OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
assert(TentativeValue.Data.size() == sizeof(TrieRecord));
assert(
isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data()));
new (TentativeValue.Data.data()) TrieRecord();
});
if (LLVM_UNLIKELY(!P))
return P.takeError();
assert(*P && "Expected insertion");
return getIndexProxyFromPointer(*P);
}
OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
OnDiskTrieRawHashMap::ConstOnDiskPtr P) const {
assert(P);
assert(P.getOffset());
return IndexProxy{P.getOffset(), P->Hash,
*const_cast<TrieRecord *>(
reinterpret_cast<const TrieRecord *>(P->Data.data()))};
}
Expected<ObjectID> OnDiskGraphDB::getReference(ArrayRef<uint8_t> Hash) {
auto I = indexHash(Hash);
if (LLVM_UNLIKELY(!I))
return I.takeError();
return getExternalReference(*I);
}
ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
return getExternalReference(makeInternalRef(I.Offset));
}
std::optional<ObjectID>
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) {
auto tryUpstream =
[&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
if (!UpstreamDB)
return std::nullopt;
std::optional<ObjectID> UpstreamID =
UpstreamDB->getExistingReference(Digest);
if (LLVM_UNLIKELY(!UpstreamID))
return std::nullopt;
auto Ref = expectedToOptional(indexHash(Digest));
if (!Ref)
return std::nullopt;
if (!I)
I.emplace(*Ref);
return getExternalReference(*I);
};
OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
if (!P)
return tryUpstream(std::nullopt);
IndexProxy I = getIndexProxyFromPointer(P);
TrieRecord::Data Obj = I.Ref.load();
if (Obj.SK == TrieRecord::StorageKind::Unknown)
return tryUpstream(I);
return getExternalReference(makeInternalRef(I.Offset));
}
Expected<OnDiskGraphDB::IndexProxy>
OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
if (LLVM_UNLIKELY(!P))
return P.takeError();
return getIndexProxyFromPointer(*P);
}
Expected<ArrayRef<uint8_t>> OnDiskGraphDB::getDigest(InternalRef Ref) const {
auto I = getIndexProxyFromRef(Ref);
if (!I)
return I.takeError();
return I->Hash;
}
ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
return I.Hash;
}
static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
ObjectHandle OH) {
// Decode ObjectHandle to locate the stored content.
uint64_t Data = OH.getOpaqueData();
if (Data & 1) {
const auto *SDIM =
reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
return SDIM->getContent();
}
auto DataHandle =
cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
assert(DataHandle.getData().end()[0] == 0 && "Null termination");
return OnDiskContent{DataHandle, std::nullopt};
}
ArrayRef<char> OnDiskGraphDB::getObjectData(ObjectHandle Node) const {
OnDiskContent Content = getContentFromHandle(DataPool, Node);
if (Content.Bytes)
return *Content.Bytes;
assert(Content.Record && "Expected record or bytes");
return Content.Record->getData();
}
InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
if (std::optional<DataRecordHandle> Record =
getContentFromHandle(DataPool, Node).Record)
return Record->getRefs();
return std::nullopt;
}
Expected<std::optional<ObjectHandle>>
OnDiskGraphDB::load(ObjectID ExternalRef) {
InternalRef Ref = getInternalRef(ExternalRef);
auto I = getIndexProxyFromRef(Ref);
if (!I)
return I.takeError();
TrieRecord::Data Object = I->Ref.load();
if (Object.SK == TrieRecord::StorageKind::Unknown)
return faultInFromUpstream(ExternalRef);
if (Object.SK == TrieRecord::StorageKind::DataPool)
return ObjectHandle::fromFileOffset(Object.Offset);
// Only TrieRecord::StorageKind::Standalone (and variants) need to be
// explicitly loaded.
//
// There's corruption if standalone objects have offsets, or if we get here
// for something that isn't standalone.
if (Object.Offset)
return createCorruptObjectError(getDigest(*I));
switch (Object.SK) {
case TrieRecord::StorageKind::Unknown:
case TrieRecord::StorageKind::DataPool:
llvm_unreachable("unexpected storage kind");
case TrieRecord::StorageKind::Standalone:
case TrieRecord::StorageKind::StandaloneLeaf0:
case TrieRecord::StorageKind::StandaloneLeaf:
break;
}
// Load it from disk.
//
// Note: Creation logic guarantees that data that needs null-termination is
// suitably 0-padded. Requiring null-termination here would be too expensive
// for extremely large objects that happen to be page-aligned.
SmallString<256> Path;
getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);
auto File = sys::fs::openNativeFileForRead(Path);
if (!File)
return createFileError(Path, File.takeError());
auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); });
sys::fs::file_status Status;
if (std::error_code EC = sys::fs::status(*File, Status))
return createCorruptObjectError(getDigest(*I));
std::error_code EC;
auto Region = std::make_unique<sys::fs::mapped_file_region>(
*File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
if (EC)
return createCorruptObjectError(getDigest(*I));
return ObjectHandle::fromMemory(
static_cast<StandaloneDataMapTy *>(StandaloneData)
->insert(I->Hash, Object.SK, std::move(Region)));
}
Expected<bool> OnDiskGraphDB::isMaterialized(ObjectID Ref) {
auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
if (!Presence)
return Presence.takeError();
switch (*Presence) {
case ObjectPresence::Missing:
return false;
case ObjectPresence::InPrimaryDB:
return true;
case ObjectPresence::OnlyInUpstreamDB:
if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
return FaultInResult.takeError();
return true;
}
llvm_unreachable("Unknown ObjectPresence enum");
}
Expected<OnDiskGraphDB::ObjectPresence>
OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
bool CheckUpstream) const {
InternalRef Ref = getInternalRef(ExternalRef);
auto I = getIndexProxyFromRef(Ref);
if (!I)
return I.takeError();
TrieRecord::Data Object = I->Ref.load();
if (Object.SK != TrieRecord::StorageKind::Unknown)
return ObjectPresence::InPrimaryDB;
if (!CheckUpstream || !UpstreamDB)
return ObjectPresence::Missing;
std::optional<ObjectID> UpstreamID =
UpstreamDB->getExistingReference(getDigest(*I));
return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
: ObjectPresence::Missing;
}
InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
return InternalRef::getFromOffset(IndexOffset);
}
void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
SmallVectorImpl<char> &Path) const {
Path.assign(RootPath.begin(), RootPath.end());
sys::path::append(Path,
Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
}
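// For example, with RootPath "/cas" and an index offset of 88, a standalone
// leaf resolves to "/cas/leaf.88.<version>".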
OnDiskContent StandaloneDataInMemory::getContent() const {
bool Leaf0 = false;
bool Leaf = false;
switch (SK) {
default:
llvm_unreachable("Storage kind must be standalone");
case TrieRecord::StorageKind::Standalone:
break;
case TrieRecord::StorageKind::StandaloneLeaf0:
Leaf = Leaf0 = true;
break;
case TrieRecord::StorageKind::StandaloneLeaf:
Leaf = true;
break;
}
if (Leaf) {
StringRef Data(Region->data(), Region->size());
assert(Data.drop_back(Leaf0).end()[0] == 0 &&
"Standalone node data missing null termination");
return OnDiskContent{std::nullopt,
arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
}
DataRecordHandle Record = DataRecordHandle::get(Region->data());
assert(Record.getData().end()[0] == 0 &&
"Standalone object record missing null termination for data");
return OnDiskContent{Record, std::nullopt};
}
static Expected<MappedTempFile> createTempFile(StringRef FinalPath,
uint64_t Size) {
assert(Size && "Unexpected request for an empty temp file");
Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%");
if (!File)
return File.takeError();
if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
return createFileError(File->TmpName, std::move(E));
if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
return createFileError(File->TmpName, EC);
std::error_code EC;
sys::fs::mapped_file_region Map(sys::fs::convertFDToNativeFile(File->FD),
sys::fs::mapped_file_region::readwrite, Size,
0, EC);
if (EC)
return createFileError(File->TmpName, EC);
return MappedTempFile(std::move(*File), std::move(Map));
}
static size_t getPageSize() {
static int PageSize = sys::Process::getPageSizeEstimate();
return PageSize;
}
Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
"Expected a bigger file for external content...");
bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
: TrieRecord::StorageKind::StandaloneLeaf;
SmallString<256> Path;
int64_t FileSize = Data.size() + Leaf0;
getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);
// Write the file. Don't reuse this mapped_file_region, which is read/write.
// Let load() pull up one that's read-only.
Expected<MappedTempFile> File = createTempFile(Path, FileSize);
if (!File)
return File.takeError();
assert(File->size() == (uint64_t)FileSize);
llvm::copy(Data, File->data());
if (Leaf0)
File->data()[Data.size()] = 0;
assert(File->data()[Data.size()] == 0);
if (Error E = File->keep(Path))
return E;
// Store the object reference.
TrieRecord::Data Existing;
{
TrieRecord::Data Leaf{SK, FileOffset()};
if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
recordStandaloneSizeIncrease(FileSize);
return Error::success();
}
}
// If there was a race, confirm that the new value has valid storage.
if (Existing.SK == TrieRecord::StorageKind::Unknown)
return createCorruptObjectError(getDigest(I));
return Error::success();
}
Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
ArrayRef<char> Data) {
auto I = getIndexProxyFromRef(getInternalRef(ID));
if (LLVM_UNLIKELY(!I))
return I.takeError();
// Early return in case the node exists.
{
TrieRecord::Data Existing = I->Ref.load();
if (Existing.SK != TrieRecord::StorageKind::Unknown)
return Error::success();
}
// Big leaf nodes.
if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
return createStandaloneLeaf(*I, Data);
// TODO: Check whether it's worth checking the index for an already existing
// object (like storeTreeImpl() does) before building up the
// InternalRefVector.
InternalRefVector InternalRefs;
for (ObjectID Ref : Refs)
InternalRefs.push_back(getInternalRef(Ref));
// Create the object.
DataRecordHandle::Input Input{InternalRefs, Data};
// Compute the storage kind, allocate it, and create the record.
TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
FileOffset PoolOffset;
SmallString<256> Path;
std::optional<MappedTempFile> File;
std::optional<uint64_t> FileSize;
auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
getStandalonePath(TrieRecord::getStandaloneFilePrefix(
TrieRecord::StorageKind::Standalone),
*I, Path);
if (Error E = createTempFile(Path, Size).moveInto(File))
return std::move(E);
assert(File->size() == Size);
FileSize = Size;
SK = TrieRecord::StorageKind::Standalone;
return File->data();
};
auto Alloc = [&](size_t Size) -> Expected<char *> {
if (Size <= TrieRecord::MaxEmbeddedSize) {
SK = TrieRecord::StorageKind::DataPool;
auto P = DataPool.allocate(Size);
if (LLVM_UNLIKELY(!P)) {
char *NewAlloc = nullptr;
auto NewE = handleErrors(
P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
if (E->convertToErrorCode() == std::errc::not_enough_memory)
return AllocStandaloneFile(Size).moveInto(NewAlloc);
return Error(std::move(E));
});
if (!NewE)
return NewAlloc;
return std::move(NewE);
}
PoolOffset = P->getOffset();
LLVM_DEBUG({
dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
<< " size=" << Size
<< " end=" << (void *)(PoolOffset.get() + Size) << "\n";
});
return (*P)->data();
}
return AllocStandaloneFile(Size);
};
DataRecordHandle Record;
if (Error E =
DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
return E;
assert(Record.getData().end()[0] == 0 && "Expected null-termination");
assert(Record.getData() == Input.Data && "Expected initialization");
assert(SK != TrieRecord::StorageKind::Unknown);
assert(bool(File) != bool(PoolOffset) &&
"Expected either a mapped file or a pooled offset");
// Check for a race before calling MappedTempFile::keep().
//
// Then decide what to do with the file. Better to discard than overwrite if
// another thread/process has already added this.
TrieRecord::Data Existing = I->Ref.load();
{
TrieRecord::Data NewObject{SK, PoolOffset};
if (File) {
if (Existing.SK == TrieRecord::StorageKind::Unknown) {
// Keep the file!
if (Error E = File->keep(Path))
return E;
} else {
File.reset();
}
}
// If we didn't already see a racing/existing write, then try storing the
// new object. If that races, confirm that the new value has valid storage.
//
// TODO: Find a way to reuse the storage from the new-but-abandoned record
// handle.
if (Existing.SK == TrieRecord::StorageKind::Unknown) {
if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
if (FileSize)
recordStandaloneSizeIncrease(*FileSize);
return Error::success();
}
}
}
if (Existing.SK == TrieRecord::StorageKind::Unknown)
return createCorruptObjectError(getDigest(*I));
// Load existing object.
return Error::success();
}
void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
}
std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
assert(isAddrAligned(Align(8), UserHeader.data()));
return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
}
uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
return standaloneStorageSize().load(std::memory_order_relaxed);
}
size_t OnDiskGraphDB::getStorageSize() const {
return Index.size() + DataPool.size() + getStandaloneStorageSize();
}
unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
return std::max(IndexPercent, DataPercent);
}
Expected<std::unique_ptr<OnDiskGraphDB>>
OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName,
unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
FaultInPolicy Policy) {
if (std::error_code EC = sys::fs::create_directories(AbsPath))
return createFileError(AbsPath, EC);
constexpr uint64_t MB = 1024ull * 1024ull;
constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
uint64_t MaxIndexSize = 12 * GB;
uint64_t MaxDataPoolSize = 24 * GB;
if (useSmallMappingSize(AbsPath)) {
MaxIndexSize = 1 * GB;
MaxDataPoolSize = 2 * GB;
}
auto CustomSize = getOverriddenMaxMappingSize();
if (!CustomSize)
return CustomSize.takeError();
if (*CustomSize)
MaxIndexSize = MaxDataPoolSize = **CustomSize;
SmallString<256> IndexPath(AbsPath);
sys::path::append(IndexPath, IndexFilePrefix + CASFormatVersion);
std::optional<OnDiskTrieRawHashMap> Index;
if (Error E = OnDiskTrieRawHashMap::create(
IndexPath, IndexTableName + "[" + HashName + "]",
HashByteSize * CHAR_BIT,
/*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
/*MinFileSize=*/MB)
.moveInto(Index))
return std::move(E);
uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
SmallString<256> DataPoolPath(AbsPath);
sys::path::append(DataPoolPath, DataPoolFilePrefix + CASFormatVersion);
std::optional<OnDiskDataAllocator> DataPool;
StringRef PolicyName =
Policy == FaultInPolicy::SingleNode ? "single" : "full";
if (Error E = OnDiskDataAllocator::create(
DataPoolPath,
DataPoolTableName + "[" + HashName + "]" + PolicyName,
MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize,
[](void *UserHeaderPtr) {
new (UserHeaderPtr) std::atomic<uint64_t>(0);
})
.moveInto(DataPool))
return std::move(E);
if (DataPool->getUserHeader().size() != UserHeaderSize)
return createStringError(llvm::errc::argument_out_of_domain,
"unexpected user header in '" + DataPoolPath +
"'");
return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
}
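// Example usage (an illustrative sketch; the hash name and byte size are
// assumptions, and error handling is elided):
//
//   auto DB = OnDiskGraphDB::open(Path, "blake3", /*HashByteSize=*/32);
//   Expected<ObjectID> ID = (*DB)->getReference(Digest);
//   if (Error E = (*DB)->store(*ID, /*Refs=*/{}, Data))
//     ...
//   Expected<std::optional<ObjectHandle>> Obj = (*DB)->load(*ID);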
OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
OnDiskDataAllocator DataPool,
OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
: Index(std::move(Index)), DataPool(std::move(DataPool)),
RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
/// Lifetime for "big" objects not in DataPool.
///
/// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
/// simpler on the assumption there won't be much contention since most data
/// is not big. If there is contention, and we've already fixed ObjectProxy
/// object handles to be cheap enough to use consistently, the fix might be
/// to make better use of them rather than optimizing this map.
///
/// FIXME: Figure out the right number of shards, if any.
StandaloneData = new StandaloneDataMapTy();
}
OnDiskGraphDB::~OnDiskGraphDB() {
delete static_cast<StandaloneDataMapTy *>(StandaloneData);
}
Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
ObjectHandle UpstreamNode) {
// Copies the full CAS tree from upstream. Uses depth-first copying to protect
// against the process dying during importing and leaving the database with an
// incomplete tree. Note that if the upstream has missing nodes then the tree
// will be copied with missing nodes as well; this is not considered an error.
struct UpstreamCursor {
ObjectHandle Node;
size_t RefsCount;
object_refs_iterator RefI;
object_refs_iterator RefE;
};
/// Keeps track of the state of visitation for current node and all of its
/// parents.
SmallVector<UpstreamCursor, 16> CursorStack;
/// Keeps track of the currently visited nodes as they are imported into the
/// primary database, from the current node and its parents. When a node is
/// entered for visitation it appends its own ID, then appends referenced IDs
/// as they get imported. When a node is fully imported it pops the
/// referenced IDs off the top of the stack, which leaves its own ID on top,
/// adding to the list of referenced IDs for the parent node.
SmallVector<ObjectID, 128> PrimaryNodesStack;
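// For example, importing node A with refs {B, C} (both leaves) evolves the
// stack as [A] -> [A,B] -> store(B) -> [A,B,C] -> store(C) ->
// store(A, refs=[B,C]) -> truncate back to [A].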
auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
PrimaryNodesStack.push_back(PrimaryID);
if (!Node)
return;
auto Refs = UpstreamDB->getObjectRefs(*Node);
CursorStack.push_back({*Node,
(size_t)std::distance(Refs.begin(), Refs.end()),
Refs.begin(), Refs.end()});
};
enqueueNode(PrimaryID, UpstreamNode);
while (!CursorStack.empty()) {
UpstreamCursor &Cur = CursorStack.back();
if (Cur.RefI == Cur.RefE) {
// Copy the node data into the primary store.
// FIXME: Use hard-link or cloning if the file-system supports it and data
// is stored into a separate file.
// The top of \p PrimaryNodesStack contains the primary ID for the
// current node followed by the list of imported referenced IDs.
assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
.slice(PrimaryNodesStack.size() - Cur.RefsCount);
auto Data = UpstreamDB->getObjectData(Cur.Node);
if (Error E = store(PrimaryID, PrimaryRefs, Data))
return E;
// Remove the current node and its IDs from the stack.
PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
CursorStack.pop_back();
continue;
}
ObjectID UpstreamID = *(Cur.RefI++);
auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
if (LLVM_UNLIKELY(!PrimaryID))
return PrimaryID.takeError();
if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
// This \p ObjectID already exists in the primary. Either it was imported
// via \p importFullTree or the client created it, in which case the
// client takes responsibility for how it was formed.
enqueueNode(*PrimaryID, std::nullopt);
continue;
}
Expected<std::optional<ObjectHandle>> UpstreamNode =
UpstreamDB->load(UpstreamID);
if (!UpstreamNode)
return UpstreamNode.takeError();
enqueueNode(*PrimaryID, *UpstreamNode);
}
assert(PrimaryNodesStack.size() == 1);
assert(PrimaryNodesStack.front() == PrimaryID);
return Error::success();
}
Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
ObjectHandle UpstreamNode) {
// Copies only a single node; it doesn't copy the referenced nodes.
// Copy the node data into the primary store.
// FIXME: Use hard-link or cloning if the file-system supports it and data is
// stored into a separate file.
auto Data = UpstreamDB->getObjectData(UpstreamNode);
auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
SmallVector<ObjectID, 64> Refs;
Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end()));
for (ObjectID UpstreamRef : UpstreamRefs) {
auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
if (LLVM_UNLIKELY(!Ref))
return Ref.takeError();
Refs.push_back(*Ref);
}
return store(PrimaryID, Refs, Data);
}
Expected<std::optional<ObjectHandle>>
OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
if (!UpstreamDB)
return std::nullopt;
auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
if (LLVM_UNLIKELY(!UpstreamID))
return UpstreamID.takeError();
Expected<std::optional<ObjectHandle>> UpstreamNode =
UpstreamDB->load(*UpstreamID);
if (!UpstreamNode)
return UpstreamNode.takeError();
if (!*UpstreamNode)
return std::nullopt;
if (Error E = FIPolicy == FaultInPolicy::SingleNode
? importSingleNode(PrimaryID, **UpstreamNode)
: importFullTree(PrimaryID, **UpstreamNode))
return std::move(E);
return load(PrimaryID);
}