//===- StringTable.h - Table of strings tracked by offset -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRING_TABLE_H
#define LLVM_ADT_STRING_TABLE_H
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include <iterator>
#include <limits>
namespace llvm {
/// A table of densely packed, null-terminated strings indexed by offset.
///
/// This table abstracts a densely concatenated list of null-terminated strings,
/// each of which can be referenced using an offset into the table.
///
/// This requires and ensures that the string at offset 0 is also the empty
/// string. This allows zero-initialized offsets to form empty strings and
/// avoids non-zero initialization in cases where using a string literal
/// pointer would allow a null pointer.
///
/// The primary use case is having a single global string literal for the table
/// contents, and offsets into it in other global data structures to avoid
/// dynamic relocations of individual string literal pointers in those global
/// data structures.
class StringTable {
  // The full table contents, including every embedded null byte and the final
  // null byte.
  StringRef Table;

public:
  // An offset into one of these packed string tables, used to select a string
  // within the table.
  //
  // Typically these are created by TableGen or other code generator from
  // computed offsets, and it just wraps that integer into a type until it is
  // used with the relevant table.
  //
  // We also ensure that the empty string is at offset zero and default
  // constructing this class gives you an offset of zero. This makes default
  // constructing this type work similarly (after indexing the table) to default
  // constructing a `StringRef`.
  class Offset {
    // Note that we ensure the empty string is at offset zero.
    unsigned Value = 0;

  public:
    constexpr Offset() = default;

    // Implicit conversion from a plain integer. Code in this class relies on
    // it (see `begin()` and `Iterator::operator++`).
    constexpr Offset(unsigned Value) : Value(Value) {}

    friend constexpr bool operator==(const Offset &LHS, const Offset &RHS) {
      return LHS.Value == RHS.Value;
    }

    friend constexpr bool operator!=(const Offset &LHS, const Offset &RHS) {
      return LHS.Value != RHS.Value;
    }

    // Returns the wrapped integer offset.
    constexpr unsigned value() const { return Value; }
  };

  // We directly handle string literals with a templated converting constructor
  // because we *don't* want to do `strlen` on them -- we fully expect null
  // bytes in this input. This is somewhat the opposite of how `StringLiteral`
  // works.
  //
  // The table spans all `N` bytes of the array, so for a string literal the
  // implicit trailing null byte is part of the table.
  template <size_t N>
  constexpr StringTable(const char (&RawTable)[N]) : Table(RawTable, N) {
    static_assert(N <= std::numeric_limits<unsigned>::max(),
                  "We only support table sizes that can be indexed by an "
                  "`unsigned` offset.");

    // Note that we can only use `empty`, `data`, and `size` in these asserts to
    // support `constexpr`.
    assert(!Table.empty() && "Requires at least a valid empty string.");
    assert(Table.data()[0] == '\0' && "Offset zero must be the empty string.");
    // Regardless of how many strings are in the table, the last one should also
    // be null terminated. This also ensures that computing `strlen` on the
    // strings can't accidentally run past the end of the table.
    assert(Table.data()[Table.size() - 1] == '\0' &&
           "Last byte must be a null byte.");
  }

  // Returns the raw C string from the table starting with the provided offset.
  // The returned string is null terminated.
  //
  // Asserts that the offset lies within the table.
  constexpr const char *getCString(Offset O) const {
    assert(O.value() < Table.size() && "Out of bounds offset!");
    return Table.data() + O.value();
  }

  // Get a string from the table starting with the provided offset. The returned
  // `StringRef` is in fact null terminated, and so can be converted safely to a
  // C-string if necessary for a system API.
  constexpr StringRef operator[](Offset O) const { return getCString(O); }

  /// Returns the byte size of the table.
  constexpr size_t size() const { return Table.size(); }

  // Forward iterator over the strings in the table, visited in increasing
  // offset order. Each increment skips the current string and its null
  // terminator.
  class Iterator
      : public iterator_facade_base<Iterator, std::forward_iterator_tag,
                                    const StringRef> {
    friend StringTable;

    const StringTable *Table;
    Offset O;

    // A cache of one value to allow `*` to return a reference.
    mutable StringRef S;

    // Only `StringTable` creates iterators (see `begin()` and `end()`).
    explicit constexpr Iterator(const StringTable &Table, Offset O)
        : Table(&Table), O(O) {}

  public:
    constexpr Iterator(const Iterator &RHS) = default;
    constexpr Iterator(Iterator &&RHS) = default;

    // Declaring the move constructor above causes the implicit copy assignment
    // operator to be defined as deleted. Default the assignment operators
    // explicitly so the iterator stays assignable, as iterators must be.
    Iterator &operator=(const Iterator &RHS) = default;
    Iterator &operator=(Iterator &&RHS) = default;

    // Iterators compare equal when they refer to the same offset. Comparing
    // iterators from different tables is a programming error.
    bool operator==(const Iterator &RHS) const {
      assert(Table == RHS.Table && "Compared iterators for unrelated tables!");
      return O == RHS.O;
    }

    // Returns the string at the current offset. The value is stored in the
    // per-iterator cache so that a reference can be returned.
    const StringRef &operator*() const {
      S = (*Table)[O];
      return S;
    }

    // Advances to the byte just past the current string's null terminator.
    Iterator &operator++() {
      O = O.value() + (*Table)[O].size() + 1;
      return *this;
    }
  };

  // Iteration starts at offset zero, i.e. with the empty string.
  constexpr Iterator begin() const { return Iterator(*this, 0); }

  // The end position is the offset of the final byte of the table. Iteration
  // reaches it only if the last string's terminator is directly followed by
  // that final byte, as happens for a string literal that ends in an explicit
  // `\0` (the literal's implicit terminator then being the final byte).
  constexpr Iterator end() const { return Iterator(*this, size() - 1); }
};
} // namespace llvm
#endif // LLVM_ADT_STRING_TABLE_H