blob: d526dc1ece293dee9964cc95c99ad8e880e199db [file] [log] [blame]
//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/__support/CPP/bit.h"
#include "src/__support/common.h"
#include "src/__support/endian_internal.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
// GPU architectures are 64-bit but use 32-bit general purpose registers.
#ifdef LIBC_TARGET_ARCH_IS_GPU
using bitmask_t = uint32_t;
#else
using bitmask_t = uintptr_t;
#endif
// Helper function to spread a byte across the whole word.
// Accumutively, the procedure looks like:
// byte = 0x00000000000000ff
// byte | (byte << 8) = 0x000000000000ffff
// byte | (byte << 16) = 0x00000000ffffffff
// byte | (byte << 32) = 0xffffffffffffffff
LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) {
size_t shift_amount = 8;
while (shift_amount < sizeof(bitmask_t) * 8) {
byte |= byte << shift_amount;
shift_amount <<= 1;
}
return byte;
}
using BitMask = BitMaskAdaptor<bitmask_t, 0x8ul>;
using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
struct Group {
LIBC_INLINE_VAR static constexpr bitmask_t MASK = repeat_byte(0x80ul);
bitmask_t data;
// Load a group of control words from an arbitary address.
LIBC_INLINE static Group load(const void *addr) {
char bytes[sizeof(bitmask_t)];
for (size_t i = 0; i < sizeof(bitmask_t); ++i)
bytes[i] = static_cast<const char *>(addr)[i];
return Group{cpp::bit_cast<bitmask_t>(bytes)};
}
// Load a group of control words from an aligned address.
LIBC_INLINE static Group load_aligned(const void *addr) {
return *static_cast<const Group *>(addr);
}
// Find out the lanes equal to the given byte and return the bitmask
// with corresponding bits set.
LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const {
// Given byte = 0x10, suppose the data is:
//
// data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]
//
// First, we compare the byte using XOR operation:
//
// [ 0x10 | 0x10 | 0x10 | 0x10 | ... ] (0)
// ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] (1)
// = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2)
//
// Notice that the equal positions will now be 0x00, so if we substract 0x01
// respective to every byte, it will need to carry the substraction to upper
// bits (assume no carry from the hidden parts)
// [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2)
// - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ] (3)
// = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4)
//
// But there may be some bytes whose highest bit is already set after the
// xor operation. To rule out these positions, we AND them with the NOT
// of the XOR result:
//
// [ 0xFF | 0xFF | 0xEF | 0x1E | ... ] (5, NOT (2))
// & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4)
// = [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6)
//
// To make the bitmask iteratable, only one bit can be set in each stride.
// So we AND each byte with 0x80 and keep only the highest bit:
//
// [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6)
// & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ] (7)
// = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ] (8)
//
// However, there are possitbilites for false positives. For example, if the
// data is [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]. This only happens when there
// is a key only differs from the searched by the lowest bit. The claims
// are:
//
// - This never happens for `EMPTY` and `DELETED`, only full entries.
// - The check for key equality will catch these.
// - This only happens if there is at least 1 true match.
// - The chance of this happening is very low (< 1% chance per byte).
static constexpr bitmask_t ONES = repeat_byte(0x01ul);
auto cmp = data ^ repeat_byte(static_cast<bitmask_t>(byte) & 0xFFul);
auto result =
LIBC_NAMESPACE::Endian::to_little_endian((cmp - ONES) & ~cmp & MASK);
return {BitMask{result}};
}
// Find out the lanes equal to EMPTY or DELETE (highest bit set) and
// return the bitmask with corresponding bits set.
LIBC_INLINE BitMask mask_available() const {
bitmask_t le_data = LIBC_NAMESPACE::Endian::to_little_endian(data);
return {le_data & MASK};
}
LIBC_INLINE IteratableBitMask occupied() const {
bitmask_t available = mask_available().word;
return {BitMask{available ^ MASK}};
}
};
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL