// blob: 111b28af3b47ef30c3383814a383b01f4d9ec619 [file] [edit]
//===- AMDGPUWaitcntUtils.h -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/TargetParser.h"
#include <algorithm>
#include <array>
#include <cassert>
namespace llvm {
namespace AMDGPU {
/// Counters for which a Waitcnt holds a wait threshold.
///
/// The enumerators form three consecutive tiers (normal, extended, expert).
/// Each NUM_* sentinel records how many counters precede it, and the first
/// counter of the next tier is explicitly initialized to that sentinel, so the
/// sentinels do not consume a value of their own and the real counters stay
/// densely numbered from 0.
enum InstCounterType {
LOAD_CNT = 0, // VMcnt prior to gfx12.
DS_CNT, // LGKMcnt prior to gfx12.
EXP_CNT, //
STORE_CNT, // VScnt in gfx10/gfx11.
NUM_NORMAL_INST_CNTS, // Count of the counters above; not a counter itself.
SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
BVH_CNT, // gfx12+ only.
KM_CNT, // gfx12+ only.
X_CNT, // gfx1250.
ASYNC_CNT, // gfx1250.
NUM_EXTENDED_INST_CNTS, // Count of normal + extended counters.
VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
VM_VSRC, // gfx12+ expert mode only.
NUM_EXPERT_INST_CNTS, // Count of normal + extended + expert counters.
NUM_INST_CNTS = NUM_EXPERT_INST_CNTS // Total count; sizes Waitcnt's storage.
};
/// \returns the name of counter \p T as a string literal. Waitcnt::print()
/// uses it as the label in front of each counter value. Only declared here.
StringLiteral getInstCounterName(InstCounterType T);
/// \returns a range over all counters from LOAD_CNT (the first counter) up to,
/// but not including, \p MaxCounter. The default value of \p MaxCounter yields
/// an enumeration over all counters.
iota_range<InstCounterType>
inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS);
} // namespace AMDGPU
/// Marks AMDGPU::InstCounterType as an iterable enum, which the enum iteration
/// helpers in llvm/ADT/Sequence.h require before the enum can be ranged over.
/// The specialization is placed in namespace llvm, which is why namespace
/// AMDGPU is closed just before it and reopened right after.
template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
static constexpr bool is_iterable = true;
};
namespace AMDGPU {
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// One wait threshold is stored per InstCounterType. Large values (including
/// the maximum possible integer) can be used to represent "don't care" waits:
/// a default-constructed Waitcnt has every counter set to ~0u, and the
/// hasWait*() predicates treat exactly that value as "no wait requested".
class Waitcnt {
  /// Threshold value meaning "no wait requested on this counter".
  static constexpr unsigned NoWait = ~0u;

  /// Wait thresholds, indexed by InstCounterType.
  std::array<unsigned, NUM_INST_CNTS> Cnt;

public:
  /// \returns the wait threshold stored for counter \p T.
  ///
  /// \p T must be below NUM_INST_CNTS. The enum also contains sentinel values
  /// equal to NUM_INST_CNTS, which would index one past the end of the
  /// storage, so this is checked in asserts builds.
  unsigned get(InstCounterType T) const {
    assert(T < NUM_INST_CNTS && "not a valid wait counter");
    return Cnt[T];
  }

  /// Sets the wait threshold of counter \p T to \p Val.
  ///
  /// \p T must be below NUM_INST_CNTS (see get()).
  void set(InstCounterType T, unsigned Val) {
    assert(T < NUM_INST_CNTS && "not a valid wait counter");
    Cnt[T] = Val;
  }

  /// Creates a Waitcnt that requests no wait on any counter.
  Waitcnt() { fill(Cnt, NoWait); }

  /// Pre-gfx12 constructor. Sets the four normal counters and leaves every
  /// other counter at "no wait". Note the argument order: \p LgkmCnt (stored
  /// as DS_CNT) comes after \p ExpCnt.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : Waitcnt() {
    Cnt[LOAD_CNT] = VmCnt;
    Cnt[EXP_CNT] = ExpCnt;
    Cnt[DS_CNT] = LgkmCnt;
    Cnt[STORE_CNT] = VsCnt;
  }

  /// gfx12+ constructor. Sets every counter explicitly.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
          unsigned AsyncCnt, unsigned VaVdst, unsigned VmVsrc)
      : Waitcnt() {
    Cnt[LOAD_CNT] = LoadCnt;
    Cnt[DS_CNT] = DsCnt;
    Cnt[EXP_CNT] = ExpCnt;
    Cnt[STORE_CNT] = StoreCnt;
    Cnt[SAMPLE_CNT] = SampleCnt;
    Cnt[BVH_CNT] = BvhCnt;
    Cnt[KM_CNT] = KmCnt;
    Cnt[X_CNT] = XCnt;
    Cnt[ASYNC_CNT] = AsyncCnt;
    Cnt[VA_VDST] = VaVdst;
    Cnt[VM_VSRC] = VmVsrc;
  }

  /// \returns true if at least one counter requests a wait.
  bool hasWait() const {
    return any_of(Cnt, [](unsigned Val) { return Val != NoWait; });
  }

  /// \returns true if any counter other than STORE_CNT requests a wait.
  bool hasWaitExceptStoreCnt() const {
    for (InstCounterType T : inst_counter_types()) {
      if (T == STORE_CNT)
        continue;
      if (Cnt[T] != NoWait)
        return true;
    }
    return false;
  }

  /// \returns true if STORE_CNT requests a wait.
  bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != NoWait; }

  /// \returns true if either expert-mode counter (VA_VDST or VM_VSRC)
  /// requests a wait.
  bool hasWaitDepctr() const {
    return Cnt[VA_VDST] != NoWait || Cnt[VM_VSRC] != NoWait;
  }

  /// \returns a Waitcnt holding, for every counter, the smaller of this
  /// object's threshold and \p Other's. Because "no wait" is the largest
  /// possible value, a real threshold on either side always wins.
  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    Waitcnt Wait;
    for (InstCounterType T : inst_counter_types())
      Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
    return Wait;
  }

  /// Prints every counter as "<name>: <value>", comma-separated, followed by a
  /// newline. Counters without a wait are printed too, as the raw ~0u value.
  void print(raw_ostream &OS) const {
    ListSeparator LS;
    for (InstCounterType T : inst_counter_types())
      OS << LS << getInstCounterName(T) << ": " << Cnt[T];
    // NOTE(review): the loop above prints every counter unconditionally, so as
    // long as inst_counter_types() yields at least one counter the separator
    // is always used and "none" is never emitted. Left as is to keep the
    // output unchanged; confirm whether "no wait" counters were meant to be
    // skipped.
    if (LS.unused())
      OS << "none";
    OS << '\n';
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Debugger helper; only declared here.
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Streams \p Wait via print(); note that this appends a newline.
  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
    Wait.print(OS);
    return OS;
  }
};
/// \returns Decoded Waitcnt structure from given \p Encoded immediate for
/// given isa \p Version.
/// NOTE(review): presumably \p Encoded is the immediate of the combined
/// S_WAITCNT instruction; confirm against the definition.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
/// \returns \p Decoded encoded as an immediate for given isa \p Version.
/// NOTE(review): presumably the inverse of decodeWaitcnt(), and presumably
/// only the counters that the encoding has fields for are represented;
/// confirm against the definition.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
// The following are only meaningful on targets that support
// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
/// isa \p Version. Only meaningful on targets that support
/// S_WAIT_LOADCNT_DSCNT.
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
/// isa \p Version. Only meaningful on targets that support
/// S_WAIT_STORECNT_DSCNT.
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
/// \returns the Loadcnt and Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
/// \p Version. Only meaningful on targets that support that instruction.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
/// \returns the Storecnt and Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
/// \p Version. Only meaningful on targets that support that instruction.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
} // namespace AMDGPU
} // namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H