// Source blob: 83cc4b18824c718fbc4a08eb23c3ae7363ae7108
//===- DXILOpLowering.cpp - Lowering to DXIL operations -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "DXILOpLowering.h"
#include "DXILConstants.h"
#include "DXILIntrinsicExpansion.h"
#include "DXILOpBuilder.h"
#include "DXILResourceAnalysis.h"
#include "DXILShaderFlags.h"
#include "DirectX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
#include "llvm/Analysis/DXILResource.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#define DEBUG_TYPE "dxil-op-lower"
using namespace llvm;
using namespace llvm::dxil;
/// Returns true if \p F is one of the dot-product intrinsics (dx.dot2,
/// dx.dot3 or dx.dot4). replaceFunctionWithOp flattens the vector arguments
/// of these intrinsics into scalars before building the DXIL op.
static bool isVectorArgExpansion(Function &F) {
  const Intrinsic::ID IID = F.getIntrinsicID();
  return IID == Intrinsic::dx_dot2 || IID == Intrinsic::dx_dot3 ||
         IID == Intrinsic::dx_dot4;
}
/// Extract every element of the fixed-width vector \p Arg as a scalar.
///
/// One `extractelement` with a constant i32 index is emitted through
/// \p Builder for each lane, in lane order.
///
/// \param Arg     Value whose type must be a FixedVectorType.
/// \param Builder Builder used to create the extracts.
/// \returns the extracted scalars, lane 0 first.
static SmallVector<Value *> populateOperands(Value *Arg, IRBuilder<> &Builder) {
  // `cast` (rather than `dyn_cast`) asserts on a non-vector argument instead
  // of silently handing a null pointer to the loop below.
  auto *VecArg = cast<FixedVectorType>(Arg->getType());
  const unsigned NumElements = VecArg->getNumElements();
  Type *Int32Ty = Type::getInt32Ty(Arg->getContext());

  SmallVector<Value *> ExtractedElements;
  ExtractedElements.reserve(NumElements);
  for (unsigned I = 0; I < NumElements; ++I) {
    Value *Index = ConstantInt::get(Int32Ty, I);
    ExtractedElements.push_back(Builder.CreateExtractElement(Arg, Index));
  }
  return ExtractedElements;
}
/// Flatten the vector arguments of \p Orig into one list of scalars.
///
/// Every call argument must be a fixed vector, and all of them must share the
/// element type and element count of the first argument (checked by
/// assertions). The result holds the lanes of argument 0, followed by the
/// lanes of argument 1, and so on.
///
/// \param Orig    The call whose arguments are flattened.
/// \param Builder Builder used to create the `extractelement` instructions.
/// \returns the scalar operands in argument order, lane order within each.
static SmallVector<Value *> argVectorFlatten(CallInst *Orig,
                                             IRBuilder<> &Builder) {
  // Walk the call *arguments* only. Going through arg_size()/getArgOperand()
  // keeps the callee operand, and any operand-bundle inputs, out of the
  // flattened list.
  const unsigned NumArgs = Orig->arg_size();
  assert(NumArgs > 0);

  Value *Arg0 = Orig->getArgOperand(0);
  [[maybe_unused]] auto *VecArg0 = dyn_cast<FixedVectorType>(Arg0->getType());
  assert(VecArg0);
  SmallVector<Value *> NewOperands = populateOperands(Arg0, Builder);

  for (unsigned I = 1; I < NumArgs; ++I) {
    Value *Arg = Orig->getArgOperand(I);
    [[maybe_unused]] auto *VecArg = dyn_cast<FixedVectorType>(Arg->getType());
    assert(VecArg);
    assert(VecArg0->getElementType() == VecArg->getElementType());
    assert(VecArg0->getNumElements() == VecArg->getNumElements());
    auto NextOperandList = populateOperands(Arg, Builder);
    NewOperands.append(NextOperandList.begin(), NextOperandList.end());
  }
  return NewOperands;
}
namespace {
class OpLowerer {
Module &M;
DXILOpBuilder OpBuilder;
DXILBindingMap &DBM;
DXILResourceTypeMap &DRTM;
SmallVector<CallInst *> CleanupCasts;
public:
/// Build a lowerer for module \p M. The DXIL op builder is constructed from
/// \p M, and references to the binding map \p DBM and the resource type map
/// \p DRTM are stored for use by the handle-lowering routines.
OpLowerer(Module &M, DXILBindingMap &DBM, DXILResourceTypeMap &DRTM)
: M(M), OpBuilder(M), DBM(DBM), DRTM(DRTM) {}
/// Run \c ReplaceCall on each call instruction that uses \c F, then erase
/// \c F if it has no users left.
///
/// Users of \c F that are not call instructions are skipped. The first call
/// for which \c ReplaceCall fails is reported as an "unsupported" diagnostic
/// on the module's context; processing stops there, \c F is kept, and true is
/// returned.
///
/// \returns true if an error was diagnosed, false otherwise.
[[nodiscard]] bool
replaceFunction(Function &F,
                llvm::function_ref<Error(CallInst *CI)> ReplaceCall) {
  for (User *FnUser : make_early_inc_range(F.users())) {
    auto *Call = dyn_cast<CallInst>(FnUser);
    if (!Call)
      continue;

    Error Err = ReplaceCall(Call);
    if (!Err)
      continue;

    // Report the failure and give up on this function.
    std::string Message(toString(std::move(Err)));
    DiagnosticInfoUnsupported Diag(*Call->getFunction(), Message,
                                   Call->getDebugLoc());
    M.getContext().diagnose(Diag);
    return true;
  }

  if (F.user_empty())
    F.eraseFromParent();
  return false;
}
/// Describes how one argument of a DXIL op is produced when an intrinsic is
/// replaced by that op in replaceFunctionWithOp.
struct IntrinArgSelect {
// The enumerators are generated from DXILOperation.inc through the
// DXIL_OP_INTRINSIC_ARG_SELECT_TYPE macro. replaceFunctionWithOp handles the
// Index, I8 and I32 kinds.
enum class Type {
#define DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(name) name,
#include "DXILOperation.inc"
};
// Which kind of selection this entry is.
Type Type;
// For Index: the position of the intrinsic call argument to forward.
// For I8 / I32: the constant value to pass.
int Value;
};
/// Point the users of a struct-returning intrinsic at the matching DXIL op.
///
/// The DXIL op returns a named struct, so the op cannot simply replace the
/// intrinsic everywhere. Instead, operand 0 of every `extractvalue` and
/// `insertvalue` user of \c Intrin is rewritten to \c DXILOp.
///
/// \returns an error if the two struct types do not have an identical layout,
/// or if \c Intrin has a user that is neither an `extractvalue` nor an
/// `insertvalue`; success otherwise.
Error replaceNamedStructUses(CallInst *Intrin, CallInst *DXILOp) {
  auto *IntrinStructTy = cast<StructType>(Intrin->getType());
  auto *OpStructTy = cast<StructType>(DXILOp->getType());

  const bool SameLayout = IntrinStructTy->isLayoutIdentical(OpStructTy);
  if (!SameLayout)
    return make_error<StringError>(
        "Type mismatch between intrinsic and DXIL op",
        inconvertibleErrorCode());

  for (Use &IntrinUse : make_early_inc_range(Intrin->uses())) {
    User *Consumer = IntrinUse.getUser();
    const bool IsAggregateAccess =
        isa<ExtractValueInst>(Consumer) || isa<InsertValueInst>(Consumer);
    if (!IsAggregateAccess)
      return make_error<StringError>("DXIL ops that return structs may only "
                                     "be used by insert- and extractvalue",
                                     inconvertibleErrorCode());
    Consumer->setOperand(0, DXILOp);
  }
  return Error::success();
}
/// Replace every call to the intrinsic \c F with a call to the DXIL operation
/// \c DXILOp, then erase \c F (see replaceFunction).
///
/// The op's arguments are chosen in one of three ways:
///  - if \c ArgSelects is non-empty, one argument is built per entry: either
///    a forwarded call argument (Index) or an i8 / i32 constant (I8 / I32);
///  - otherwise, for the vector-expansion intrinsics (isVectorArgExpansion),
///    the vector arguments are flattened into scalars;
///  - otherwise the call's arguments are forwarded unchanged.
///
/// Struct-typed results are rewired through replaceNamedStructUses; any other
/// result is replaced with a plain RAUW.
///
/// \returns true if an error was diagnosed while replacing a call.
[[nodiscard]] bool
replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
                      ArrayRef<IntrinArgSelect> ArgSelects) {
  bool IsVectorArgExpansion = isVectorArgExpansion(F);
  assert(!(IsVectorArgExpansion && ArgSelects.size()) &&
         "Can't do vector arg expansion when using arg selects.");
  return replaceFunction(F, [&](CallInst *CI) -> Error {
    OpBuilder.getIRB().SetInsertPoint(CI);
    SmallVector<Value *> Args;
    if (ArgSelects.size()) {
      for (const IntrinArgSelect &A : ArgSelects) {
        switch (A.Type) {
        case IntrinArgSelect::Type::Index:
          Args.push_back(CI->getArgOperand(A.Value));
          break;
        case IntrinArgSelect::Type::I8:
          Args.push_back(
              OpBuilder.getIRB().getInt8(static_cast<uint8_t>(A.Value)));
          break;
        case IntrinArgSelect::Type::I32:
          Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
          break;
        }
      }
    } else if (IsVectorArgExpansion) {
      Args = argVectorFlatten(CI, OpBuilder.getIRB());
    } else {
      Args.append(CI->arg_begin(), CI->arg_end());
    }

    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
    if (Error E = OpCall.takeError())
      return E;

    if (isa<StructType>(CI->getType())) {
      // The op returns a named struct, so users must be rewired one by one.
      if (Error E = replaceNamedStructUses(CI, *OpCall))
        return E;
    } else
      CI->replaceAllUsesWith(*OpCall);

    CI->eraseFromParent();
    return Error::success();
  });
}
/// Emit a temporary `dx.resource.casthandle` call converting \p V to \p Ty.
///
/// These casts bridge `target("dx")` types and `dx.types.Handle` while ops
/// are lowered one at a time, so that neither all uses nor all defs have to
/// be updated at once. Each cast is recorded in CleanupCasts so that
/// cleanupHandleCasts can remove it once lowering has finished.
///
/// \returns the newly created cast call.
Value *createTmpHandleCast(Value *V, Type *Ty) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  CallInst *TmpCast = IRB.CreateIntrinsic(Intrinsic::dx_resource_casthandle,
                                          {Ty, V->getType()}, {V});
  CleanupCasts.push_back(TmpCast);
  return TmpCast;
}
void cleanupHandleCasts() {
SmallVector<CallInst *> ToRemove;
SmallVector<Function *> CastFns;
for (CallInst *Cast : CleanupCasts) {
// These casts were only put in to ease the move from `target("dx")` types
// to `dx.types.Handle in a piecemeal way. At this point, all of the
// non-cast uses should now be `dx.types.Handle`, and remaining casts
// should all form pairs to and from the now unused `target("dx")` type.
CastFns.push_back(Cast->getCalledFunction());
// If the cast is not to `dx.types.Handle`, it should be the first part of
// the pair. Keep track so we can remove it once it has no more uses.
if (Cast->getType() != OpBuilder.getHandleType()) {
ToRemove.push_back(Cast);
continue;
}
// Otherwise, we're the second handle in a pair. Forward the arguments and
// remove the (second) cast.
CallInst *Def = cast<CallInst>(Cast->getOperand(0));
assert(Def->getIntrinsicID() == Intrinsic::dx_resource_casthandle &&
"Unbalanced pair of temporary handle casts");
Cast->replaceAllUsesWith(Def->getOperand(0));
Cast->eraseFromParent();
}
for (CallInst *Cast : ToRemove) {
assert(Cast->user_empty() && "Temporary handle cast still has users");
Cast->eraseFromParent();
}
// Deduplicate the cast functions so that we only erase each one once.
llvm::sort(CastFns);
CastFns.erase(llvm::unique(CastFns), CastFns.end());
for (Function *F : CastFns)
F->eraseFromParent();
CleanupCasts.clear();
}
/// Erase the stores that use the handleFromBinding call \p CI, and erase the
/// global variable each store wrote to if nothing else uses it afterwards.
///
/// The resource globals and their uses are not needed any more at this point.
/// TODO: We should verify that all the globals get removed.
/// It's expected we'll need a custom pass in the future that will eliminate
/// the need for this here.
void removeResourceGlobals(CallInst *CI) {
  for (User *U : make_early_inc_range(CI->users())) {
    auto *Store = dyn_cast<StoreInst>(U);
    if (!Store)
      continue;

    Value *Ptr = Store->getPointerOperand();
    Store->eraseFromParent();

    auto *GV = dyn_cast<GlobalVariable>(Ptr);
    if (!GV)
      continue;

    // Drop dead constant users *before* asking whether the global is unused;
    // otherwise a dead constant expression would keep the global alive, and
    // the call would be a no-op whenever use_empty() is already true.
    GV->removeDeadConstantUsers();
    if (GV->use_empty())
      GV->eraseFromParent();
  }
}
/// Lower each call to \p F into a `CreateHandle` DXIL op.
///
/// The op receives the resource class (i8), the binding's record ID (i32),
/// call argument 3 offset by the binding's lower bound, and call argument 4.
/// The op result is wrapped in a temporary handle cast back to the call's
/// type, and the stores of the old call to resource globals are removed.
///
/// \returns true if an error was diagnosed.
[[nodiscard]] bool lowerToCreateHandle(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I8Ty = IRB.getInt8Ty();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *Call) -> Error {
    IRB.SetInsertPoint(Call);

    auto *Entry = DBM.find(Call);
    assert(Entry != DBM.end() && "Resource not in map?");
    dxil::ResourceBindingInfo &Info = *Entry;

    const auto &Binding = Info.getBinding();
    dxil::ResourceClass RC = DRTM[Info.getHandleTy()].getResourceClass();

    // Offset the index by the binding's lower bound when it is non-zero.
    Value *Index = Call->getArgOperand(3);
    if (Binding.LowerBound != 0) {
      Value *LowerBound = ConstantInt::get(I32Ty, Binding.LowerBound);
      Index = IRB.CreateAdd(Index, LowerBound);
    }

    Value *ClassArg = ConstantInt::get(I8Ty, llvm::to_underlying(RC));
    Value *RecordArg = ConstantInt::get(I32Ty, Binding.RecordID);
    std::array<Value *, 4> Args{ClassArg, RecordArg, Index,
                                Call->getArgOperand(4)};
    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, Call->getName());
    if (Error E = OpCall.takeError())
      return E;

    Value *Cast = createTmpHandleCast(*OpCall, Call->getType());
    removeResourceGlobals(Call);
    Call->replaceAllUsesWith(Cast);
    Call->eraseFromParent();
    return Error::success();
  };

  return replaceFunction(F, LowerCall);
}
/// Lower each call to \p F into a `CreateHandleFromBinding` op followed by an
/// `AnnotateHandle` op.
///
/// The binding op receives a ResBind constant (lower bound, upper bound,
/// space, resource class), call argument 3 offset by the binding's lower
/// bound, and call argument 4. The annotate op receives the bound handle and
/// the resource properties. Its result is wrapped in a temporary handle cast
/// back to the call's type, and the stores of the old call to resource
/// globals are removed.
///
/// \returns true if an error was diagnosed.
[[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *Call) -> Error {
    IRB.SetInsertPoint(Call);

    auto *Entry = DBM.find(Call);
    assert(Entry != DBM.end() && "Resource not in map?");
    dxil::ResourceBindingInfo &Info = *Entry;

    const auto &Binding = Info.getBinding();
    dxil::ResourceTypeInfo &TypeInfo = DRTM[Info.getHandleTy()];
    dxil::ResourceClass RC = TypeInfo.getResourceClass();

    // Offset the index by the binding's lower bound when it is non-zero.
    Value *Index = Call->getArgOperand(3);
    if (Binding.LowerBound != 0) {
      Value *LowerBound = ConstantInt::get(I32Ty, Binding.LowerBound);
      Index = IRB.CreateAdd(Index, LowerBound);
    }

    std::pair<uint32_t, uint32_t> Props =
        Info.getAnnotateProps(*F.getParent(), TypeInfo);

    // `CreateHandleFromBinding` wants an upper bound rather than a size. An
    // "unbounded" size stays unbounded instead of going through the
    // arithmetic below.
    uint32_t Unbounded = std::numeric_limits<uint32_t>::max();
    uint32_t UpperBound = Unbounded;
    if (Binding.Size != Unbounded)
      UpperBound = Binding.LowerBound + Binding.Size - 1;

    Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound,
                                             Binding.Space, RC);
    std::array<Value *, 3> BindArgs{ResBind, Index, Call->getArgOperand(4)};
    Expected<CallInst *> OpBind = OpBuilder.tryCreateOp(
        OpCode::CreateHandleFromBinding, BindArgs, Call->getName());
    if (Error E = OpBind.takeError())
      return E;

    Value *ResProps = OpBuilder.getResProps(Props.first, Props.second);
    std::array<Value *, 2> AnnotateArgs{*OpBind, ResProps};
    Expected<CallInst *> OpAnnotate = OpBuilder.tryCreateOp(
        OpCode::AnnotateHandle, AnnotateArgs,
        Call->hasName() ? Call->getName() + "_annot" : Twine());
    if (Error E = OpAnnotate.takeError())
      return E;

    Value *Cast = createTmpHandleCast(*OpAnnotate, Call->getType());
    removeResourceGlobals(Call);
    Call->replaceAllUsesWith(Cast);
    Call->eraseFromParent();
    return Error::success();
  };

  return replaceFunction(F, LowerCall);
}
/// Lower `dx.resource.handlefrombinding` calls according to the DXIL version
/// of the module's target triple: versions before 1.6 get `CreateHandle`,
/// while 1.6 and later get `CreateHandleFromBinding` plus `AnnotateHandle`.
/// Binding information comes from DXILResourceBindingAnalysis.
///
/// \returns true if an error was diagnosed.
bool lowerHandleFromBinding(Function &F) {
  Triple TT(Triple(M.getTargetTriple()));
  const bool PreSM66Handles = TT.getDXILVersion() < VersionTuple(1, 6);
  return PreSM66Handles ? lowerToCreateHandle(F)
                        : lowerToBindAndAnnotateHandle(F);
}
/// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
/// Since we expect to be post-scalarization, make an effort to avoid vectors.
///
/// When \c HasCheckBit is set, \c Intrin returns a struct whose element 0 is
/// the loaded value and whose element 1 is the check bit. Extracts of the
/// check bit are replaced with a `CheckAccessFullyMapped` op applied to
/// element 4 of \c Op. The loaded value itself is then handled in one of
/// three ways: scalar results become element 0 of \c Op; vector results have
/// their constant-index `extractelement` users replaced by `extractvalue`s,
/// dynamic-index users served from a stack array, and any remaining users
/// given a rebuilt vector.
///
/// New instructions are created through the shared IRBuilder, so the caller
/// is expected to have set its insert point. \c Intrin is erased on success.
///
/// \returns an error if creating the `CheckAccessFullyMapped` op fails.
Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) {
IRBuilder<> &IRB = OpBuilder.getIRB();
// OldResult / OldTy track the instruction (and its type) that carries the
// loaded value: the intrinsic itself, or the extract of struct element 0 when
// there is a check bit.
Instruction *OldResult = Intrin;
Type *OldTy = Intrin->getType();
if (HasCheckBit) {
auto *ST = cast<StructType>(OldTy);
// Created lazily and shared by every extract of the check bit.
Value *CheckOp = nullptr;
Type *Int32Ty = IRB.getInt32Ty();
for (Use &U : make_early_inc_range(OldResult->uses())) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
ArrayRef<unsigned> Indices = EVI->getIndices();
assert(Indices.size() == 1);
// We're only interested in uses of the check bit for now.
if (Indices[0] != 1)
continue;
if (!CheckOp) {
// Element 4 of the op's result feeds CheckAccessFullyMapped.
Value *NewEVI = IRB.CreateExtractValue(Op, 4);
Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
OpCode::CheckAccessFullyMapped, {NewEVI},
OldResult->hasName() ? OldResult->getName() + "_check"
: Twine(),
Int32Ty);
if (Error E = OpCall.takeError())
return E;
CheckOp = *OpCall;
}
EVI->replaceAllUsesWith(CheckOp);
EVI->eraseFromParent();
}
}
if (OldResult->use_empty()) {
// Only the check bit was used, so we're done here.
OldResult->eraseFromParent();
return Error::success();
}
// NOTE(review): this assumes there is exactly one remaining user and that it
// extracts element 0; several separate extracts of element 0 would trip the
// assertion - confirm earlier passes guarantee this.
assert(OldResult->hasOneUse() &&
isa<ExtractValueInst>(*OldResult->user_begin()) &&
"Expected only use to be extract of first element");
// From here on, work on the extract of the loaded value.
OldResult = cast<Instruction>(*OldResult->user_begin());
OldTy = ST->getElementType(0);
}
// For scalars, we just extract the first element.
if (!isa<FixedVectorType>(OldTy)) {
Value *EVI = IRB.CreateExtractValue(Op, 0);
OldResult->replaceAllUsesWith(EVI);
OldResult->eraseFromParent();
// With a check bit, OldResult was the extract; erase the intrinsic as well.
if (OldResult != Intrin) {
assert(Intrin->use_empty() && "Intrinsic still has uses?");
Intrin->eraseFromParent();
}
return Error::success();
}
// Extracts[i] caches the `extractvalue` of element i of Op once created.
std::array<Value *, 4> Extracts = {};
SmallVector<ExtractElementInst *> DynamicAccesses;
// The users of the operation should all be scalarized, so we attempt to
// replace the extractelements with extractvalues directly.
for (Use &U : make_early_inc_range(OldResult->uses())) {
if (auto *EEI = dyn_cast<ExtractElementInst>(U.getUser())) {
if (auto *IndexOp = dyn_cast<ConstantInt>(EEI->getIndexOperand())) {
size_t IndexVal = IndexOp->getZExtValue();
assert(IndexVal < 4 && "Index into buffer load out of range");
if (!Extracts[IndexVal])
Extracts[IndexVal] = IRB.CreateExtractValue(Op, IndexVal);
EEI->replaceAllUsesWith(Extracts[IndexVal]);
EEI->eraseFromParent();
} else {
// Non-constant index: handled below via stack memory.
DynamicAccesses.push_back(EEI);
}
}
}
const auto *VecTy = cast<FixedVectorType>(OldTy);
const unsigned N = VecTy->getNumElements();
// If there's a dynamic access we need to round trip through stack memory so
// that we don't leave vectors around.
if (!DynamicAccesses.empty()) {
Type *Int32Ty = IRB.getInt32Ty();
Constant *Zero = ConstantInt::get(Int32Ty, 0);
Type *ElTy = VecTy->getElementType();
Type *ArrayTy = ArrayType::get(ElTy, N);
// NOTE(review): the alloca is created at the builder's current insert point
// rather than in the entry block - confirm this is intended.
Value *Alloca = IRB.CreateAlloca(ArrayTy);
// Spill every element of the result into the stack array.
for (int I = 0, E = N; I != E; ++I) {
if (!Extracts[I])
Extracts[I] = IRB.CreateExtractValue(Op, I);
Value *GEP = IRB.CreateInBoundsGEP(
ArrayTy, Alloca, {Zero, ConstantInt::get(Int32Ty, I)});
IRB.CreateStore(Extracts[I], GEP);
}
// Replace each dynamic extract with a load from the indexed array slot.
// NOTE(review): the GEP and load are emitted at the builder's insert point,
// not at the position of the extractelement; this presumably relies on the
// index operand being defined before that point - verify.
for (ExtractElementInst *EEI : DynamicAccesses) {
Value *GEP = IRB.CreateInBoundsGEP(ArrayTy, Alloca,
{Zero, EEI->getIndexOperand()});
Value *Load = IRB.CreateLoad(ElTy, GEP);
EEI->replaceAllUsesWith(Load);
EEI->eraseFromParent();
}
}
// If we still have uses, then we're not fully scalarized and need to
// recreate the vector. This should only happen for things like exported
// functions from libraries.
if (!OldResult->use_empty()) {
for (int I = 0, E = N; I != E; ++I)
if (!Extracts[I])
Extracts[I] = IRB.CreateExtractValue(Op, I);
// Rebuild the vector one element at a time, starting from undef.
Value *Vec = UndefValue::get(OldTy);
for (int I = 0, E = N; I != E; ++I)
Vec = IRB.CreateInsertElement(Vec, Extracts[I], I);
OldResult->replaceAllUsesWith(Vec);
}
OldResult->eraseFromParent();
// With a check bit, OldResult was the extract; erase the intrinsic as well.
if (OldResult != Intrin) {
assert(Intrin->use_empty() && "Intrinsic still has uses?");
Intrin->eraseFromParent();
}
return Error::success();
}
/// Lower each call to \p F into a `BufferLoad` DXIL op.
///
/// The op takes the resource handle (call argument 0, through a temporary
/// handle cast), call argument 1 as the first index and an undef i32 as the
/// second. When \p HasCheckBit is set the call returns a struct whose element
/// 0 is the loaded value; otherwise the call's own type is the loaded type.
/// Uses of the call are rewritten by replaceResRetUses.
///
/// \returns true if an error was diagnosed.
[[nodiscard]] bool lowerTypedBufferLoad(Function &F, bool HasCheckBit) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *Call) -> Error {
    IRB.SetInsertPoint(Call);

    Value *Handle =
        createTmpHandleCast(Call->getArgOperand(0), OpBuilder.getHandleType());

    Type *LoadedTy = Call->getType();
    if (HasCheckBit)
      LoadedTy = cast<StructType>(LoadedTy)->getElementType(0);
    Type *ResRetTy = OpBuilder.getResRetType(LoadedTy->getScalarType());

    std::array<Value *, 3> Args{Handle, Call->getArgOperand(1),
                                UndefValue::get(I32Ty)};
    Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
        OpCode::BufferLoad, Args, Call->getName(), ResRetTy);
    if (Error E = OpCall.takeError())
      return E;

    return replaceResRetUses(Call, *OpCall, HasCheckBit);
  };

  return replaceFunction(F, LowerCall);
}
/// Lower each call to \p F into a raw buffer load.
///
/// For DXIL 1.2 and later a `RawBufferLoad` op is emitted, which takes an
/// element mask and an alignment in addition to the handle and the two
/// indices. Older versions get a plain `BufferLoad` with the handle and the
/// two indices only. The call always returns a struct with a check bit, so
/// uses are rewritten by replaceResRetUses with HasCheckBit set.
///
/// \returns true if an error was diagnosed.
[[nodiscard]] bool lowerRawBufferLoad(Function &F) {
  Triple TT(Triple(M.getTargetTriple()));
  VersionTuple DXILVersion = TT.getDXILVersion();
  const DataLayout &DL = F.getDataLayout();
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I8Ty = IRB.getInt8Ty();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *Call) -> Error {
    IRB.SetInsertPoint(Call);

    Type *LoadedTy = cast<StructType>(Call->getType())->getElementType(0);
    Type *ElemTy = LoadedTy->getScalarType();
    Type *ResRetTy = OpBuilder.getResRetType(ElemTy);

    Value *Handle =
        createTmpHandleCast(Call->getArgOperand(0), OpBuilder.getHandleType());

    // One mask bit per loaded element.
    uint64_t NumElements =
        DL.getTypeSizeInBits(LoadedTy) / DL.getTypeSizeInBits(ElemTy);
    Value *Mask = ConstantInt::get(I8Ty, ~(~0U << NumElements));
    Value *Align =
        ConstantInt::get(I32Ty, DL.getPrefTypeAlign(ElemTy).value());

    // Start from the arguments both ops share, then add the raw-only ones.
    dxil::OpCode Op = OpCode::BufferLoad;
    SmallVector<Value *, 5> Args{Handle, Call->getArgOperand(1),
                                 Call->getArgOperand(2)};
    if (DXILVersion >= VersionTuple(1, 2)) {
      Op = OpCode::RawBufferLoad;
      Args.push_back(Mask);
      Args.push_back(Align);
    }

    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(Op, Args, Call->getName(), ResRetTy);
    if (Error E = OpCall.takeError())
      return E;

    return replaceResRetUses(Call, *OpCall, /*HasCheckBit=*/true);
  };

  return replaceFunction(F, LowerCall);
}
/// Lower each call to \p F into an `UpdateCounter` DXIL op returning i32.
///
/// The op takes the resource handle (call argument 0, through a temporary
/// handle cast) and call argument 1. The op result replaces the call.
///
/// \returns true if an error was diagnosed.
[[nodiscard]] bool lowerUpdateCounter(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *Call) -> Error {
    IRB.SetInsertPoint(Call);

    Value *Handle =
        createTmpHandleCast(Call->getArgOperand(0), OpBuilder.getHandleType());
    std::array<Value *, 2> Args{Handle, Call->getArgOperand(1)};

    Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
        OpCode::UpdateCounter, Args, Call->getName(), I32Ty);
    if (Error E = OpCall.takeError())
      return E;

    Call->replaceAllUsesWith(*OpCall);
    Call->eraseFromParent();
    return Error::success();
  };

  return replaceFunction(F, LowerCall);
}
/// Erase the declaration of a `dx.resource.getpointer` intrinsic.
///
/// No calls are lowered here: the declaration is asserted to have no users
/// and is simply removed.
///
/// \returns false always (no error is ever diagnosed here).
[[nodiscard]] bool lowerGetPointer(Function &F) {
// These should have already been handled in DXILResourceAccess, so we can
// just clean up the dead prototype.
assert(F.user_empty() && "getpointer operations should have been removed");
F.eraseFromParent();
return false;
}
/// Lower each call to \p F into a `BufferStore` or `RawBufferStore` DXIL op.
///
/// The op always receives four data operands. Elements are taken from the
/// stored value (preferably straight from the `insertelement` chain that
/// built it), and slots beyond the value's element count are filled with
/// undef. A mask with one bit per stored element follows the data.
///
/// \param F     Declaration of the store intrinsic being lowered.
/// \param IsRaw True for raw buffer stores, which carry a second index
///              (call argument 2) and the data in argument 3; typed stores
///              use an undef second index and carry the data in argument 2.
///              Raw stores on DXIL 1.2 or later become `RawBufferStore` with
///              an extra alignment operand; everything else becomes
///              `BufferStore`.
/// \returns true if an error was diagnosed: a typed store whose data does not
/// have exactly 4 elements, data with more than 4 elements, or a failure to
/// create the op.
[[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) {
  Triple TT(Triple(M.getTargetTriple()));
  VersionTuple DXILVersion = TT.getDXILVersion();
  const DataLayout &DL = F.getDataLayout();
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *Int8Ty = IRB.getInt8Ty();
  Type *Int32Ty = IRB.getInt32Ty();

  return replaceFunction(F, [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    Value *Index0 = CI->getArgOperand(1);
    Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty);

    Value *Data = CI->getArgOperand(IsRaw ? 3 : 2);
    Type *DataTy = Data->getType();
    Type *ScalarTy = DataTy->getScalarType();

    uint64_t NumElements =
        DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy);

    // TODO: check that we only have vector or scalar...
    if (!IsRaw && NumElements != 4)
      return make_error<StringError>(
          "typedBufferStore data must be a vector of 4 elements",
          inconvertibleErrorCode());
    if (NumElements > 4)
      return make_error<StringError>(
          "rawBufferStore data must have at most 4 elements",
          inconvertibleErrorCode());

    // Build the mask only after validation: NumElements is now at most 4, so
    // the shift below cannot reach or exceed the width of `unsigned`.
    Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements));

    std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr};
    if (DataTy == ScalarTy)
      DataElements[0] = Data;
    else {
      // Since we're post-scalarizer, if we see a vector here it's likely
      // constructed solely for the argument of the store. Just use the scalar
      // values from before they're inserted into the temporary.
      auto *IEI = dyn_cast<InsertElementInst>(Data);
      while (IEI) {
        auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2));
        if (!IndexOp)
          break;
        size_t IndexVal = IndexOp->getZExtValue();
        assert(IndexVal < 4 && "Too many elements for buffer store");
        // The chain is walked from the newest insert to the oldest, so the
        // first value seen for a lane is the one that ends up in the vector.
        // Older inserts into the same lane must not overwrite it.
        if (DataElements[IndexVal] == nullptr)
          DataElements[IndexVal] = IEI->getOperand(1);
        IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
      }
    }

    // If for some reason we weren't able to forward the arguments from the
    // scalarizer artifact, then we may need to actually extract elements from
    // the vector.
    for (int I = 0, E = NumElements; I < E; ++I)
      if (DataElements[I] == nullptr)
        DataElements[I] =
            IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I));
    // For any elements beyond the length of the vector, fill up with undef.
    for (int I = NumElements, E = 4; I < E; ++I)
      if (DataElements[I] == nullptr)
        DataElements[I] = UndefValue::get(ScalarTy);

    dxil::OpCode Op = OpCode::BufferStore;
    SmallVector<Value *, 9> Args{
        Handle,          Index0,          Index1,          DataElements[0],
        DataElements[1], DataElements[2], DataElements[3], Mask};
    if (IsRaw && DXILVersion >= VersionTuple(1, 2)) {
      Op = OpCode::RawBufferStore;
      // RawBufferStore requires the alignment
      Args.push_back(
          ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value()));
    }
    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(Op, Args, CI->getName());
    if (Error E = OpCall.takeError())
      return E;

    CI->eraseFromParent();
    // Clean up any leftover `insertelement`s
    auto *IEI = dyn_cast<InsertElementInst>(Data);
    while (IEI && IEI->use_empty()) {
      InsertElementInst *Tmp = IEI;
      IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
      Tmp->eraseFromParent();
    }

    return Error::success();
  });
}
/// Lower each call to the `ctpop` intrinsic \p F into a `CountBits` DXIL op.
///
/// The op always produces i32 (or a vector of i32 matching the ctpop's vector
/// type). A 32-bit ctpop is replaced directly. For 16- and 64-bit ctpops,
/// users that already convert the result to the op's type (zext/sext from 16
/// bits, trunc from 64 bits) are replaced by the op itself; if any other user
/// remains, a zext-or-trunc back to the ctpop's type is emitted for it.
///
/// \returns true if an error was diagnosed.
[[nodiscard]] bool lowerCtpopToCountBits(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *I32Ty = IRB.getInt32Ty();

  auto LowerCall = [&](CallInst *Call) -> Error {
    IRB.SetInsertPoint(Call);

    SmallVector<Value *> OpArgs(Call->arg_begin(), Call->arg_end());

    // The op counts into i32, keeping the vector shape of the ctpop if any.
    Type *CtpopTy = F.getReturnType();
    Type *CountTy = I32Ty;
    if (const auto *VecTy = dyn_cast<VectorType>(CtpopTy))
      CountTy = VectorType::get(CountTy, VecTy);

    Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
        dxil::OpCode::CountBits, OpArgs, Call->getName(), CountTy);
    if (Error E = OpCall.takeError())
      return E;
    CallInst *CountBits = *OpCall;

    // If the result type is 32 bits we can do a direct replacement.
    if (CtpopTy->isIntOrIntVectorTy(32)) {
      Call->replaceAllUsesWith(CountBits);
      Call->eraseFromParent();
      return Error::success();
    }

    // Which cast opcodes on the ctpop result are made redundant by the op:
    // widening casts for 16 bits, truncation otherwise.
    const bool Is16Bit = CtpopTy->isIntOrIntVectorTy(16);
    assert((Is16Bit || CtpopTy->isIntOrIntVectorTy(64)) &&
           "Currently only lowering 16, 32, or 64 bit ctpop to CountBits "
           "is supported.");
    const unsigned RedundantCastA =
        Is16Bit ? Instruction::ZExt : Instruction::Trunc;
    const unsigned RedundantCastB =
        Is16Bit ? Instruction::SExt : Instruction::Trunc;

    // Casts of the ctpop result to the op's type can use the op directly.
    bool NeedsCast = false;
    for (User *U : make_early_inc_range(Call->users())) {
      auto *UserInst = dyn_cast<Instruction>(U);
      const bool IsRedundantCast =
          UserInst &&
          (UserInst->getOpcode() == RedundantCastA ||
           UserInst->getOpcode() == RedundantCastB) &&
          UserInst->getType() == CountTy;
      if (!IsRedundantCast) {
        NeedsCast = true;
        continue;
      }
      UserInst->replaceAllUsesWith(CountBits);
      UserInst->eraseFromParent();
    }

    // Any remaining user still expects the ctpop's own type, so give it the
    // op result converted back to that type.
    if (NeedsCast) {
      Value *Cast =
          IRB.CreateZExtOrTrunc(CountBits, F.getReturnType(), "ctpop.cast");
      Call->replaceAllUsesWith(Cast);
    }

    Call->eraseFromParent();
    return Error::success();
  };

  return replaceFunction(F, LowerCall);
}
/// Lower every supported intrinsic declaration in the module to DXIL ops.
///
/// Only function declarations are considered. Intrinsics listed through the
/// DXIL_OP_INTRINSIC entries of DXILOperation.inc are replaced via
/// replaceFunctionWithOp; the resource intrinsics and `ctpop` have dedicated
/// lowering routines. The temporary handle casts are cleaned up only if at
/// least one intrinsic was processed and no error was diagnosed.
///
/// \returns true if any intrinsic declaration was processed, even when its
/// lowering reported an error.
bool lowerIntrinsics() {
bool Updated = false;
bool HasErrors = false;
// Early-inc iteration: the lowering routines may erase F.
for (Function &F : make_early_inc_range(M.functions())) {
if (!F.isDeclaration())
continue;
Intrinsic::ID ID = F.getIntrinsicID();
switch (ID) {
default:
// Not something we lower; skip without marking the module as updated.
continue;
// Each DXIL_OP_INTRINSIC entry in DXILOperation.inc expands to a case that
// maps an intrinsic onto its DXIL op, forwarding any arg selects.
#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...) \
case Intrin: \
HasErrors |= replaceFunctionWithOp( \
F, OpCode, ArrayRef<IntrinArgSelect>{__VA_ARGS__}); \
break;
#include "DXILOperation.inc"
case Intrinsic::dx_resource_handlefrombinding:
HasErrors |= lowerHandleFromBinding(F);
break;
case Intrinsic::dx_resource_getpointer:
HasErrors |= lowerGetPointer(F);
break;
case Intrinsic::dx_resource_load_typedbuffer:
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
break;
case Intrinsic::dx_resource_store_typedbuffer:
HasErrors |= lowerBufferStore(F, /*IsRaw=*/false);
break;
case Intrinsic::dx_resource_load_rawbuffer:
HasErrors |= lowerRawBufferLoad(F);
break;
case Intrinsic::dx_resource_store_rawbuffer:
HasErrors |= lowerBufferStore(F, /*IsRaw=*/true);
break;
case Intrinsic::dx_resource_updatecounter:
HasErrors |= lowerUpdateCounter(F);
break;
case Intrinsic::ctpop:
HasErrors |= lowerCtpopToCountBits(F);
break;
}
// Reached only for intrinsics handled by one of the cases above.
Updated = true;
}
// After an error the temporary casts are left in place: cleanup asserts that
// they are balanced and unused.
if (Updated && !HasErrors)
cleanupHandleCasts();
return Updated;
}
};
} // namespace
/// New pass manager entry point: lower the intrinsics of \p M to DXIL ops.
///
/// If nothing was lowered, all analyses are preserved. Otherwise only the
/// resource binding, DXIL metadata and shader flags analyses are marked as
/// preserved.
PreservedAnalyses DXILOpLowering::run(Module &M, ModuleAnalysisManager &MAM) {
  DXILBindingMap &Bindings = MAM.getResult<DXILResourceBindingAnalysis>(M);
  DXILResourceTypeMap &ResourceTypes =
      MAM.getResult<DXILResourceTypeAnalysis>(M);

  OpLowerer Lowerer(M, Bindings, ResourceTypes);
  if (!Lowerer.lowerIntrinsics())
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserve<DXILResourceBindingAnalysis>();
  Preserved.preserve<DXILMetadataAnalysis>();
  Preserved.preserve<ShaderFlagsAnalysis>();
  return Preserved;
}
namespace {
class DXILOpLoweringLegacy : public ModulePass {
public:
bool runOnModule(Module &M) override {
DXILBindingMap &DBM =
getAnalysis<DXILResourceBindingWrapperPass>().getBindingMap();
DXILResourceTypeMap &DRTM =
getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
return OpLowerer(M, DBM, DRTM).lowerIntrinsics();
}
StringRef getPassName() const override { return "DXIL Op Lowering"; }
DXILOpLoweringLegacy() : ModulePass(ID) {}
static char ID; // Pass identification.
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
AU.addRequired<DXILResourceTypeWrapperPass>();
AU.addRequired<DXILResourceBindingWrapperPass>();
AU.addPreserved<DXILResourceBindingWrapperPass>();
AU.addPreserved<DXILResourceMDWrapper>();
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
AU.addPreserved<ShaderFlagsAnalysisWrapper>();
}
};
char DXILOpLoweringLegacy::ID = 0;
} // end anonymous namespace
// Register the legacy pass under DEBUG_TYPE ("dxil-op-lower") and declare its
// dependencies on the resource type and resource binding wrapper passes.
INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
false, false)
INITIALIZE_PASS_DEPENDENCY(DXILResourceTypeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DXILResourceBindingWrapperPass)
INITIALIZE_PASS_END(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering", false,
false)
/// Create a new instance of the legacy DXIL op lowering pass. The instance is
/// allocated with `new` and returned as a raw pointer.
ModulePass *llvm::createDXILOpLoweringLegacyPass() {
return new DXILOpLoweringLegacy();
}