| //===- DXILOpLowering.cpp - Lowering to DXIL operations -------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "DXILOpLowering.h" |
| #include "DXILConstants.h" |
| #include "DXILIntrinsicExpansion.h" |
| #include "DXILOpBuilder.h" |
| #include "DXILResourceAnalysis.h" |
| #include "DXILShaderFlags.h" |
| #include "DirectX.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Analysis/DXILMetadataAnalysis.h" |
| #include "llvm/Analysis/DXILResource.h" |
| #include "llvm/CodeGen/Passes.h" |
| #include "llvm/IR/DiagnosticInfo.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsDirectX.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/PassManager.h" |
| #include "llvm/InitializePasses.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/ErrorHandling.h" |
| |
| #define DEBUG_TYPE "dxil-op-lower" |
| |
| using namespace llvm; |
| using namespace llvm::dxil; |
| |
| static bool isVectorArgExpansion(Function &F) { |
| switch (F.getIntrinsicID()) { |
| case Intrinsic::dx_dot2: |
| case Intrinsic::dx_dot3: |
| case Intrinsic::dx_dot4: |
| return true; |
| } |
| return false; |
| } |
| |
| static SmallVector<Value *> populateOperands(Value *Arg, IRBuilder<> &Builder) { |
| SmallVector<Value *> ExtractedElements; |
| auto *VecArg = dyn_cast<FixedVectorType>(Arg->getType()); |
| for (unsigned I = 0; I < VecArg->getNumElements(); ++I) { |
| Value *Index = ConstantInt::get(Type::getInt32Ty(Arg->getContext()), I); |
| Value *ExtractedElement = Builder.CreateExtractElement(Arg, Index); |
| ExtractedElements.push_back(ExtractedElement); |
| } |
| return ExtractedElements; |
| } |
| |
| static SmallVector<Value *> argVectorFlatten(CallInst *Orig, |
| IRBuilder<> &Builder) { |
| // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening. |
| unsigned NumOperands = Orig->getNumOperands() - 1; |
| assert(NumOperands > 0); |
| Value *Arg0 = Orig->getOperand(0); |
| [[maybe_unused]] auto *VecArg0 = dyn_cast<FixedVectorType>(Arg0->getType()); |
| assert(VecArg0); |
| SmallVector<Value *> NewOperands = populateOperands(Arg0, Builder); |
| for (unsigned I = 1; I < NumOperands; ++I) { |
| Value *Arg = Orig->getOperand(I); |
| [[maybe_unused]] auto *VecArg = dyn_cast<FixedVectorType>(Arg->getType()); |
| assert(VecArg); |
| assert(VecArg0->getElementType() == VecArg->getElementType()); |
| assert(VecArg0->getNumElements() == VecArg->getNumElements()); |
| auto NextOperandList = populateOperands(Arg, Builder); |
| NewOperands.append(NextOperandList.begin(), NextOperandList.end()); |
| } |
| return NewOperands; |
| } |
| |
namespace {
/// Rewrites DXIL intrinsic calls in a module into calls to the corresponding
/// `dx.op.*` DXIL operations.
class OpLowerer {
  /// The module being lowered.
  Module &M;
  /// Builder used to construct DXIL ops and their associated types.
  DXILOpBuilder OpBuilder;
  /// Resource binding information, from DXILResourceBindingAnalysis.
  DXILBindingMap &DBM;
  /// Resource type information, from DXILResourceTypeAnalysis.
  DXILResourceTypeMap &DRTM;
  /// Temporary `dx.resource.casthandle` calls created during lowering; these
  /// are removed again by cleanupHandleCasts().
  SmallVector<CallInst *> CleanupCasts;

public:
  OpLowerer(Module &M, DXILBindingMap &DBM, DXILResourceTypeMap &DRTM)
      : M(M), OpBuilder(M), DBM(DBM), DRTM(DRTM) {}
| |
  /// Replace every call to \c F using \c ReplaceCall, and then erase \c F. If
  /// there is an error replacing a call, we emit a diagnostic and return true.
  ///
  /// Non-call users of \c F are skipped, and \c F is only erased once it has
  /// no remaining users. The callbacks used in this file erase the call they
  /// are given on success.
  [[nodiscard]] bool
  replaceFunction(Function &F,
                  llvm::function_ref<Error(CallInst *CI)> ReplaceCall) {
    // make_early_inc_range: ReplaceCall may erase the call we're visiting.
    for (User *U : make_early_inc_range(F.users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI)
        continue;

      if (Error E = ReplaceCall(CI)) {
        std::string Message(toString(std::move(E)));
        DiagnosticInfoUnsupported Diag(*CI->getFunction(), Message,
                                       CI->getDebugLoc());
        M.getContext().diagnose(Diag);
        return true;
      }
    }
    if (F.user_empty())
      F.eraseFromParent();
    return false;
  }
| |
  /// Describes how a single DXIL op argument is produced from an intrinsic
  /// call: either forwarded from a call argument by index, or materialized as
  /// an immediate i8/i32 constant.
  struct IntrinArgSelect {
    enum class Type {
#define DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(name) name,
#include "DXILOperation.inc"
    };
    Type Type; // How to interpret Value (see replaceFunctionWithOp).
    int Value; // Argument index, or the immediate constant to emit.
  };
| |
| /// Replaces uses of a struct with uses of an equivalent named struct. |
| /// |
| /// DXIL operations that return structs give them well known names, so we need |
| /// to update uses when we switch from an LLVM intrinsic to an op. |
| Error replaceNamedStructUses(CallInst *Intrin, CallInst *DXILOp) { |
| auto *IntrinTy = cast<StructType>(Intrin->getType()); |
| auto *DXILOpTy = cast<StructType>(DXILOp->getType()); |
| if (!IntrinTy->isLayoutIdentical(DXILOpTy)) |
| return make_error<StringError>( |
| "Type mismatch between intrinsic and DXIL op", |
| inconvertibleErrorCode()); |
| |
| for (Use &U : make_early_inc_range(Intrin->uses())) |
| if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) |
| EVI->setOperand(0, DXILOp); |
| else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser())) |
| IVI->setOperand(0, DXILOp); |
| else |
| return make_error<StringError>("DXIL ops that return structs may only " |
| "be used by insert- and extractvalue", |
| inconvertibleErrorCode()); |
| return Error::success(); |
| } |
| |
| [[nodiscard]] bool |
| replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp, |
| ArrayRef<IntrinArgSelect> ArgSelects) { |
| bool IsVectorArgExpansion = isVectorArgExpansion(F); |
| assert(!(IsVectorArgExpansion && ArgSelects.size()) && |
| "Cann't do vector arg expansion when using arg selects."); |
| return replaceFunction(F, [&](CallInst *CI) -> Error { |
| OpBuilder.getIRB().SetInsertPoint(CI); |
| SmallVector<Value *> Args; |
| if (ArgSelects.size()) { |
| for (const IntrinArgSelect &A : ArgSelects) { |
| switch (A.Type) { |
| case IntrinArgSelect::Type::Index: |
| Args.push_back(CI->getArgOperand(A.Value)); |
| break; |
| case IntrinArgSelect::Type::I8: |
| Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value)); |
| break; |
| case IntrinArgSelect::Type::I32: |
| Args.push_back(OpBuilder.getIRB().getInt32(A.Value)); |
| break; |
| } |
| } |
| } else if (IsVectorArgExpansion) { |
| Args = argVectorFlatten(CI, OpBuilder.getIRB()); |
| } else { |
| Args.append(CI->arg_begin(), CI->arg_end()); |
| } |
| |
| Expected<CallInst *> OpCall = |
| OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType()); |
| if (Error E = OpCall.takeError()) |
| return E; |
| |
| if (isa<StructType>(CI->getType())) { |
| if (Error E = replaceNamedStructUses(CI, *OpCall)) |
| return E; |
| } else |
| CI->replaceAllUsesWith(*OpCall); |
| |
| CI->eraseFromParent(); |
| return Error::success(); |
| }); |
| } |
| |
  /// Create a cast between a `target("dx")` type and `dx.types.Handle`, which
  /// is intended to be removed by the end of lowering. This is used to allow
  /// lowering of ops which need to change their return or argument types in a
  /// piecemeal way - we can add the casts in to avoid updating all of the uses
  /// or defs, and by the end all of the casts will be redundant.
  ///
  /// \param V  The value to cast.
  /// \param Ty The type to cast \p V to.
  /// \returns The cast call, which is also recorded in \c CleanupCasts so
  ///          that cleanupHandleCasts() can remove it later.
  Value *createTmpHandleCast(Value *V, Type *Ty) {
    CallInst *Cast = OpBuilder.getIRB().CreateIntrinsic(
        Intrinsic::dx_resource_casthandle, {Ty, V->getType()}, {V});
    CleanupCasts.push_back(Cast);
    return Cast;
  }
| |
  /// Remove all of the temporary `dx.resource.casthandle` calls recorded in
  /// \c CleanupCasts, along with their now-unused declarations.
  ///
  /// Remaining casts are expected to come in pairs (see the comments below):
  /// the second cast of each pair is forwarded to the first's operand and
  /// erased, then the first casts (now unused) are erased, and finally the
  /// cast intrinsic declarations themselves are removed.
  void cleanupHandleCasts() {
    SmallVector<CallInst *> ToRemove;
    SmallVector<Function *> CastFns;

    for (CallInst *Cast : CleanupCasts) {
      // These casts were only put in to ease the move from `target("dx")` types
      // to `dx.types.Handle in a piecemeal way. At this point, all of the
      // non-cast uses should now be `dx.types.Handle`, and remaining casts
      // should all form pairs to and from the now unused `target("dx")` type.
      CastFns.push_back(Cast->getCalledFunction());

      // If the cast is not to `dx.types.Handle`, it should be the first part of
      // the pair. Keep track so we can remove it once it has no more uses.
      if (Cast->getType() != OpBuilder.getHandleType()) {
        ToRemove.push_back(Cast);
        continue;
      }
      // Otherwise, we're the second handle in a pair. Forward the arguments and
      // remove the (second) cast.
      CallInst *Def = cast<CallInst>(Cast->getOperand(0));
      assert(Def->getIntrinsicID() == Intrinsic::dx_resource_casthandle &&
             "Unbalanced pair of temporary handle casts");
      Cast->replaceAllUsesWith(Def->getOperand(0));
      Cast->eraseFromParent();
    }
    for (CallInst *Cast : ToRemove) {
      assert(Cast->user_empty() && "Temporary handle cast still has users");
      Cast->eraseFromParent();
    }

    // Deduplicate the cast functions so that we only erase each one once.
    llvm::sort(CastFns);
    CastFns.erase(llvm::unique(CastFns), CastFns.end());
    for (Function *F : CastFns)
      F->eraseFromParent();

    CleanupCasts.clear();
  }
| |
| // Remove the resource global associated with the handleFromBinding call |
| // instruction and their uses as they aren't needed anymore. |
| // TODO: We should verify that all the globals get removed. |
| // It's expected we'll need a custom pass in the future that will eliminate |
| // the need for this here. |
| void removeResourceGlobals(CallInst *CI) { |
| for (User *User : make_early_inc_range(CI->users())) { |
| if (StoreInst *Store = dyn_cast<StoreInst>(User)) { |
| Value *V = Store->getOperand(1); |
| Store->eraseFromParent(); |
| if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) |
| if (GV->use_empty()) { |
| GV->removeDeadConstantUsers(); |
| GV->eraseFromParent(); |
| } |
| } |
| } |
| } |
| |
  /// Lower a `dx.resource.handlefrombinding` call to the `CreateHandle` DXIL
  /// op. Used for DXIL versions before 1.6 (see lowerHandleFromBinding).
  [[nodiscard]] bool lowerToCreateHandle(Function &F) {
    IRBuilder<> &IRB = OpBuilder.getIRB();
    Type *Int8Ty = IRB.getInt8Ty();
    Type *Int32Ty = IRB.getInt32Ty();

    return replaceFunction(F, [&](CallInst *CI) -> Error {
      IRB.SetInsertPoint(CI);

      // Look up the binding info that the analysis recorded for this call.
      auto *It = DBM.find(CI);
      assert(It != DBM.end() && "Resource not in map?");
      dxil::ResourceBindingInfo &RI = *It;

      const auto &Binding = RI.getBinding();
      dxil::ResourceClass RC = DRTM[RI.getHandleTy()].getResourceClass();

      // Fold the binding's lower bound into the index operand.
      Value *IndexOp = CI->getArgOperand(3);
      if (Binding.LowerBound != 0)
        IndexOp = IRB.CreateAdd(IndexOp,
                                ConstantInt::get(Int32Ty, Binding.LowerBound));

      // Args: resource class, record ID, index, and the intrinsic's last
      // argument forwarded as-is.
      std::array<Value *, 4> Args{
          ConstantInt::get(Int8Ty, llvm::to_underlying(RC)),
          ConstantInt::get(Int32Ty, Binding.RecordID), IndexOp,
          CI->getArgOperand(4)};
      Expected<CallInst *> OpCall =
          OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName());
      if (Error E = OpCall.takeError())
        return E;

      // Bridge from dx.types.Handle back to the intrinsic's type with a
      // temporary cast that cleanupHandleCasts() removes later.
      Value *Cast = createTmpHandleCast(*OpCall, CI->getType());

      removeResourceGlobals(CI);

      CI->replaceAllUsesWith(Cast);
      CI->eraseFromParent();
      return Error::success();
    });
  }
| |
  /// Lower a `dx.resource.handlefrombinding` call to a pair of DXIL ops:
  /// `CreateHandleFromBinding` followed by `AnnotateHandle`. Used for DXIL
  /// version 1.6 and newer (see lowerHandleFromBinding).
  [[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
    IRBuilder<> &IRB = OpBuilder.getIRB();
    Type *Int32Ty = IRB.getInt32Ty();

    return replaceFunction(F, [&](CallInst *CI) -> Error {
      IRB.SetInsertPoint(CI);

      // Look up the binding info that the analysis recorded for this call.
      auto *It = DBM.find(CI);
      assert(It != DBM.end() && "Resource not in map?");
      dxil::ResourceBindingInfo &RI = *It;

      const auto &Binding = RI.getBinding();
      dxil::ResourceTypeInfo &RTI = DRTM[RI.getHandleTy()];
      dxil::ResourceClass RC = RTI.getResourceClass();

      // Fold the binding's lower bound into the index operand.
      Value *IndexOp = CI->getArgOperand(3);
      if (Binding.LowerBound != 0)
        IndexOp = IRB.CreateAdd(IndexOp,
                                ConstantInt::get(Int32Ty, Binding.LowerBound));

      std::pair<uint32_t, uint32_t> Props =
          RI.getAnnotateProps(*F.getParent(), RTI);

      // For `CreateHandleFromBinding` we need the upper bound rather than the
      // size, so we need to be careful about the difference for "unbounded".
      uint32_t Unbounded = std::numeric_limits<uint32_t>::max();
      uint32_t UpperBound = Binding.Size == Unbounded
                                ? Unbounded
                                : Binding.LowerBound + Binding.Size - 1;
      Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound,
                                               Binding.Space, RC);
      std::array<Value *, 3> BindArgs{ResBind, IndexOp, CI->getArgOperand(4)};
      Expected<CallInst *> OpBind = OpBuilder.tryCreateOp(
          OpCode::CreateHandleFromBinding, BindArgs, CI->getName());
      if (Error E = OpBind.takeError())
        return E;

      // Annotate the freshly created handle with the resource's properties.
      std::array<Value *, 2> AnnotateArgs{
          *OpBind, OpBuilder.getResProps(Props.first, Props.second)};
      Expected<CallInst *> OpAnnotate = OpBuilder.tryCreateOp(
          OpCode::AnnotateHandle, AnnotateArgs,
          CI->hasName() ? CI->getName() + "_annot" : Twine());
      if (Error E = OpAnnotate.takeError())
        return E;

      // Bridge from dx.types.Handle back to the intrinsic's type with a
      // temporary cast that cleanupHandleCasts() removes later.
      Value *Cast = createTmpHandleCast(*OpAnnotate, CI->getType());

      removeResourceGlobals(CI);

      CI->replaceAllUsesWith(Cast);
      CI->eraseFromParent();

      return Error::success();
    });
  }
| |
| /// Lower `dx.resource.handlefrombinding` intrinsics depending on the shader |
| /// model and taking into account binding information from |
| /// DXILResourceBindingAnalysis. |
| bool lowerHandleFromBinding(Function &F) { |
| Triple TT(Triple(M.getTargetTriple())); |
| if (TT.getDXILVersion() < VersionTuple(1, 6)) |
| return lowerToCreateHandle(F); |
| return lowerToBindAndAnnotateHandle(F); |
| } |
| |
  /// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
  /// Since we expect to be post-scalarization, make an effort to avoid vectors.
  ///
  /// If \c HasCheckBit is set, \c Intrin returns a {value, check bit} struct:
  /// uses of the check bit (struct index 1) are routed through a
  /// CheckAccessFullyMapped op, and the remaining extract of the value element
  /// becomes the result whose uses are replaced.
  Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) {
    IRBuilder<> &IRB = OpBuilder.getIRB();

    Instruction *OldResult = Intrin;
    Type *OldTy = Intrin->getType();

    if (HasCheckBit) {
      auto *ST = cast<StructType>(OldTy);

      Value *CheckOp = nullptr;
      Type *Int32Ty = IRB.getInt32Ty();
      // Uses are erased as we go, hence the early-increment range.
      for (Use &U : make_early_inc_range(OldResult->uses())) {
        if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
          ArrayRef<unsigned> Indices = EVI->getIndices();
          assert(Indices.size() == 1);
          // We're only interested in uses of the check bit for now.
          if (Indices[0] != 1)
            continue;
          // Lazily create a single CheckAccessFullyMapped on the status field
          // (ResRet element 4), shared by every check-bit use.
          if (!CheckOp) {
            Value *NewEVI = IRB.CreateExtractValue(Op, 4);
            Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
                OpCode::CheckAccessFullyMapped, {NewEVI},
                OldResult->hasName() ? OldResult->getName() + "_check"
                                     : Twine(),
                Int32Ty);
            if (Error E = OpCall.takeError())
              return E;
            CheckOp = *OpCall;
          }
          EVI->replaceAllUsesWith(CheckOp);
          EVI->eraseFromParent();
        }
      }

      if (OldResult->use_empty()) {
        // Only the check bit was used, so we're done here.
        OldResult->eraseFromParent();
        return Error::success();
      }

      assert(OldResult->hasOneUse() &&
             isa<ExtractValueInst>(*OldResult->user_begin()) &&
             "Expected only use to be extract of first element");
      // From here on, work on the extract of the value element instead.
      OldResult = cast<Instruction>(*OldResult->user_begin());
      OldTy = ST->getElementType(0);
    }

    // For scalars, we just extract the first element.
    if (!isa<FixedVectorType>(OldTy)) {
      Value *EVI = IRB.CreateExtractValue(Op, 0);
      OldResult->replaceAllUsesWith(EVI);
      OldResult->eraseFromParent();
      if (OldResult != Intrin) {
        assert(Intrin->use_empty() && "Intrinsic still has uses?");
        Intrin->eraseFromParent();
      }
      return Error::success();
    }

    // One extractvalue per ResRet element, created on demand.
    std::array<Value *, 4> Extracts = {};
    SmallVector<ExtractElementInst *> DynamicAccesses;

    // The users of the operation should all be scalarized, so we attempt to
    // replace the extractelements with extractvalues directly.
    for (Use &U : make_early_inc_range(OldResult->uses())) {
      if (auto *EEI = dyn_cast<ExtractElementInst>(U.getUser())) {
        if (auto *IndexOp = dyn_cast<ConstantInt>(EEI->getIndexOperand())) {
          size_t IndexVal = IndexOp->getZExtValue();
          assert(IndexVal < 4 && "Index into buffer load out of range");
          if (!Extracts[IndexVal])
            Extracts[IndexVal] = IRB.CreateExtractValue(Op, IndexVal);
          EEI->replaceAllUsesWith(Extracts[IndexVal]);
          EEI->eraseFromParent();
        } else {
          DynamicAccesses.push_back(EEI);
        }
      }
    }

    const auto *VecTy = cast<FixedVectorType>(OldTy);
    const unsigned N = VecTy->getNumElements();

    // If there's a dynamic access we need to round trip through stack memory so
    // that we don't leave vectors around.
    if (!DynamicAccesses.empty()) {
      Type *Int32Ty = IRB.getInt32Ty();
      Constant *Zero = ConstantInt::get(Int32Ty, 0);

      Type *ElTy = VecTy->getElementType();
      Type *ArrayTy = ArrayType::get(ElTy, N);
      Value *Alloca = IRB.CreateAlloca(ArrayTy);

      // Spill every element of the ResRet into the stack array.
      for (int I = 0, E = N; I != E; ++I) {
        if (!Extracts[I])
          Extracts[I] = IRB.CreateExtractValue(Op, I);
        Value *GEP = IRB.CreateInBoundsGEP(
            ArrayTy, Alloca, {Zero, ConstantInt::get(Int32Ty, I)});
        IRB.CreateStore(Extracts[I], GEP);
      }

      // Replace each dynamic extractelement with a load from the array.
      for (ExtractElementInst *EEI : DynamicAccesses) {
        Value *GEP = IRB.CreateInBoundsGEP(ArrayTy, Alloca,
                                           {Zero, EEI->getIndexOperand()});
        Value *Load = IRB.CreateLoad(ElTy, GEP);
        EEI->replaceAllUsesWith(Load);
        EEI->eraseFromParent();
      }
    }

    // If we still have uses, then we're not fully scalarized and need to
    // recreate the vector. This should only happen for things like exported
    // functions from libraries.
    if (!OldResult->use_empty()) {
      for (int I = 0, E = N; I != E; ++I)
        if (!Extracts[I])
          Extracts[I] = IRB.CreateExtractValue(Op, I);

      Value *Vec = UndefValue::get(OldTy);
      for (int I = 0, E = N; I != E; ++I)
        Vec = IRB.CreateInsertElement(Vec, Extracts[I], I);
      OldResult->replaceAllUsesWith(Vec);
    }

    OldResult->eraseFromParent();
    if (OldResult != Intrin) {
      assert(Intrin->use_empty() && "Intrinsic still has uses?");
      Intrin->eraseFromParent();
    }

    return Error::success();
  }
| |
| [[nodiscard]] bool lowerTypedBufferLoad(Function &F, bool HasCheckBit) { |
| IRBuilder<> &IRB = OpBuilder.getIRB(); |
| Type *Int32Ty = IRB.getInt32Ty(); |
| |
| return replaceFunction(F, [&](CallInst *CI) -> Error { |
| IRB.SetInsertPoint(CI); |
| |
| Value *Handle = |
| createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); |
| Value *Index0 = CI->getArgOperand(1); |
| Value *Index1 = UndefValue::get(Int32Ty); |
| |
| Type *OldTy = CI->getType(); |
| if (HasCheckBit) |
| OldTy = cast<StructType>(OldTy)->getElementType(0); |
| Type *NewRetTy = OpBuilder.getResRetType(OldTy->getScalarType()); |
| |
| std::array<Value *, 3> Args{Handle, Index0, Index1}; |
| Expected<CallInst *> OpCall = OpBuilder.tryCreateOp( |
| OpCode::BufferLoad, Args, CI->getName(), NewRetTy); |
| if (Error E = OpCall.takeError()) |
| return E; |
| if (Error E = replaceResRetUses(CI, *OpCall, HasCheckBit)) |
| return E; |
| |
| return Error::success(); |
| }); |
| } |
| |
| [[nodiscard]] bool lowerRawBufferLoad(Function &F) { |
| Triple TT(Triple(M.getTargetTriple())); |
| VersionTuple DXILVersion = TT.getDXILVersion(); |
| const DataLayout &DL = F.getDataLayout(); |
| IRBuilder<> &IRB = OpBuilder.getIRB(); |
| Type *Int8Ty = IRB.getInt8Ty(); |
| Type *Int32Ty = IRB.getInt32Ty(); |
| |
| return replaceFunction(F, [&](CallInst *CI) -> Error { |
| IRB.SetInsertPoint(CI); |
| |
| Type *OldTy = cast<StructType>(CI->getType())->getElementType(0); |
| Type *ScalarTy = OldTy->getScalarType(); |
| Type *NewRetTy = OpBuilder.getResRetType(ScalarTy); |
| |
| Value *Handle = |
| createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); |
| Value *Index0 = CI->getArgOperand(1); |
| Value *Index1 = CI->getArgOperand(2); |
| uint64_t NumElements = |
| DL.getTypeSizeInBits(OldTy) / DL.getTypeSizeInBits(ScalarTy); |
| Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements)); |
| Value *Align = |
| ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value()); |
| |
| Expected<CallInst *> OpCall = |
| DXILVersion >= VersionTuple(1, 2) |
| ? OpBuilder.tryCreateOp(OpCode::RawBufferLoad, |
| {Handle, Index0, Index1, Mask, Align}, |
| CI->getName(), NewRetTy) |
| : OpBuilder.tryCreateOp(OpCode::BufferLoad, |
| {Handle, Index0, Index1}, CI->getName(), |
| NewRetTy); |
| if (Error E = OpCall.takeError()) |
| return E; |
| if (Error E = replaceResRetUses(CI, *OpCall, /*HasCheckBit=*/true)) |
| return E; |
| |
| return Error::success(); |
| }); |
| } |
| |
| [[nodiscard]] bool lowerUpdateCounter(Function &F) { |
| IRBuilder<> &IRB = OpBuilder.getIRB(); |
| Type *Int32Ty = IRB.getInt32Ty(); |
| |
| return replaceFunction(F, [&](CallInst *CI) -> Error { |
| IRB.SetInsertPoint(CI); |
| Value *Handle = |
| createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); |
| Value *Op1 = CI->getArgOperand(1); |
| |
| std::array<Value *, 2> Args{Handle, Op1}; |
| |
| Expected<CallInst *> OpCall = OpBuilder.tryCreateOp( |
| OpCode::UpdateCounter, Args, CI->getName(), Int32Ty); |
| |
| if (Error E = OpCall.takeError()) |
| return E; |
| |
| CI->replaceAllUsesWith(*OpCall); |
| CI->eraseFromParent(); |
| return Error::success(); |
| }); |
| } |
| |
  /// Remove the now-dead declaration of `dx.resource.getpointer`.
  ///
  /// These should have already been handled in DXILResourceAccess, so we can
  /// just clean up the dead prototype.
  ///
  /// \returns false: this never emits a diagnostic.
  [[nodiscard]] bool lowerGetPointer(Function &F) {
    assert(F.user_empty() && "getpointer operations should have been removed");
    F.eraseFromParent();
    return false;
  }
| |
| [[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) { |
| Triple TT(Triple(M.getTargetTriple())); |
| VersionTuple DXILVersion = TT.getDXILVersion(); |
| const DataLayout &DL = F.getDataLayout(); |
| IRBuilder<> &IRB = OpBuilder.getIRB(); |
| Type *Int8Ty = IRB.getInt8Ty(); |
| Type *Int32Ty = IRB.getInt32Ty(); |
| |
| return replaceFunction(F, [&](CallInst *CI) -> Error { |
| IRB.SetInsertPoint(CI); |
| |
| Value *Handle = |
| createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); |
| Value *Index0 = CI->getArgOperand(1); |
| Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty); |
| |
| Value *Data = CI->getArgOperand(IsRaw ? 3 : 2); |
| Type *DataTy = Data->getType(); |
| Type *ScalarTy = DataTy->getScalarType(); |
| |
| uint64_t NumElements = |
| DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy); |
| Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements)); |
| |
| // TODO: check that we only have vector or scalar... |
| if (!IsRaw && NumElements != 4) |
| return make_error<StringError>( |
| "typedBufferStore data must be a vector of 4 elements", |
| inconvertibleErrorCode()); |
| else if (NumElements > 4) |
| return make_error<StringError>( |
| "rawBufferStore data must have at most 4 elements", |
| inconvertibleErrorCode()); |
| |
| std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr}; |
| if (DataTy == ScalarTy) |
| DataElements[0] = Data; |
| else { |
| // Since we're post-scalarizer, if we see a vector here it's likely |
| // constructed solely for the argument of the store. Just use the scalar |
| // values from before they're inserted into the temporary. |
| auto *IEI = dyn_cast<InsertElementInst>(Data); |
| while (IEI) { |
| auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2)); |
| if (!IndexOp) |
| break; |
| size_t IndexVal = IndexOp->getZExtValue(); |
| assert(IndexVal < 4 && "Too many elements for buffer store"); |
| DataElements[IndexVal] = IEI->getOperand(1); |
| IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); |
| } |
| } |
| |
| // If for some reason we weren't able to forward the arguments from the |
| // scalarizer artifact, then we may need to actually extract elements from |
| // the vector. |
| for (int I = 0, E = NumElements; I < E; ++I) |
| if (DataElements[I] == nullptr) |
| DataElements[I] = |
| IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I)); |
| // For any elements beyond the length of the vector, fill up with undef. |
| for (int I = NumElements, E = 4; I < E; ++I) |
| if (DataElements[I] == nullptr) |
| DataElements[I] = UndefValue::get(ScalarTy); |
| |
| dxil::OpCode Op = OpCode::BufferStore; |
| SmallVector<Value *, 9> Args{ |
| Handle, Index0, Index1, DataElements[0], |
| DataElements[1], DataElements[2], DataElements[3], Mask}; |
| if (IsRaw && DXILVersion >= VersionTuple(1, 2)) { |
| Op = OpCode::RawBufferStore; |
| // RawBufferStore requires the alignment |
| Args.push_back( |
| ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value())); |
| } |
| Expected<CallInst *> OpCall = |
| OpBuilder.tryCreateOp(Op, Args, CI->getName()); |
| if (Error E = OpCall.takeError()) |
| return E; |
| |
| CI->eraseFromParent(); |
| // Clean up any leftover `insertelement`s |
| auto *IEI = dyn_cast<InsertElementInst>(Data); |
| while (IEI && IEI->use_empty()) { |
| InsertElementInst *Tmp = IEI; |
| IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); |
| Tmp->eraseFromParent(); |
| } |
| |
| return Error::success(); |
| }); |
| } |
| |
  /// Lower `llvm.ctpop` to the `CountBits` DXIL op.
  ///
  /// CountBits is created with an i32 (or vector-of-i32) result. For i32
  /// ctpops this is a direct replacement; for i16/i64 results, users that are
  /// already the matching casts to i32 are folded into the op, and a single
  /// explicit cast back to the original type is emitted for any other users.
  [[nodiscard]] bool lowerCtpopToCountBits(Function &F) {
    IRBuilder<> &IRB = OpBuilder.getIRB();
    Type *Int32Ty = IRB.getInt32Ty();

    return replaceFunction(F, [&](CallInst *CI) -> Error {
      IRB.SetInsertPoint(CI);
      SmallVector<Value *> Args;
      Args.append(CI->arg_begin(), CI->arg_end());

      // Result is i32, keeping the original vector element count if any.
      Type *RetTy = Int32Ty;
      Type *FRT = F.getReturnType();
      if (const auto *VT = dyn_cast<VectorType>(FRT))
        RetTy = VectorType::get(RetTy, VT);

      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
          dxil::OpCode::CountBits, Args, CI->getName(), RetTy);
      if (Error E = OpCall.takeError())
        return E;

      // If the result type is 32 bits we can do a direct replacement.
      if (FRT->isIntOrIntVectorTy(32)) {
        CI->replaceAllUsesWith(*OpCall);
        CI->eraseFromParent();
        return Error::success();
      }

      // i16 results are widened to i32 by zext/sext users; i64 results are
      // narrowed by trunc users. Those casts can be folded into the op.
      unsigned CastOp;
      unsigned CastOp2;
      if (FRT->isIntOrIntVectorTy(16)) {
        CastOp = Instruction::ZExt;
        CastOp2 = Instruction::SExt;
      } else { // must be 64 bits
        assert(FRT->isIntOrIntVectorTy(64) &&
               "Currently only lowering 16, 32, or 64 bit ctpop to CountBits \
is supported.");
        CastOp = Instruction::Trunc;
        CastOp2 = Instruction::Trunc;
      }

      // It is correct to replace the ctpop with the dxil op and
      // remove all casts to i32
      bool NeedsCast = false;
      for (User *User : make_early_inc_range(CI->users())) {
        Instruction *I = dyn_cast<Instruction>(User);
        if (I && (I->getOpcode() == CastOp || I->getOpcode() == CastOp2) &&
            I->getType() == RetTy) {
          I->replaceAllUsesWith(*OpCall);
          I->eraseFromParent();
        } else
          NeedsCast = true;
      }

      // It is correct to replace a ctpop with the dxil op and
      // a cast from i32 to the return type of the ctpop
      // the cast is emitted here if there is a non-cast to i32
      // instr which uses the ctpop
      if (NeedsCast) {
        Value *Cast =
            IRB.CreateZExtOrTrunc(*OpCall, F.getReturnType(), "ctpop.cast");
        CI->replaceAllUsesWith(Cast);
      }

      CI->eraseFromParent();
      return Error::success();
    });
  }
| |
  /// Walk every intrinsic declaration in the module and lower the ones we
  /// know about to DXIL ops.
  ///
  /// \returns true if the module was changed. Lowering errors are diagnosed
  /// through the LLVMContext; the temporary handle casts are only cleaned up
  /// when no errors occurred.
  bool lowerIntrinsics() {
    bool Updated = false;
    bool HasErrors = false;

    // Functions are erased during lowering, hence the early-increment range.
    for (Function &F : make_early_inc_range(M.functions())) {
      if (!F.isDeclaration())
        continue;
      Intrinsic::ID ID = F.getIntrinsicID();
      switch (ID) {
      default:
        continue;
// Simple table-generated lowerings map straight to replaceFunctionWithOp.
#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
  case Intrin:                                                                 \
    HasErrors |= replaceFunctionWithOp(                                        \
        F, OpCode, ArrayRef<IntrinArgSelect>{__VA_ARGS__});                    \
    break;
#include "DXILOperation.inc"
      case Intrinsic::dx_resource_handlefrombinding:
        HasErrors |= lowerHandleFromBinding(F);
        break;
      case Intrinsic::dx_resource_getpointer:
        HasErrors |= lowerGetPointer(F);
        break;
      case Intrinsic::dx_resource_load_typedbuffer:
        HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
        break;
      case Intrinsic::dx_resource_store_typedbuffer:
        HasErrors |= lowerBufferStore(F, /*IsRaw=*/false);
        break;
      case Intrinsic::dx_resource_load_rawbuffer:
        HasErrors |= lowerRawBufferLoad(F);
        break;
      case Intrinsic::dx_resource_store_rawbuffer:
        HasErrors |= lowerBufferStore(F, /*IsRaw=*/true);
        break;
      case Intrinsic::dx_resource_updatecounter:
        HasErrors |= lowerUpdateCounter(F);
        break;
      case Intrinsic::ctpop:
        HasErrors |= lowerCtpopToCountBits(F);
        break;
      }
      Updated = true;
    }
    if (Updated && !HasErrors)
      cleanupHandleCasts();

    return Updated;
  }
| }; |
| } // namespace |
| |
| PreservedAnalyses DXILOpLowering::run(Module &M, ModuleAnalysisManager &MAM) { |
| DXILBindingMap &DBM = MAM.getResult<DXILResourceBindingAnalysis>(M); |
| DXILResourceTypeMap &DRTM = MAM.getResult<DXILResourceTypeAnalysis>(M); |
| |
| bool MadeChanges = OpLowerer(M, DBM, DRTM).lowerIntrinsics(); |
| if (!MadeChanges) |
| return PreservedAnalyses::all(); |
| PreservedAnalyses PA; |
| PA.preserve<DXILResourceBindingAnalysis>(); |
| PA.preserve<DXILMetadataAnalysis>(); |
| PA.preserve<ShaderFlagsAnalysis>(); |
| return PA; |
| } |
| |
namespace {
/// Legacy pass manager wrapper around the DXIL op lowering logic.
class DXILOpLoweringLegacy : public ModulePass {
public:
  bool runOnModule(Module &M) override {
    DXILBindingMap &DBM =
        getAnalysis<DXILResourceBindingWrapperPass>().getBindingMap();
    DXILResourceTypeMap &DRTM =
        getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();

    return OpLowerer(M, DBM, DRTM).lowerIntrinsics();
  }
  StringRef getPassName() const override { return "DXIL Op Lowering"; }
  DXILOpLoweringLegacy() : ModulePass(ID) {}

  static char ID; // Pass identification.
  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<DXILResourceTypeWrapperPass>();
    AU.addRequired<DXILResourceBindingWrapperPass>();
    AU.addPreserved<DXILResourceBindingWrapperPass>();
    AU.addPreserved<DXILResourceMDWrapper>();
    AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
    AU.addPreserved<ShaderFlagsAnalysisWrapper>();
  }
};
char DXILOpLoweringLegacy::ID = 0;
} // end anonymous namespace
| |
INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DXILResourceTypeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DXILResourceBindingWrapperPass)
INITIALIZE_PASS_END(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering", false,
                    false)

/// Factory for the legacy pass manager version of this pass.
ModulePass *llvm::createDXILOpLoweringLegacyPass() {
  return new DXILOpLoweringLegacy();
}