[DirectX] Remove intrinsic definitions with no use (#133459)
Clean up in DXILFinalizeLinkage.cpp, where intrinsic declarations are getting orphaned.
This change reduces "Unsupported intrinsic for DXIL lowering" errors
when compiling DML shaders from 12218 to 415 and improves our
compilation success rate from less than 1% to 44%.
diff --git a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp
index 91ac758..7651617 100644
--- a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp
+++ b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp
@@ -18,7 +18,7 @@
using namespace llvm;
static bool finalizeLinkage(Module &M) {
- SmallPtrSet<Function *, 8> Funcs;
+ SmallVector<Function *> Funcs;
// Collect non-entry and non-exported functions to set to internal linkage.
for (Function &EF : M.functions()) {
@@ -26,7 +26,7 @@
continue;
if (EF.hasFnAttribute("hlsl.shader") || EF.hasFnAttribute("hlsl.export"))
continue;
- Funcs.insert(&EF);
+ Funcs.push_back(&EF);
}
for (Function *F : Funcs) {
@@ -36,6 +36,14 @@
M.getFunctionList().erase(F);
}
+ // Do a pass over intrinsics that are no longer used and remove them.
+ Funcs.clear();
+ for (Function &F : M.functions())
+ if (F.isIntrinsic() && F.use_empty())
+ Funcs.push_back(&F);
+ for (Function *F : Funcs)
+ F->eraseFromParent();
+
return false;
}
diff --git a/llvm/test/CodeGen/DirectX/remove-dead-intriniscs.ll b/llvm/test/CodeGen/DirectX/remove-dead-intriniscs.ll
new file mode 100644
index 0000000..e3b1f0b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/remove-dead-intriniscs.ll
@@ -0,0 +1,22 @@
+
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+
+declare void @llvm.lifetime.start.p0(i64, ptr) #1
+declare void @llvm.lifetime.end.p0(i64, ptr) #1
+declare i32 @llvm.dx.udot.v4i32(<4 x i32>, <4 x i32>) #2
+declare void @llvm.memset.p0.i32(ptr, i8, i32, i1) #3
+
+; CHECK-NOT: declare void @llvm.lifetime.start.p0(i64, ptr)
+; CHECK-NOT: declare void @llvm.lifetime.end.p0(i64, ptr)
+; CHECK-NOT: declare i32 @llvm.dx.udot.v4i32(<4 x i32>, <4 x i32>)
+; CHECK-NOT: declare void @llvm.memset.p0.i32(ptr, i8, i32, i1)
+
+; CHECK-LABEL: empty_fn
+define void @empty_fn () local_unnamed_addr #0 {
+ ret void
+ }
+
+attributes #0 = { convergent norecurse nounwind "hlsl.export"}
+attributes #1 = { nounwind memory(argmem: readwrite) }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { nounwind memory(argmem: write) }