[AMDGPU] Don't flush vmcnt for loops with use/def pairs. Conditions for hoisting vmcnt with flat instructions should be similar to VMEM. If there are use/def pairs in a loop body, we cannot guarantee that hoisting the waitcnt will be profitable. Better heuristics are needed to analyse whether the gains from avoiding waitcnt in loop bodies outweigh waiting for loads in the preheader. Reviewed By: foad. Differential Revision: https://reviews.llvm.org/D151126

commit: e501ed84aa4768e7008c6127e8573788dcee31ee [log] [tgz]
author: Austin Kerbow <Austin.Kerbow@amd.com> Mon May 22 10:32:09 2023 -0700
committer: Austin Kerbow <Austin.Kerbow@amd.com> Fri Jun 02 22:55:12 2023 -0700
tree: 2fa2853785e1eb5fc7c201487689200575d830eb
parent: fb7f50a0c38ae9e6dc138f6ddcb3d33d2782563d [diff] [blame]
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
index fe0543a..5e1f9b0 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll

@@ -34,9 +34,9 @@
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    global_load_dword v2, v[0:1], off
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:  .LBB1_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
@@ -62,10 +62,9 @@
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    flat_load_dword v2, v[0:1]
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:  .LBB2_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
commit	e501ed84aa4768e7008c6127e8573788dcee31ee	[log] [tgz]
author	Austin Kerbow <Austin.Kerbow@amd.com>	Mon May 22 10:32:09 2023 -0700
committer	Austin Kerbow <Austin.Kerbow@amd.com>	Fri Jun 02 22:55:12 2023 -0700
tree	2fa2853785e1eb5fc7c201487689200575d830eb
parent	fb7f50a0c38ae9e6dc138f6ddcb3d33d2782563d [diff] [blame]