| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950 %s |
| |
| ; Test that vmcnt(0) is correctly preserved between buffer_wbl2 and atomic |
| ; when there are global memory stores that need to be written back. |
| |
; Cross-block case: the plain global store and an explicit wait sit in %entry,
; while the agent-scope release atomic sits in the conditionally executed
; %do_atomic block. The checks expect %do_atomic to still contain
; "buffer_wbl2 sc1" followed by "s_waitcnt vmcnt(0)" ahead of the atomic swap,
; even though %entry has already issued its own s_waitcnt after the store.
;   %data_ptr   - global (addrspace 1) destination of the plain i32 store
;   %atomic_ptr - global (addrspace 1) location exchanged with 0 in %do_atomic
;   %cond       - selects whether %do_atomic runs before the function returns
| define void @global_store_different_block(ptr addrspace(1) %data_ptr, ptr addrspace(1) %atomic_ptr, i1 %cond) { |
| ; GFX950-LABEL: global_store_different_block: |
| ; GFX950: ; %bb.0: ; %entry |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 |
| ; GFX950-NEXT: v_mov_b32_e32 v4, 42 |
| ; GFX950-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc |
| ; GFX950-NEXT: s_cbranch_execz .LBB0_2 |
| ; GFX950-NEXT: ; %bb.1: ; %do_atomic |
| ; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0 |
| ; GFX950-NEXT: buffer_wbl2 sc1 |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) |
| ; GFX950-NEXT: global_atomic_swap_x2 v[2:3], v[0:1], off |
| ; GFX950-NEXT: .LBB0_2: ; %exit |
| ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| ; Global store in entry block |
| store i32 42, ptr addrspace(1) %data_ptr, align 4 |
; Explicit wait placed right after the store; the checks show
; "s_waitcnt vmcnt(0) lgkmcnt(0)" at this position in %entry.
; NOTE(review): 112 == 0x70, presumably expcnt left at its maximum with vmcnt
; and lgkmcnt at 0 -- confirm against the gfx9 s_waitcnt immediate encoding.
| call void @llvm.amdgcn.s.waitcnt(i32 112) |
| br i1 %cond, label %do_atomic, label %exit |
| |
| do_atomic: |
; Release exchange with 0 at agent scope; the previous value (%old) is unused.
| %old = atomicrmw xchg ptr addrspace(1) %atomic_ptr, i64 0 syncscope("agent") release |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
; Same-block case: a plain global store is immediately followed by an
; agent-scope release atomic exchange in the single %entry block. The checks
; expect the store, then "buffer_wbl2 sc1", then "s_waitcnt vmcnt(0)", and only
; then the atomic swap.
;   %data_ptr   - global (addrspace 1) destination of the plain i32 store
;   %atomic_ptr - global (addrspace 1) location exchanged with 0
| define void @global_store_then_atomic(ptr addrspace(1) %data_ptr, ptr addrspace(1) %atomic_ptr) { |
| ; GFX950-LABEL: global_store_then_atomic: |
| ; GFX950: ; %bb.0: ; %entry |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_mov_b32_e32 v4, 42 |
| ; GFX950-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0 |
| ; GFX950-NEXT: buffer_wbl2 sc1 |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) |
| ; GFX950-NEXT: global_atomic_swap_x2 v[2:3], v[0:1], off |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| entry: |
; Plain (non-atomic) store that precedes the release operation below.
| store i32 42, ptr addrspace(1) %data_ptr, align 4 |
; Release exchange with 0 at agent scope; the previous value (%old) is unused.
| %old = atomicrmw xchg ptr addrspace(1) %atomic_ptr, i64 0 syncscope("agent") release |
| ret void |
| } |