| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s |
| |
| ; Test single atomic RMW - s_wait_xcnt should be kept. |
| define amdgpu_kernel void @single_atomic_rmw(ptr addrspace(1) %ptr) { |
| ; GFX1250-LABEL: single_atomic_rmw: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: a single seq_cst atomicrmw add of the constant 1. |
| ; The expected output above contains exactly one s_wait_xcnt, located in the |
| ; wait sequence that precedes the global_atomic_add_u32. |
| ; %gep1 uses index 0, so the atomic targets %ptr itself. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| ret void |
| } |
| |
| ; Test back-to-back atomic RMW operations - only first s_wait_xcnt should remain. |
| define amdgpu_kernel void @atomic_rmw_back_to_back(ptr addrspace(1) %ptr) { |
| ; GFX1250-LABEL: atomic_rmw_back_to_back: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: three consecutive seq_cst atomicrmw adds (constants 1, 2, 3) |
| ; on i32 elements 0, 1 and 2 of %ptr; these show up as offsets 0, 4 and 8 in |
| ; the expected output. |
| ; Only the wait sequence before the first atomic contains s_wait_xcnt; the |
| ; sequences before the second and third atomics do not. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block with interleaved ALU ops - should not break the block. |
| define amdgpu_kernel void @atomic_rmw_with_alu(ptr addrspace(1) %ptr, i32 %a, i32 %b) { |
| ; GFX1250-LABEL: atomic_rmw_with_alu: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| ; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0xc |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_add_co_i32 s4, s2, s3 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s4 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_mul_i32 s2, s2, s3 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: three seq_cst atomicrmw adds with integer arithmetic on the |
| ; kernel arguments %a and %b placed between them. The add and the mul appear |
| ; as s_add_co_i32 and s_mul_i32 in the expected output. |
| ; s_wait_xcnt is expected only before the first atomic; none follows the |
| ; scalar ALU instructions. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| ; %sum is the operand of the second atomic. |
| %sum = add i32 %a, %b |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 %sum seq_cst |
| ; %prod is the operand of the third atomic. |
| %prod = mul i32 %a, %b |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 %prod seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block broken by global load (VMEM). |
| define amdgpu_kernel void @atomic_rmw_broken_by_global_load(ptr addrspace(1) %ptr, ptr addrspace(1) %load_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_broken_by_global_load: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, then a non-atomic load from |
| ; %load_ptr, then a third atomic whose operand is the loaded value. |
| ; Expected output: s_wait_xcnt before the first atomic, none before the |
| ; second, and a second s_wait_xcnt in the wait sequence that follows the |
| ; global_load_b32 and precedes the third atomic. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening global-address-space load between the atomics. |
| %load = load i32, ptr addrspace(1) %load_ptr |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 %load seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block broken by global store (VMEM). |
| define amdgpu_kernel void @atomic_rmw_broken_by_global_store(ptr addrspace(1) %ptr, ptr addrspace(1) %store_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_broken_by_global_store: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 42 |
| ; GFX1250-NEXT: global_store_b32 v0, v1, s[2:3] |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, a non-atomic store of 42 to |
| ; %store_ptr, then a third atomic. |
| ; Expected output has three s_wait_xcnt instructions: one before the first |
| ; atomic, one between the global_store_b32 and the next write to v1 (the |
| ; register the store reads), and one in the wait sequence before the third |
| ; atomic. None is expected before the second atomic. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening global-address-space store between the atomics. |
| store i32 42, ptr addrspace(1) %store_ptr |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block broken by FLAT load (VMEM). |
| define amdgpu_kernel void @atomic_rmw_broken_by_flat_load(ptr addrspace(1) %ptr, ptr %flat_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_broken_by_flat_load: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, then a load through the generic |
| ; pointer %flat_ptr (flat_load_b32 in the expected output), then a third |
| ; atomic whose operand is the loaded value. |
| ; NOTE(review): the expected output has s_wait_xcnt only before the first |
| ; atomic; no s_wait_xcnt appears between the flat_load_b32 and the third |
| ; atomic, unlike the global-load test in this file. Confirm this matches the |
| ; "broken by" wording in the test description. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening load through a generic (no addrspace) pointer. |
| %load = load i32, ptr %flat_ptr, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 %load seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block broken by FLAT store (VMEM). |
| define amdgpu_kernel void @atomic_rmw_broken_by_flat_store(ptr addrspace(1) %ptr, ptr %flat_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_broken_by_flat_store: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 42 |
| ; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3] |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, a store of 42 through the generic |
| ; pointer %flat_ptr (flat_store_b32 in the expected output), then a third |
| ; atomic. |
| ; Expected output has two s_wait_xcnt instructions: one before the first |
| ; atomic and one between the flat_store_b32 and the next write to v1. |
| ; NOTE(review): the wait sequence directly before the third atomic contains |
| ; no s_wait_xcnt, unlike the global-store test in this file. Confirm this |
| ; matches the "broken by" wording in the test description. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening store through a generic (no addrspace) pointer. |
| store i32 42, ptr %flat_ptr, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block broken by SMEM load. |
| define amdgpu_kernel void @atomic_rmw_broken_by_smem_load(ptr addrspace(1) %ptr, ptr addrspace(4) %const_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_broken_by_smem_load: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, then a load through the |
| ; addrspace(4) pointer %const_ptr (s_load_b32 followed by s_wait_kmcnt in the |
| ; expected output), then a third atomic whose operand is the loaded value. |
| ; Expected output: s_wait_xcnt before the first atomic, none before the |
| ; second, and a second s_wait_xcnt in the wait sequence before the third. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening scalar-memory load between the atomics. |
| %load = load i32, ptr addrspace(4) %const_ptr |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 %load seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block broken by atomic store. |
| define amdgpu_kernel void @atomic_rmw_broken_by_atomic_store(ptr addrspace(1) %ptr, ptr addrspace(1) %store_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_broken_by_atomic_store: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 42 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, a seq_cst atomic store of 42 to |
| ; %store_ptr, then a third atomicrmw add. |
| ; Expected output has four s_wait_xcnt instructions: before the first |
| ; atomic, in the wait sequence before the global_store_b32, between that |
| ; store and the next write to v1, and in the wait sequence before the third |
| ; atomic. None is expected before the second atomic. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening atomic store (not a read-modify-write) between the atomics. |
| store atomic i32 42, ptr addrspace(1) %store_ptr seq_cst, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| ret void |
| } |
| |
| ; Test LDS load should not break atomic RMW block. |
| define amdgpu_kernel void @atomic_rmw_with_lds_load(ptr addrspace(1) %ptr, ptr addrspace(3) %lds_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_with_lds_load: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX1250-NEXT: ds_load_b32 v1, v1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, then a load through the |
| ; addrspace(3) pointer %lds_ptr (ds_load_b32 in the expected output), then a |
| ; third atomic whose operand is the loaded value. |
| ; Expected output: s_wait_xcnt appears only before the first atomic; the |
| ; ds_load_b32 is not followed by another s_wait_xcnt. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening LDS load between the atomics. |
| %load = load i32, ptr addrspace(3) %lds_ptr, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 %load seq_cst |
| ret void |
| } |
| |
| ; Test LDS store should not break atomic RMW block. |
| define amdgpu_kernel void @atomic_rmw_with_lds_store(ptr addrspace(1) %ptr, ptr addrspace(3) %lds_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_with_lds_store: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v2, 42 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX1250-NEXT: ds_store_b32 v1, v2 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, a store of 42 through the |
| ; addrspace(3) pointer %lds_ptr (ds_store_b32 in the expected output), then a |
| ; third atomic. |
| ; Expected output: s_wait_xcnt appears only before the first atomic; neither |
| ; the ds_store_b32 nor the following write to v1 is accompanied by one. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Intervening LDS store between the atomics. |
| store i32 42, ptr addrspace(3) %lds_ptr, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| ret void |
| } |
| |
| ; Test FLAT load from LDS should not break atomic RMW block. |
| define amdgpu_kernel void @atomic_rmw_with_flat_lds_load(ptr addrspace(1) %ptr, ptr addrspace(3) %lds_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_with_flat_lds_load: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b64 s[6:7], 0 |
| ; GFX1250-NEXT: s_mov_b32 s4, s7 |
| ; GFX1250-NEXT: s_mov_b64 s[8:9], src_shared_base |
| ; GFX1250-NEXT: s_mov_b32 s3, s9 |
| ; GFX1250-NEXT: s_mov_b32 s5, -1 |
| ; GFX1250-NEXT: s_cmp_lg_u32 s2, s5 |
| ; GFX1250-NEXT: s_cselect_b32 s4, s3, s4 |
| ; GFX1250-NEXT: s_mov_b32 s3, s6 |
| ; GFX1250-NEXT: s_cselect_b32 s2, s2, s3 |
| ; GFX1250-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 |
| ; GFX1250-NEXT: s_mov_b32 s3, s4 |
| ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, then the addrspace(3) pointer |
| ; %lds_ptr is cast to a generic pointer and loaded through it, then a third |
| ; atomic uses the loaded value. |
| ; In the expected output the cast appears as a scalar sequence built from |
| ; src_shared_base with a compare of the 32-bit pointer against -1, and the |
| ; load is a flat_load_b32. |
| ; s_wait_xcnt is expected only before the first atomic. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Generic-pointer load whose address originates from an LDS pointer. |
| %flat_lds = addrspacecast ptr addrspace(3) %lds_ptr to ptr |
| %load = load i32, ptr %flat_lds, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 %load seq_cst |
| ret void |
| } |
| |
| ; Test FLAT store to LDS should not break atomic RMW block. |
| define amdgpu_kernel void @atomic_rmw_with_flat_lds_store(ptr addrspace(1) %ptr, ptr addrspace(3) %lds_ptr) { |
| ; GFX1250-LABEL: atomic_rmw_with_flat_lds_store: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b64 s[6:7], 0 |
| ; GFX1250-NEXT: s_mov_b32 s4, s7 |
| ; GFX1250-NEXT: s_mov_b64 s[8:9], src_shared_base |
| ; GFX1250-NEXT: s_mov_b32 s3, s9 |
| ; GFX1250-NEXT: s_mov_b32 s5, -1 |
| ; GFX1250-NEXT: s_cmp_lg_u32 s2, s5 |
| ; GFX1250-NEXT: s_cselect_b32 s4, s3, s4 |
| ; GFX1250-NEXT: s_mov_b32 s3, s6 |
| ; GFX1250-NEXT: s_cselect_b32 s2, s2, s3 |
| ; GFX1250-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 |
| ; GFX1250-NEXT: s_mov_b32 s3, s4 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 42 |
| ; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3] |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| ; Kernel body: two seq_cst atomicrmw adds, then the addrspace(3) pointer |
| ; %lds_ptr is cast to a generic pointer and 42 is stored through it, then a |
| ; third atomic follows. |
| ; In the expected output the cast appears as a scalar sequence built from |
| ; src_shared_base with a compare of the 32-bit pointer against -1, and the |
| ; store is a flat_store_b32. |
| ; Expected output has two s_wait_xcnt instructions: one before the first |
| ; atomic and one between the flat_store_b32 and the next write to v1. The |
| ; wait sequence directly before the third atomic contains none. |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Generic-pointer store whose address originates from an LDS pointer. |
| %flat_lds = addrspacecast ptr addrspace(3) %lds_ptr to ptr |
| store i32 42, ptr %flat_lds, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block broken by a copy from global memory to LDS. |
| ; Two seq_cst atomicrmw adds are followed by a plain load from %src and a |
| ; store to %dst (addrspace(3)), then a third atomicrmw add. The assertions |
| ; expect s_wait_xcnt before the first and third atomics, not before the second. |
| ; NOTE(review): the IR uses an ordinary load/store pair rather than an async |
| ; copy instruction, and "borken" in the function name looks like a typo for |
| ; "broken"; renaming it would require regenerating the assertions. |
| define amdgpu_kernel void @atomic_rmw_borken_by_async_lds_copy(ptr addrspace(1) %ptr, ptr addrspace(1) %src, ptr addrspace(3) %dst) { |
| ; GFX1250-LABEL: atomic_rmw_borken_by_async_lds_copy: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_mov_b64 s[2:3], s[4:5] |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[4:5], s[2:3], 0x8 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x10 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: global_load_b32 v2, v0, s[4:5] |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: ds_store_b32 v1, v2 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| ; Non-atomic load from %src and store to %dst separate the second and third RMW. |
| %load = load i32, ptr addrspace(1) %src, align 4 |
| store i32 %load, ptr addrspace(3) %dst, align 4 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| ret void |
| } |
| |
| ; Test multiple separate atomic RMW blocks. |
| ; A load from %ptr2 sits between the second and third atomicrmw, and its |
| ; result is the operand of the third. The assertions expect s_wait_xcnt |
| ; before the first and third atomics only. |
| define amdgpu_kernel void @multiple_atomic_rmw_blocks(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2) { |
| ; First block is %val1/%val2; the second block (%val3/%val4) starts after the load from %ptr2. |
| ; GFX1250-LABEL: multiple_atomic_rmw_blocks: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 4 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:12 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr1, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr1, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr1, i64 2 |
| %gep4 = getelementptr i32, ptr addrspace(1) %ptr1, i64 3 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| %load = load i32, ptr addrspace(1) %ptr2 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 %load seq_cst |
| %val4 = atomicrmw add ptr addrspace(1) %gep4, i32 4 seq_cst |
| ret void |
| } |
| |
| ; Test different atomic RMW operations in a block. |
| ; add, sub, and, or are issued back to back on consecutive i32 slots of %ptr; |
| ; the assertions expect s_wait_xcnt only before the first one (the add). |
| define amdgpu_kernel void @different_atomic_ops(ptr addrspace(1) %ptr) { |
| ; GFX1250-LABEL: different_atomic_ops: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_sub_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_and_b32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 4 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_or_b32 v0, v1, s[0:1] offset:12 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %gep4 = getelementptr i32, ptr addrspace(1) %ptr, i64 3 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw sub ptr addrspace(1) %gep2, i32 2 seq_cst |
| %val3 = atomicrmw and ptr addrspace(1) %gep3, i32 3 seq_cst |
| %val4 = atomicrmw or ptr addrspace(1) %gep4, i32 4 seq_cst |
| ret void |
| } |
| |
| ; Test atomic RMW block reset at basic block boundary. |
| ; %entry issues two atomicrmw adds and conditionally branches to %then, which |
| ; issues a third. The assertions expect s_wait_xcnt before the first atomic in |
| ; %entry and again before the atomic in %then, but not before the second |
| ; atomic in %entry. The other s_wait_xcnt lines directly follow the scratch |
| ; spill/reload of the VGPR that holds the spilled SGPRs. |
| define amdgpu_kernel void @atomic_rmw_across_basic_blocks(ptr addrspace(1) %ptr, i32 %cond) { |
| ; GFX1250-LABEL: atomic_rmw_across_basic_blocks: |
| ; GFX1250: ; %bb.0: ; %entry |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GFX1250-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane |
| ; GFX1250-NEXT: v_writelane_b32 v2, s4, 0 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s5, 1 |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[2:3] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 s1, 0 |
| ; GFX1250-NEXT: s_cmp_lg_u32 s0, s1 |
| ; GFX1250-NEXT: s_cbranch_scc1 .LBB16_2 |
| ; GFX1250-NEXT: ; %bb.1: ; %then |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_readlane_b32 s0, v2, 0 |
| ; GFX1250-NEXT: v_readlane_b32 s1, v2, 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: .LBB16_2: ; %exit |
| ; GFX1250-NEXT: s_endpgm |
| entry: |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| %cmp = icmp eq i32 %cond, 0 |
| br i1 %cmp, label %then, label %exit |
| |
| then: |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Test atomic RMW block in loop. |
| ; The loop body issues two atomicrmw adds of the induction variable %i and |
| ; repeats while %i.next is unsigned-less-than %n. The assertions expect |
| ; s_wait_xcnt before the first atomic of the body and none before the second. |
| define amdgpu_kernel void @atomic_rmw_in_loop(ptr addrspace(1) %ptr, i32 %n) { |
| ; GFX1250-LABEL: atomic_rmw_in_loop: |
| ; GFX1250: ; %bb.0: ; %entry |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8 |
| ; GFX1250-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s2, 0 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s3, 1 |
| ; GFX1250-NEXT: s_mov_b32 s0, 0 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s1, 2 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s0, 3 |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: .LBB17_1: ; %loop |
| ; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_readlane_b32 s0, v2, 3 |
| ; GFX1250-NEXT: v_readlane_b32 s1, v2, 2 |
| ; GFX1250-NEXT: v_readlane_b32 s2, v2, 0 |
| ; GFX1250-NEXT: v_readlane_b32 s3, v2, 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s0 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[2:3] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, s0 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 s2, 1 |
| ; GFX1250-NEXT: s_add_co_i32 s0, s0, s2 |
| ; GFX1250-NEXT: s_cmp_lt_u32 s0, s1 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s0, 3 |
| ; GFX1250-NEXT: s_mov_b32 s6, exec_lo |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, -1 |
| ; GFX1250-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_cbranch_scc1 .LBB17_1 |
| ; GFX1250-NEXT: ; %bb.2: ; %exit |
| ; GFX1250-NEXT: s_endpgm |
| entry: |
| br label %loop |
| |
| loop: |
| %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 %i seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 %i seq_cst |
| %i.next = add i32 %i, 1 |
| %cmp = icmp ult i32 %i.next, %n |
| br i1 %cmp, label %loop, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Test atomic RMW block with branch in between - state reset at new block. |
| ; %entry issues two atomicrmw adds, %bb1 and %bb2 issue one each, and %merge |
| ; issues a final one. The assertions expect s_wait_xcnt before the first |
| ; atomic of every block (entry, bb1, bb2, merge) and none before the second |
| ; atomic in %entry. |
| define amdgpu_kernel void @atomic_rmw_with_branch(ptr addrspace(1) %ptr, i32 %cond) { |
| ; GFX1250-LABEL: atomic_rmw_with_branch: |
| ; GFX1250: ; %bb.0: ; %entry |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GFX1250-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane |
| ; GFX1250-NEXT: v_writelane_b32 v2, s4, 0 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s5, 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[2:3] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 s0, -1 |
| ; GFX1250-NEXT: s_mov_b32 s2, 0 |
| ; GFX1250-NEXT: s_cmp_lg_u32 s1, s2 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s0, 2 |
| ; GFX1250-NEXT: s_mov_b32 s6, exec_lo |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, -1 |
| ; GFX1250-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_cbranch_scc1 .LBB18_3 |
| ; GFX1250-NEXT: .LBB18_1: ; %Flow |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_readlane_b32 s0, v2, 2 |
| ; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 |
| ; GFX1250-NEXT: s_mov_b32 s0, 1 |
| ; GFX1250-NEXT: v_cmp_ne_u32_e64 s0, v0, s0 |
| ; GFX1250-NEXT: s_and_b32 vcc_lo, exec_lo, s0 |
| ; GFX1250-NEXT: s_cbranch_vccnz .LBB18_4 |
| ; GFX1250-NEXT: ; %bb.2: ; %bb1 |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_readlane_b32 s0, v2, 0 |
| ; GFX1250-NEXT: v_readlane_b32 s1, v2, 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_branch .LBB18_4 |
| ; GFX1250-NEXT: .LBB18_3: ; %bb2 |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_readlane_b32 s0, v2, 0 |
| ; GFX1250-NEXT: v_readlane_b32 s1, v2, 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 4 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:12 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 s0, 0 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s0, 2 |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_branch .LBB18_1 |
| ; GFX1250-NEXT: .LBB18_4: ; %merge |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_readlane_b32 s0, v2, 0 |
| ; GFX1250-NEXT: v_readlane_b32 s1, v2, 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 5 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:16 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| entry: |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| %cmp = icmp eq i32 %cond, 0 |
| br i1 %cmp, label %bb1, label %bb2 |
| |
| bb1: |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| br label %merge |
| |
| bb2: |
| %gep4 = getelementptr i32, ptr addrspace(1) %ptr, i64 3 |
| %val4 = atomicrmw add ptr addrspace(1) %gep4, i32 4 seq_cst |
| br label %merge |
| |
| merge: |
| %gep5 = getelementptr i32, ptr addrspace(1) %ptr, i64 4 |
| %val5 = atomicrmw add ptr addrspace(1) %gep5, i32 5 seq_cst |
| ret void |
| } |
| |
| ; Test fall-through block. |
| ; %entry ends in an unconditional branch to %next, which is emitted as a |
| ; fall-through (the expected output has no branch instruction between them). |
| ; Each block issues two atomicrmw adds; the assertions expect s_wait_xcnt |
| ; before the first atomic of each block and none before the second. |
| define amdgpu_kernel void @atomic_rmw_fallthrough(ptr addrspace(1) %ptr) { |
| ; GFX1250-LABEL: atomic_rmw_fallthrough: |
| ; GFX1250: ; %bb.0: ; %entry |
| ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 |
| ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b64 s[2:3], s[0:1] |
| ; GFX1250-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane |
| ; GFX1250-NEXT: v_writelane_b32 v2, s2, 0 |
| ; GFX1250-NEXT: v_writelane_b32 v2, s3, 1 |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 1 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 2 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:4 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: ; %bb.1: ; %next |
| ; GFX1250-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GFX1250-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_mov_b32 exec_lo, s6 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_readlane_b32 s0, v2, 0 |
| ; GFX1250-NEXT: v_readlane_b32 s1, v2, 1 |
| ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 3 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_xcnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:8 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 4 |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: global_atomic_add_u32 v0, v1, s[0:1] offset:12 scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_endpgm |
| entry: |
| %gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 0 |
| %gep2 = getelementptr i32, ptr addrspace(1) %ptr, i64 1 |
| %val1 = atomicrmw add ptr addrspace(1) %gep1, i32 1 seq_cst |
| %val2 = atomicrmw add ptr addrspace(1) %gep2, i32 2 seq_cst |
| br label %next |
| |
| next: |
| %gep3 = getelementptr i32, ptr addrspace(1) %ptr, i64 2 |
| %gep4 = getelementptr i32, ptr addrspace(1) %ptr, i64 3 |
| %val3 = atomicrmw add ptr addrspace(1) %gep3, i32 3 seq_cst |
| %val4 = atomicrmw add ptr addrspace(1) %gep4, i32 4 seq_cst |
| ret void |
| } |