| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s |
| |
| ; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not |
| ; crash when a MemSDNode user has multiple memory operands (e.g. |
| ; buffer_load_lds which reads from a buffer and writes to LDS). |
| |
| ; Zero-length external byte array in address space 3. The kernel below derives |
| ; the destination pointers of both buffer-to-LDS loads from this symbol. |
| @global_smem = external addrspace(3) global [0 x i8], align 16 |
| |
| ; Builds an addrspace(8) buffer resource from an addrspace(1) base pointer. |
| ; NOTE(review): the trailing i16/i64/i32 operands are presumably stride, |
| ; number of records and flags - confirm against the AMDGPU usage guide. |
| declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i16, i64, i32) |
| ; Load from a buffer resource straight into an addrspace(3) destination; this is |
| ; the intrinsic the file header refers to as having multiple memory operands. |
| ; NOTE(review): the five i32 operands are presumably size in bytes, voffset, |
| ; soffset, immediate offset and aux bits - confirm against the intrinsic docs. |
| declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32) |
| ; Returns an i32 that the kernel uses as the per-lane input to its offset math. |
| declare i32 @llvm.amdgcn.workitem.id.x() |
| |
| ; Kernel issuing two llvm.amdgcn.raw.ptr.buffer.load.lds calls whose offset |
| ; operands are built from nested constant adds sharing the inner add %add1. |
| ; The expected assembly below is autogenerated (see the NOTE on the first line |
| ; of this file); regenerate it with the script instead of editing it by hand. |
| define amdgpu_kernel void @buffer_load_lds_reassociate_offsets(ptr addrspace(1) inreg %ptr) { |
| ; CHECK-LABEL: buffer_load_lds_reassociate_offsets: |
| ; CHECK: ; %bb.1: |
| ; CHECK-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_branch .LBB0_0 |
| ; CHECK-NEXT: .p2align 8 |
| ; CHECK-NEXT: ; %bb.2: |
| ; CHECK-NEXT: .LBB0_0: |
| ; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; CHECK-NEXT: v_mul_u32_u24_e32 v0, 0x600, v0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| ; CHECK-NEXT: v_add_u32_e32 v1, 0x840, v0 |
| ; CHECK-NEXT: s_and_b32 s9, s9, 0xffff |
| ; CHECK-NEXT: s_mov_b32 s11, 0x27000 |
| ; CHECK-NEXT: s_mov_b32 s10, 0x7ffffffe |
| ; CHECK-NEXT: s_mov_b32 m0, 0 |
| ; CHECK-NEXT: v_add_u32_e32 v0, 0x842, v0 |
| ; CHECK-NEXT: buffer_load_dwordx4 v1, s[8:11], 0 offen lds |
| ; CHECK-NEXT: s_add_i32 m0, 0, 0x420 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dwordx4 v0, s[8:11], 0 offen lds |
| ; CHECK-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| ; Build the nested-add shape the combiner reassociates: |
| ; (add (add base, 1024), C) with base = tid * 1536 (0x600 in the expected mul). |
| %base = mul i32 %tid, 1536 |
| %add1 = add i32 %base, 1024 |
| ; %add1 feeds two outer adds with different constants (32 and 33), so the inner |
| ; add has more than one user. |
| %offset1 = add i32 %add1, 32 |
| %offset2 = add i32 %add1, 33 |
| ; Shifting left by 1 doubles the folded constants: (1024 + 32) * 2 = 0x840 and |
| ; (1024 + 33) * 2 = 0x842, the immediates of the two expected v_add_u32 ops. |
| %shl1 = shl i32 %offset1, 1 |
| %shl2 = shl i32 %offset2, 1 |
| ; 2147483646 = 0x7ffffffe and 159744 = 0x27000; they show up as the s10 and s11 |
| ; moves in the expected output. |
| %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %ptr, i16 0, i64 2147483646, i32 159744) |
| ; Two destinations inside @global_smem at byte offsets 0 and 1056 (0x420); the |
| ; expected output sets m0 to 0 before the first load and to 0x420 before the second. |
| %lds0 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 0 |
| %lds1 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 1056 |
| ; Each call passes 16 as its first i32 operand and one of the shifted offsets as |
| ; the second; both are expected to select buffer_load_dwordx4 ... offen lds. |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds0, i32 16, i32 %shl1, i32 0, i32 0, i32 0) |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds1, i32 16, i32 %shl2, i32 0, i32 0, i32 0) |
| ret void |
| } |