blob: 55159634eb4e52da0a74d56afa64efb5d6c1e319 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not
; crash when a MemSDNode user has multiple memory operands (e.g.
; buffer_load_lds which reads from a buffer and writes to LDS).
; Externally defined, zero-length i8 array in address space 3, 16-byte aligned.
; The kernel below uses it only as the base of the two LDS destination
; pointers passed to the buffer-load-to-LDS intrinsic.
@global_smem = external addrspace(3) global [0 x i8], align 16
; Produces a ptr addrspace(8) buffer resource from a ptr addrspace(1) base.
; NOTE(review): the trailing i16 / i64 / i32 operands are presumably stride,
; number of records and flags -- confirm against the AMDGPU intrinsic docs.
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i16, i64, i32)
; Buffer load that returns void and takes a ptr addrspace(3) operand; per the
; file header it reads from a buffer and writes to LDS.
; NOTE(review): the five i32 operands are presumably size, voffset, soffset,
; immediate offset and aux/cache-policy bits -- confirm against the docs.
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
; Source of the per-work-item value that forms the variable part of the
; buffer offsets computed in the kernel.
declare i32 @llvm.amdgcn.workitem.id.x()
; Kernel issuing two buffer-to-LDS loads whose offset expressions share the
; subexpression %add1 and differ only in the last added constant. The
; instruction sequence expected from llc is pinned by the autogenerated
; assertions that follow the define line; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @buffer_load_lds_reassociate_offsets(ptr addrspace(1) inreg %ptr) {
; CHECK-LABEL: buffer_load_lds_reassociate_offsets:
; CHECK: ; %bb.1:
; CHECK-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_branch .LBB0_0
; CHECK-NEXT: .p2align 8
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: .LBB0_0:
; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT: v_mul_u32_u24_e32 v0, 0x600, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; CHECK-NEXT: v_add_u32_e32 v1, 0x840, v0
; CHECK-NEXT: s_and_b32 s9, s9, 0xffff
; CHECK-NEXT: s_mov_b32 s11, 0x27000
; CHECK-NEXT: s_mov_b32 s10, 0x7ffffffe
; CHECK-NEXT: s_mov_b32 m0, 0
; CHECK-NEXT: v_add_u32_e32 v0, 0x842, v0
; CHECK-NEXT: buffer_load_dwordx4 v1, s[8:11], 0 offen lds
; CHECK-NEXT: s_add_i32 m0, 0, 0x420
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dwordx4 v0, s[8:11], 0 offen lds
; CHECK-NEXT: s_endpgm
; Work-item id: the only non-constant input to the offset arithmetic.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Create a pattern that will be reassociated. Both offsets are nested adds on
; top of a multiply and share the inner add %add1:
;   %offset1 = (add (add %base, 1024), 32)
;   %offset2 = (add (add %base, 1024), 33)
%base = mul i32 %tid, 1536
%add1 = add i32 %base, 1024
%offset1 = add i32 %add1, 32
%offset2 = add i32 %add1, 33
; Double each offset. The expected output above shows the constants folded
; into single adds of 0x840 (= 2 * (1024 + 32)) and 0x842 (= 2 * (1024 + 33))
; applied after the multiply by 0x600 and the shift by 1.
%shl1 = shl i32 %offset1, 1
%shl2 = shl i32 %offset2, 1
; Buffer resource built from the kernel's pointer argument. The constants
; 2147483646 (0x7ffffffe) and 159744 (0x27000) appear in the expected
; s_mov_b32 writes to s10 and s11 above.
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %ptr, i16 0, i64 2147483646, i32 159744)
; LDS destinations at byte offsets 0 and 1056 (0x420) from @global_smem,
; matching the two m0 values in the expected output above.
%lds0 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 0
%lds1 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 1056
; The two loads under test: same resource, third operand 16 in both, distinct
; LDS destinations and distinct offsets (%shl1 / %shl2). The expected output
; contains one buffer_load_dwordx4 ... offen lds instruction per call.
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds0, i32 16, i32 %shl1, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds1, i32 16, i32 %shl2, i32 0, i32 0, i32 0)
ret void
}