; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck %s

; This test is for a bug in the machine scheduler where stores without
; an underlying object would be moved across the barrier. In this
; test, the <2 x i8> store will be split into two i8 stores, so they
; won't have an underlying object.

; CHECK-LABEL: {{^}}test:
; CHECK: ds_write_b8
; CHECK: ds_write_b8
; CHECK: s_barrier
; CHECK: s_endpgm
; Function Attrs: nounwind
;
; Kernel under test. It reads an index from %arg2, copies one <2 x i8>
; element of %arg (addrspace(3)) to the following element, executes
; s_barrier, and afterwards reads bytes of %arg back and stores them to
; %arg3 (addrspace(1)). %arg1 is not referenced in the body; it is kept
; so the kernel signature stays unchanged.
define amdgpu_kernel void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
bb:
  ; Load the element index from %arg2[%tmp9].
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
  %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2

  ; Copy %arg[%tmp13] to %arg[%tmp13 + 1]. This align-1 <2 x i8> store is
  ; the one the test expects to show up as two ds_write_b8 instructions
  ; before s_barrier.
  %tmp14 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp13
  %tmp15 = load <2 x i8>, <2 x i8> addrspace(3)* %tmp14, align 1
  %tmp16 = add i32 %tmp13, 1
  %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
  store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 1

  ; The stores above must not be scheduled past this barrier.
  tail call void @llvm.amdgcn.s.barrier()

  ; Reload the index after the barrier and widen it (and %arg4) to i64 for
  ; addressing %arg3.
  %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
  %tmp26 = sext i32 %tmp25 to i64
  %tmp27 = sext i32 %arg4 to i64

  ; Copy byte %arg4 of %arg[%tmp25] to the same position in %arg3[%tmp25].
  %tmp28 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
  %tmp29 = load i8, i8 addrspace(3)* %tmp28, align 1
  %tmp30 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
  store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1

  ; Copy byte 0 of %arg[%tmp25] to byte 0 of %arg3[%tmp25].
  %tmp32 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
  %tmp33 = load i8, i8 addrspace(3)* %tmp32, align 1
  %tmp35 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
  store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
  ret void
}

; Function Attrs: convergent nounwind
; Barrier intrinsic called by @test.
declare void @llvm.amdgcn.s.barrier() #1

; #0 is attached to @test, #1 to the barrier intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }