blob: 56409999f1b95b8020cc12c802efc781c77a07ba [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 3
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-attributor,amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefixes=CHECK,TABLE %s

; FIXME: Work around update_test_checks bug in constant expression handling by manually deleting part of the last global pattern

; i16 variables in addrspace(3) that the functions and kernels in this test
; load from and store to.
@function.lds = addrspace(3) global i16 poison
@other.kernel.lds = addrspace(3) global i16 poison
@recursive.kernel.lds = addrspace(3) global i16 poison

;.
; CHECK: @llvm.amdgcn.kernel.k0_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0_f0.lds.t poison, align 2, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]]
;.
; Loads @function.lds, multiplies the value by 7 and stores it back.
; Called from @indirectly_called. The expected output replaces both accesses
; to @function.lds with an address loaded from @llvm.amdgcn.lds.offset.table,
; indexed by the result of llvm.amdgcn.lds.kernel.id.
define internal void @lds_use_through_indirect() {
; CHECK-LABEL: define internal void @lds_use_through_indirect(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
; CHECK-NEXT: ret void
;
  %ld = load i16, ptr addrspace(3) @function.lds
  %mul = mul i16 %ld, 7
  store i16 %mul, ptr addrspace(3) @function.lds
  ret void
}

; Stores its own address (volatile) through a null addrspace(1) pointer and
; then calls @lds_use_through_indirect. Storing the address makes this
; function address-taken; presumably that is what lets the indirect call in
; @k1_f0 be treated as a possible caller -- confirm against the pass.
define internal void @indirectly_called() {
; CHECK-LABEL: define internal void @indirectly_called(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: store volatile ptr @indirectly_called, ptr addrspace(1) null, align 8
; CHECK-NEXT: call void @lds_use_through_indirect()
; CHECK-NEXT: ret void
;
  store volatile ptr @indirectly_called, ptr addrspace(1) null
  call void @lds_use_through_indirect()
  ret void
}

; Direct caller of @indirectly_called; it does not touch any LDS variable
; itself. The expected output leaves the body unchanged.
define internal void @calls_indirectly_called() {
; CHECK-LABEL: define internal void @calls_indirectly_called(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @indirectly_called()
; CHECK-NEXT: ret void
;
  call void @indirectly_called()
  ret void
}

; TODO: Should still have "amdgpu-no-lds-kernel-id" attached
; Empty leaf function: no LDS access and no calls. It is the only function
; in this test expected to carry the ATTR1 attribute group.
define internal void @no_lds_global_use_leaf() {
; CHECK-LABEL: define internal void @no_lds_global_use_leaf(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; Should have "amdgpu-no-lds-kernel-id" stripped
; Loads @function.lds, multiplies the value by 4, stores it back, then calls
; @no_lds_global_use_leaf. The expected output routes both LDS accesses
; through @llvm.amdgcn.lds.offset.table using llvm.amdgcn.lds.kernel.id.
define internal void @f0() {
; CHECK-LABEL: define internal void @f0(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4
; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
; CHECK-NEXT: call void @no_lds_global_use_leaf()
; CHECK-NEXT: ret void
;
  %ld = load i16, ptr addrspace(3) @function.lds
  %mul = mul i16 %ld, 4
  store i16 %mul, ptr addrspace(3) @function.lds
  call void @no_lds_global_use_leaf()
  ret void
}

; Should have "amdgpu-no-lds-kernel-id" stripped
; Has no LDS access of its own; it reaches @function.lds only through its
; call to @f0, and also calls @no_lds_global_use_leaf.
define internal void @f0_transitive() {
; CHECK-LABEL: define internal void @f0_transitive(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @f0()
; CHECK-NEXT: call void @no_lds_global_use_leaf()
; CHECK-NEXT: ret void
;
  call void @f0()
  call void @no_lds_global_use_leaf()
  ret void
}

; Kernel whose only action is a direct call to @f0_transitive. The expected
; output adds !llvm.amdgcn.lds.kernel.id metadata to the kernel and inserts
; an llvm.donothing call with an "ExplicitUse" bundle on
; @llvm.amdgcn.kernel.k0_f0.lds.
define amdgpu_kernel void @k0_f0() {
; CHECK-LABEL: define amdgpu_kernel void @k0_f0(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds) ]
; CHECK-NEXT: call void @f0_transitive()
; CHECK-NEXT: ret void
;
  call void @f0_transitive()
  ret void
}

; Kernel that calls @f0_transitive, then makes an indirect call through a
; function pointer loaded (volatile) from a null addrspace(1) pointer, then
; calls @calls_indirectly_called. The expected output adds
; !llvm.amdgcn.lds.kernel.id metadata and an llvm.donothing "ExplicitUse" of
; @llvm.amdgcn.kernel.k1_f0.lds carrying !alias.scope / !noalias metadata.
define amdgpu_kernel void @k1_f0() {
; CHECK-LABEL: define amdgpu_kernel void @k1_f0(
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds) ], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: call void @f0_transitive()
; CHECK-NEXT: [[FPTR:%.*]] = load volatile ptr, ptr addrspace(1) null, align 8
; CHECK-NEXT: call void [[FPTR]]()
; CHECK-NEXT: call void @calls_indirectly_called()
; CHECK-NEXT: ret void
;
  call void @f0_transitive()
  %fptr = load volatile ptr, ptr addrspace(1) null
  call void %fptr()
  call void @calls_indirectly_called()
  ret void
}

; Should still have "amdgpu-no-lds-kernel-id" attached
; Kernel that accesses @other.kernel.lds directly (load, multiply by 42,
; store) and makes no calls. The expected output rewrites the accesses to
; @llvm.amdgcn.kernel.kernel_lds.lds without a table lookup, and the kernel
; gets no !llvm.amdgcn.lds.kernel.id metadata.
define amdgpu_kernel void @kernel_lds() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_lds(
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds.lds, align 2
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 42
; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds.lds, align 2
; CHECK-NEXT: ret void
;
  %ld = load i16, ptr addrspace(3) @other.kernel.lds
  %mul = mul i16 %ld, 42
  store i16 %mul, ptr addrspace(3) @other.kernel.lds
  ret void
}

; First half of a mutually recursive pair. Loads @recursive.kernel.lds, passes
; the loaded value to @mutual_recursion_1 and returns that call's result plus
; one; %mul is computed but never used. The expected output looks the
; variable up in column 1 of @llvm.amdgcn.lds.offset.table.
; NOTE(review): @mutual_recursion_1 is defined as returning void but is called
; here as returning i16. Presumably intentional -- confirm before changing,
; since the autogenerated check lines would have to be regenerated.
define internal i16 @mutual_recursion_0(i16 %arg) {
; CHECK-LABEL: define internal i16 @mutual_recursion_0(
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[RECURSIVE_KERNEL_LDS1]], align 2
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7
; CHECK-NEXT: [[RET:%.*]] = call i16 @mutual_recursion_1(i16 [[LD]])
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[RET]], 1
; CHECK-NEXT: ret i16 [[ADD]]
;
  %ld = load i16, ptr addrspace(3) @recursive.kernel.lds
  %mul = mul i16 %ld, 7
  %ret = call i16 @mutual_recursion_1(i16 %ld)
  %add = add i16 %ret, 1
  ret i16 %add
}

; Second half of the mutually recursive pair: forwards %arg to
; @mutual_recursion_0 and returns. It has no LDS access of its own.
; NOTE(review): @mutual_recursion_0 is defined as returning i16 but is called
; here as returning void. Presumably intentional -- confirm before changing,
; since the autogenerated check lines would have to be regenerated.
define internal void @mutual_recursion_1(i16 %arg) {
; CHECK-LABEL: define internal void @mutual_recursion_1(
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: call void @mutual_recursion_0(i16 [[ARG]])
; CHECK-NEXT: ret void
;
  call void @mutual_recursion_0(i16 %arg)
  ret void
}

; Kernel that enters the mutually recursive pair by calling
; @mutual_recursion_0 with 0. The expected output adds
; !llvm.amdgcn.lds.kernel.id metadata and an llvm.donothing "ExplicitUse" of
; @llvm.amdgcn.kernel.kernel_lds_recursion.lds.
define amdgpu_kernel void @kernel_lds_recursion() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_lds_recursion(
; CHECK-SAME: ) #[[ATTR5:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds) ]
; CHECK-NEXT: call void @mutual_recursion_0(i16 0)
; CHECK-NEXT: ret void
;
  call void @mutual_recursion_0(i16 0)
  ret void
}

; Module flags: sets "amdhsa_code_object_version" to 400.
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"amdhsa_code_object_version", i32 400}

;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="4" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
; CHECK: [[META2]] = !{i32 0}
; CHECK: [[META3]] = !{i32 1}
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
; CHECK: [[META6]] = distinct !{[[META6]]}
; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]}
; CHECK: [[META9]] = !{i32 2}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; TABLE: {{.*}}