Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 3 |
| 2 | ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-attributor,amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefixes=CHECK,TABLE %s |
| 3 | |
| 4 | ; FIXME: Work around update_test_checks bug in constant expression handling by manually deleting part of the last global pattern |
| 5 | |
; LDS (addrspace(3)) globals under test:
;   @function.lds         - accessed from the non-kernel functions @lds_use_through_indirect and @f0.
;   @other.kernel.lds     - accessed directly from the kernel @kernel_lds.
;   @recursive.kernel.lds - accessed from the non-kernel function @mutual_recursion_0.
| 6 | @function.lds = addrspace(3) global i16 poison |
| 7 | @other.kernel.lds = addrspace(3) global i16 poison |
| 8 | @recursive.kernel.lds = addrspace(3) global i16 poison |
| 9 | |
| 10 | ;. |
Shilei Tian | f32f028 | 2024-09-02 12:23:26 -0400 | [diff] [blame] | 11 | ; CHECK: @llvm.amdgcn.kernel.k0_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0_f0.lds.t poison, align 2, !absolute_symbol [[META0:![0-9]+]] |
| 12 | ; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]] |
| 13 | ; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]] |
| 14 | ; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]] |
Shilei Tian | cb949b74 | 2024-09-02 12:33:24 -0400 | [diff] [blame] | 15 | ; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]] |
Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 16 | ;. |
; Non-kernel function that loads @function.lds, multiplies the value by 7 and
; stores it back. The checks expect both accesses to be rewritten into a lookup
; of @llvm.amdgcn.lds.offset.table indexed by @llvm.amdgcn.lds.kernel.id().
; In this file it is only called directly from @indirectly_called.
| 17 | define internal void @lds_use_through_indirect() { |
| 18 | ; CHECK-LABEL: define internal void @lds_use_through_indirect( |
| 19 | ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { |
| 20 | ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() |
| 21 | ; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 |
| 22 | ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4 |
| 23 | ; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) |
| 24 | ; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2 |
| 25 | ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7 |
| 26 | ; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 |
| 27 | ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4 |
| 28 | ; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) |
| 29 | ; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2 |
| 30 | ; CHECK-NEXT: ret void |
| 31 | ; |
| 32 | %ld = load i16, ptr addrspace(3) @function.lds |
| 33 | %mul = mul i16 %ld, 7 |
| 34 | store i16 %mul, ptr addrspace(3) @function.lds |
| 35 | ret void |
| 36 | } |
| 37 | |
; Volatile-stores its own address to the null addrspace(1) pointer, then calls
; @lds_use_through_indirect directly. The checks expect the body to be unchanged.
; NOTE(review): presumably the escaped address is what makes this function a
; possible target of the indirect call in @k1_f0 - confirm.
| 38 | define internal void @indirectly_called() { |
| 39 | ; CHECK-LABEL: define internal void @indirectly_called( |
| 40 | ; CHECK-SAME: ) #[[ATTR0]] { |
| 41 | ; CHECK-NEXT: store volatile ptr @indirectly_called, ptr addrspace(1) null, align 8 |
| 42 | ; CHECK-NEXT: call void @lds_use_through_indirect() |
| 43 | ; CHECK-NEXT: ret void |
| 44 | ; |
| 45 | store volatile ptr @indirectly_called, ptr addrspace(1) null |
| 46 | call void @lds_use_through_indirect() |
| 47 | ret void |
| 48 | } |
| 49 | |
; Thin wrapper that directly calls @indirectly_called; it touches no LDS global
; itself. It is called from the kernel @k1_f0.
| 50 | define internal void @calls_indirectly_called() { |
| 51 | ; CHECK-LABEL: define internal void @calls_indirectly_called( |
| 52 | ; CHECK-SAME: ) #[[ATTR0]] { |
| 53 | ; CHECK-NEXT: call void @indirectly_called() |
| 54 | ; CHECK-NEXT: ret void |
| 55 | ; |
| 56 | call void @indirectly_called() |
| 57 | ret void |
| 58 | } |
| 59 | |
| 60 | ; TODO: Should still have "amdgpu-no-lds-kernel-id" attached |
; Empty leaf function: no LDS access and no calls. It is called from @f0 and
; @f0_transitive. The checks bind it to its own attribute group (ATTR1), whose
; expected contents currently do not include "amdgpu-no-lds-kernel-id".
| 61 | define internal void @no_lds_global_use_leaf() { |
| 62 | ; CHECK-LABEL: define internal void @no_lds_global_use_leaf( |
| 63 | ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { |
| 64 | ; CHECK-NEXT: ret void |
| 65 | ; |
| 66 | ret void |
| 67 | } |
| 68 | |
| 69 | ; Should have "amdgpu-no-lds-kernel-id" stripped |
; Non-kernel function that loads @function.lds, multiplies the value by 4,
; stores it back, and then calls @no_lds_global_use_leaf. The checks expect both
; LDS accesses to go through @llvm.amdgcn.lds.offset.table, indexed by the
; result of @llvm.amdgcn.lds.kernel.id().
| 70 | define internal void @f0() { |
| 71 | ; CHECK-LABEL: define internal void @f0( |
| 72 | ; CHECK-SAME: ) #[[ATTR0]] { |
| 73 | ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() |
| 74 | ; CHECK-NEXT: [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 |
| 75 | ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4 |
| 76 | ; CHECK-NEXT: [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) |
| 77 | ; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2 |
| 78 | ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 4 |
| 79 | ; CHECK-NEXT: [[FUNCTION_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 |
| 80 | ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4 |
| 81 | ; CHECK-NEXT: [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3) |
| 82 | ; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2 |
| 83 | ; CHECK-NEXT: call void @no_lds_global_use_leaf() |
| 84 | ; CHECK-NEXT: ret void |
| 85 | ; |
| 86 | %ld = load i16, ptr addrspace(3) @function.lds |
| 87 | %mul = mul i16 %ld, 4 |
| 88 | store i16 %mul, ptr addrspace(3) @function.lds |
| 89 | call void @no_lds_global_use_leaf() |
| 90 | ret void |
| 91 | } |
| 92 | |
| 93 | ; Should have "amdgpu-no-lds-kernel-id" stripped |
; Has no LDS access of its own; it reaches @function.lds only through its call
; to @f0, and additionally calls @no_lds_global_use_leaf. Called from the
; kernels @k0_f0 and @k1_f0. The checks expect the body to be unchanged.
| 94 | define internal void @f0_transitive() { |
| 95 | ; CHECK-LABEL: define internal void @f0_transitive( |
| 96 | ; CHECK-SAME: ) #[[ATTR0]] { |
| 97 | ; CHECK-NEXT: call void @f0() |
| 98 | ; CHECK-NEXT: call void @no_lds_global_use_leaf() |
| 99 | ; CHECK-NEXT: ret void |
| 100 | ; |
| 101 | call void @f0() |
| 102 | call void @no_lds_global_use_leaf() |
| 103 | ret void |
| 104 | } |
| 105 | |
; Kernel whose only action is a direct call to @f0_transitive. The checks expect
; the lowered kernel to carry !llvm.amdgcn.lds.kernel.id metadata (expected
; value !{i32 0}) and to begin with an @llvm.donothing call holding an
; "ExplicitUse" bundle on @llvm.amdgcn.kernel.k0_f0.lds.
| 106 | define amdgpu_kernel void @k0_f0() { |
| 107 | ; CHECK-LABEL: define amdgpu_kernel void @k0_f0( |
Shilei Tian | f32f028 | 2024-09-02 12:23:26 -0400 | [diff] [blame] | 108 | ; CHECK-SAME: ) #[[ATTR2:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { |
Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 109 | ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds) ] |
| 110 | ; CHECK-NEXT: call void @f0_transitive() |
| 111 | ; CHECK-NEXT: ret void |
| 112 | ; |
| 113 | call void @f0_transitive() |
| 114 | ret void |
| 115 | } |
| 116 | |
; Kernel that calls @f0_transitive, then volatile-loads a function pointer from
; the null addrspace(1) pointer and calls it indirectly, then calls
; @calls_indirectly_called. The checks expect !llvm.amdgcn.lds.kernel.id
; metadata (expected value !{i32 1}), an @llvm.donothing "ExplicitUse" of
; @llvm.amdgcn.kernel.k1_f0.lds tagged with !alias.scope/!noalias, and an
; attribute group (ATTR3) whose expected contents contain none of the
; "amdgpu-no-*" attributes.
| 117 | define amdgpu_kernel void @k1_f0() { |
| 118 | ; CHECK-LABEL: define amdgpu_kernel void @k1_f0( |
Shilei Tian | f32f028 | 2024-09-02 12:23:26 -0400 | [diff] [blame] | 119 | ; CHECK-SAME: ) #[[ATTR3:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] { |
| 120 | ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds) ], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] |
Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 121 | ; CHECK-NEXT: call void @f0_transitive() |
| 122 | ; CHECK-NEXT: [[FPTR:%.*]] = load volatile ptr, ptr addrspace(1) null, align 8 |
| 123 | ; CHECK-NEXT: call void [[FPTR]]() |
| 124 | ; CHECK-NEXT: call void @calls_indirectly_called() |
| 125 | ; CHECK-NEXT: ret void |
| 126 | ; |
| 127 | call void @f0_transitive() |
| 128 | %fptr = load volatile ptr, ptr addrspace(1) null |
| 129 | call void %fptr() |
| 130 | call void @calls_indirectly_called() |
| 131 | ret void |
| 132 | } |
| 133 | |
| 134 | ; Should still have "amdgpu-no-lds-kernel-id" attached |
; Kernel that accesses @other.kernel.lds directly (load, multiply by 42, store)
; and makes no calls. The checks expect the accesses to be redirected straight
; to @llvm.amdgcn.kernel.kernel_lds.lds, with no offset-table lookup, no
; !llvm.amdgcn.lds.kernel.id metadata on the kernel, and an attribute group
; (ATTR4) that includes "amdgpu-no-lds-kernel-id".
| 135 | define amdgpu_kernel void @kernel_lds() { |
| 136 | ; CHECK-LABEL: define amdgpu_kernel void @kernel_lds( |
| 137 | ; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { |
| 138 | ; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds.lds, align 2 |
| 139 | ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 42 |
| 140 | ; CHECK-NEXT: store i16 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds.lds, align 2 |
| 141 | ; CHECK-NEXT: ret void |
| 142 | ; |
| 143 | %ld = load i16, ptr addrspace(3) @other.kernel.lds |
| 144 | %mul = mul i16 %ld, 42 |
| 145 | store i16 %mul, ptr addrspace(3) @other.kernel.lds |
| 146 | ret void |
| 147 | } |
| 148 | |
; First half of a mutually recursive pair. Loads @recursive.kernel.lds, calls
; @mutual_recursion_1 with the loaded value, and returns that call's result
; plus 1. %arg and %mul are never used. The checks expect the LDS load to go
; through @llvm.amdgcn.lds.offset.table using last index 1 (the @function.lds
; lookups elsewhere in this file use last index 0).
; NOTE(review): the call site treats @mutual_recursion_1 as returning i16, but
; that function is defined as returning void - confirm the mismatch is
; intentional before "fixing" it, since the autogenerated checks were produced
; with it in place.
| 149 | define internal i16 @mutual_recursion_0(i16 %arg) { |
| 150 | ; CHECK-LABEL: define internal i16 @mutual_recursion_0( |
| 151 | ; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] { |
| 152 | ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() |
| 153 | ; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 |
| 154 | ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4 |
| 155 | ; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) |
| 156 | ; CHECK-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[RECURSIVE_KERNEL_LDS1]], align 2 |
| 157 | ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 7 |
| 158 | ; CHECK-NEXT: [[RET:%.*]] = call i16 @mutual_recursion_1(i16 [[LD]]) |
| 159 | ; CHECK-NEXT: [[ADD:%.*]] = add i16 [[RET]], 1 |
| 160 | ; CHECK-NEXT: ret i16 [[ADD]] |
| 161 | ; |
| 162 | %ld = load i16, ptr addrspace(3) @recursive.kernel.lds |
| 163 | %mul = mul i16 %ld, 7 |
| 164 | %ret = call i16 @mutual_recursion_1(i16 %ld) |
| 165 | %add = add i16 %ret, 1 |
| 166 | ret i16 %add |
| 167 | } |
| 168 | |
; Second half of the mutually recursive pair: forwards %arg to
; @mutual_recursion_0 and returns void. It has no LDS access of its own.
; NOTE(review): the call site treats @mutual_recursion_0 as returning void, but
; that function is defined as returning i16 - confirm the mismatch is
; intentional.
| 169 | define internal void @mutual_recursion_1(i16 %arg) { |
| 170 | ; CHECK-LABEL: define internal void @mutual_recursion_1( |
| 171 | ; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] { |
| 172 | ; CHECK-NEXT: call void @mutual_recursion_0(i16 [[ARG]]) |
| 173 | ; CHECK-NEXT: ret void |
| 174 | ; |
| 175 | call void @mutual_recursion_0(i16 %arg) |
| 176 | ret void |
| 177 | } |
| 178 | |
; Kernel that enters the mutually recursive pair by calling
; @mutual_recursion_0 with the constant 0. The checks expect
; !llvm.amdgcn.lds.kernel.id metadata (expected value !{i32 2}) and an
; @llvm.donothing "ExplicitUse" of @llvm.amdgcn.kernel.kernel_lds_recursion.lds.
; NOTE(review): the call site treats @mutual_recursion_0 as returning void, but
; that function is defined as returning i16 - confirm the mismatch is
; intentional.
| 179 | define amdgpu_kernel void @kernel_lds_recursion() { |
| 180 | ; CHECK-LABEL: define amdgpu_kernel void @kernel_lds_recursion( |
Shilei Tian | f403727 | 2024-12-11 16:50:06 -0500 | [diff] [blame] | 181 | ; CHECK-SAME: ) #[[ATTR5:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] { |
Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 182 | ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds) ] |
| 183 | ; CHECK-NEXT: call void @mutual_recursion_0(i16 0) |
| 184 | ; CHECK-NEXT: ret void |
| 185 | ; |
| 186 | call void @mutual_recursion_0(i16 0) |
| 187 | ret void |
| 188 | } |
| 189 | |
; Module flags: the single flag sets "amdhsa_code_object_version" to 400.
Emma Pilkington | 4897b98 | 2024-01-22 11:22:57 -0500 | [diff] [blame] | 190 | !llvm.module.flags = !{!1} |
Emma Pilkington | 4490003 | 2024-03-06 09:51:48 -0500 | [diff] [blame] | 191 | !1 = !{i32 1, !"amdhsa_code_object_version", i32 400} |
Emma Pilkington | 4897b98 | 2024-01-22 11:22:57 -0500 | [diff] [blame] | 192 | |
Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 193 | ;. |
Matt Arsenault | a216358 | 2025-03-06 09:17:51 +0700 | [diff] [blame] | 194 | ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| 195 | ; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| 196 | ; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
Shilei Tian | f403727 | 2024-12-11 16:50:06 -0500 | [diff] [blame] | 197 | ; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } |
Matt Arsenault | a216358 | 2025-03-06 09:17:51 +0700 | [diff] [blame] | 198 | ; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| 199 | ; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="4" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } |
Shilei Tian | f403727 | 2024-12-11 16:50:06 -0500 | [diff] [blame] | 200 | ; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } |
| 201 | ; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 202 | ;. |
Shilei Tian | f32f028 | 2024-09-02 12:23:26 -0400 | [diff] [blame] | 203 | ; CHECK: [[META0]] = !{i32 0, i32 1} |
| 204 | ; CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400} |
| 205 | ; CHECK: [[META2]] = !{i32 0} |
| 206 | ; CHECK: [[META3]] = !{i32 1} |
| 207 | ; CHECK: [[META4]] = !{[[META5:![0-9]+]]} |
| 208 | ; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]} |
| 209 | ; CHECK: [[META6]] = distinct !{[[META6]]} |
| 210 | ; CHECK: [[META7]] = !{[[META8:![0-9]+]]} |
| 211 | ; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]} |
| 212 | ; CHECK: [[META9]] = !{i32 2} |
Matt Arsenault | 888a20c4 | 2024-01-10 00:12:40 +0700 | [diff] [blame] | 213 | ;. |
| 214 | ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| 215 | ; TABLE: {{.*}} |