; blob: 4bd08c19a081cf4dfe5ef651002d666338cc013d [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
; Not reached by a non-kernel function and therefore not changed by this pass
; Zero-length external addrspace(3) array; in the visible part of this test it
; is referenced only by @kernel_only.
@dynamic_kernel_only = external addrspace(3) global [0 x double]
; dynamic_shared8 is used directly by a kernel, so it remains in the output;
; the other three are only used by functions and will be replaced with
; table lookups and dropped from the IR.
; Zero-length external addrspace(3) arrays with element types i8, i16, i32 and
; i64; each declares an alignment equal to its element size in bytes (1, 2, 4, 8).
@dynamic_shared1 = external addrspace(3) global [0 x i8], align 1
@dynamic_shared2 = external addrspace(3) global [0 x i16], align 2
@dynamic_shared4 = external addrspace(3) global [0 x i32], align 4
@dynamic_shared8 = external addrspace(3) global [0 x i64], align 8
; CHECK: %llvm.amdgcn.module.lds.t = type { i32 }
;.
; CHECK: @dynamic_kernel_only = external addrspace(3) global [0 x double]
; CHECK: @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; CHECK: @llvm.amdgcn.expect_align1.dynlds = external addrspace(3) global [0 x i8], align 1, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.expect_align2.dynlds = external addrspace(3) global [0 x i8], align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.expect_align4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1:![0-9]+]]
; CHECK: @llvm.amdgcn.expect_align8.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.expect_max_of_2_and_4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1]]
; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds, ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds]
;.
; Kernel that stores 3.14 to element 0 of @dynamic_kernel_only and calls no
; functions. The expected output keeps the direct reference to the global: the
; GEP is unchanged, the store only gains an explicit "align 8", and the kernel
; signature carries no extra attributes or metadata.
define amdgpu_kernel void @kernel_only() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_only() {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_kernel_only, i32 0, i32 0
; CHECK-NEXT: store double 3.140000e+00, ptr addrspace(3) [[ARRAYIDX]], align 8
; CHECK-NEXT: ret void
;
%arrayidx = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_kernel_only, i32 0, i32 0
store double 3.140000e+00, ptr addrspace(3) %arrayidx
ret void
}
; The accesses from functions are rewritten to go through the llvm.amdgcn.dynlds.offset.table
; Non-kernel function that stores i8 0 to element 1 of @dynamic_shared1.
; Expected output: the direct reference to the global is replaced by a pointer
; loaded from @llvm.amdgcn.dynlds.offset.table, indexed by the value returned
; from @llvm.amdgcn.lds.kernel.id(); the GEP index and the stored value are
; unchanged.
define void @use_shared1() {
; CHECK-LABEL: define void @use_shared1() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[DYNAMIC_SHARED1:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED11:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED1]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr addrspace(3) [[DYNAMIC_SHARED11]], i32 0, i32 1
; CHECK-NEXT: store i8 0, ptr addrspace(3) [[ARRAYIDX]], align 1
; CHECK-NEXT: ret void
;
%arrayidx = getelementptr inbounds [0 x i8], ptr addrspace(3) @dynamic_shared1, i32 0, i32 1
store i8 0, ptr addrspace(3) %arrayidx
ret void
}
; Non-kernel, noinline (#0) function that stores i16 1 to element 3 of
; @dynamic_shared2. Expected output: the base pointer is loaded from
; @llvm.amdgcn.dynlds.offset.table at the index returned by
; @llvm.amdgcn.lds.kernel.id() instead of naming the global directly.
define void @use_shared2() #0 {
; CHECK-LABEL: define void @use_shared2(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[DYNAMIC_SHARED2:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED21:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED2]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i16], ptr addrspace(3) [[DYNAMIC_SHARED21]], i32 0, i32 3
; CHECK-NEXT: store i16 1, ptr addrspace(3) [[ARRAYIDX]], align 2
; CHECK-NEXT: ret void
;
%arrayidx = getelementptr inbounds [0 x i16], ptr addrspace(3) @dynamic_shared2, i32 0, i32 3
store i16 1, ptr addrspace(3) %arrayidx
ret void
}
; Include a normal variable so that the new variables aren't all at the same absolute_symbol
; In the visible part of this test, the only reference to this i32 is the store
; in @use_shared4.
@static_shared = addrspace(3) global i32 poison
; Non-kernel, noinline (#0) function that first stores i32 4 to @static_shared
; and then stores i32 2 to element 5 of @dynamic_shared4.
; Expected output: the store to @static_shared becomes a store to
; @llvm.amdgcn.module.lds, and the dynamic access goes through a pointer loaded
; from @llvm.amdgcn.dynlds.offset.table at the index returned by
; @llvm.amdgcn.lds.kernel.id().
define void @use_shared4() #0 {
; CHECK-LABEL: define void @use_shared4(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: store i32 4, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4
; CHECK-NEXT: [[DYNAMIC_SHARED4:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED41:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED4]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr addrspace(3) [[DYNAMIC_SHARED41]], i32 0, i32 5
; CHECK-NEXT: store i32 2, ptr addrspace(3) [[ARRAYIDX]], align 4
; CHECK-NEXT: ret void
;
store i32 4, ptr addrspace(3) @static_shared
%arrayidx = getelementptr inbounds [0 x i32], ptr addrspace(3) @dynamic_shared4, i32 0, i32 5
store i32 2, ptr addrspace(3) %arrayidx
ret void
}
; Non-kernel, noinline (#0) function that stores i64 3 to element 7 of
; @dynamic_shared8. Expected output: inside this function the access is routed
; through a pointer loaded from @llvm.amdgcn.dynlds.offset.table at the index
; returned by @llvm.amdgcn.lds.kernel.id(), even though @expect_align8 also
; references @dynamic_shared8 directly.
define void @use_shared8() #0 {
; CHECK-LABEL: define void @use_shared8(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[DYNAMIC_SHARED8:%.*]] = getelementptr inbounds [5 x ptr addrspace(3)], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) [[DYNAMIC_SHARED81]], i32 0, i32 7
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8
; CHECK-NEXT: ret void
;
%arrayidx = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 7
store i64 3, ptr addrspace(3) %arrayidx
ret void
}
; The kernels are annotated with kernel.id and llvm.donothing use of the corresponding variable
; Kernel whose only statement is a call to @use_shared1.
; Expected output: the kernel gains !llvm.amdgcn.lds.kernel.id metadata (id 0)
; and a leading @llvm.donothing call whose "ExplicitUse" operand bundle names
; @llvm.amdgcn.expect_align1.dynlds (declared with align 1); the original call
; is kept.
define amdgpu_kernel void @expect_align1() {
; CHECK-LABEL: define amdgpu_kernel void @expect_align1(
; CHECK-SAME: ) !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds) ]
; CHECK-NEXT: call void @use_shared1()
; CHECK-NEXT: ret void
;
call void @use_shared1()
ret void
}
; Kernel whose only statement is a call to @use_shared2.
; Expected output: the kernel gains !llvm.amdgcn.lds.kernel.id metadata (id 1)
; and a leading @llvm.donothing call whose "ExplicitUse" operand bundle names
; @llvm.amdgcn.expect_align2.dynlds (declared with align 2); the original call
; is kept.
define amdgpu_kernel void @expect_align2() {
; CHECK-LABEL: define amdgpu_kernel void @expect_align2(
; CHECK-SAME: ) !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds) ]
; CHECK-NEXT: call void @use_shared2()
; CHECK-NEXT: ret void
;
call void @use_shared2()
ret void
}
; Kernel whose only statement is a call to @use_shared4, which touches both
; @static_shared and @dynamic_shared4.
; Expected output: the kernel gains !llvm.amdgcn.lds.kernel.id metadata (id 2),
; an attribute group carrying "amdgpu-lds-size"="4,4", and two leading
; @llvm.donothing calls whose "ExplicitUse" bundles name
; @llvm.amdgcn.expect_align4.dynlds (align 4) and @llvm.amdgcn.module.lds.
define amdgpu_kernel void @expect_align4() {
; CHECK-LABEL: define amdgpu_kernel void @expect_align4(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds) ]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; CHECK-NEXT: call void @use_shared4()
; CHECK-NEXT: ret void
;
call void @use_shared4()
ret void
}
; Use dynamic_shared8 directly from the kernel too.
; Kernel that stores i64 3 to element 9 of @dynamic_shared8 itself and then
; calls @use_shared8.
; Expected output: the kernel's own GEP and store still reference
; @dynamic_shared8 directly (the store only gains an explicit "align 8"); the
; kernel gains !llvm.amdgcn.lds.kernel.id metadata (id 3) and a leading
; @llvm.donothing call whose "ExplicitUse" bundle names
; @llvm.amdgcn.expect_align8.dynlds (align 8).
define amdgpu_kernel void @expect_align8() {
; CHECK-LABEL: define amdgpu_kernel void @expect_align8(
; CHECK-SAME: ) !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds) ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 9
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8
; CHECK-NEXT: call void @use_shared8()
; CHECK-NEXT: ret void
;
%arrayidx = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 9
store i64 3, ptr addrspace(3) %arrayidx
call void @use_shared8()
ret void
}
; Note: use_shared4 uses module.lds so this will allocate at offset 4
; Kernel that calls @use_shared2 and then @use_shared4, so it reaches both the
; align-2 and the align-4 dynamic arrays as well as @static_shared.
; Expected output: the kernel's .dynlds variable is declared with align 4 (the
; larger of the two), the kernel gains !llvm.amdgcn.lds.kernel.id metadata
; (id 4) and the "amdgpu-lds-size"="4,4" attribute group, and two leading
; @llvm.donothing calls name @llvm.amdgcn.expect_max_of_2_and_4.dynlds and
; @llvm.amdgcn.module.lds in their "ExplicitUse" bundles.
define amdgpu_kernel void @expect_max_of_2_and_4() {
; CHECK-LABEL: define amdgpu_kernel void @expect_max_of_2_and_4(
; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META6:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds) ]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; CHECK-NEXT: call void @use_shared2()
; CHECK-NEXT: call void @use_shared4()
; CHECK-NEXT: ret void
;
call void @use_shared2()
call void @use_shared4()
ret void
}
; Attribute group #0 is attached to @use_shared2, @use_shared4 and @use_shared8
; (but not to @use_shared1).
attributes #0 = { noinline }
;.
; CHECK: attributes #[[ATTR0]] = { noinline }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-lds-size"="4,4" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1]] = !{i32 4, i32 5}
; CHECK: [[META2]] = !{i32 0}
; CHECK: [[META3]] = !{i32 1}
; CHECK: [[META4]] = !{i32 2}
; CHECK: [[META5]] = !{i32 3}
; CHECK: [[META6]] = !{i32 4}
;.