; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s
; Regression test for a bug where addAliasScopeMetadata skipped calls that
; access memory but have no pointer arguments, leaving them without !noalias
; metadata. Alias analysis then conservatively reported such calls as
; potential clobbers of noalias kernel arguments, which blocked downstream
; scalarization in AMDGPUAnnotateUniformValues and caused severe performance
; regressions (e.g. in rocFFT).
; Callee under test: returns i32, takes no arguments, and uses attribute
; group #1 (nounwind memory(read)).
declare i32 @memory_read_no_ptr_args() #1
; The call reads memory but has no pointer arguments — it cannot alias
; any noalias kernel argument. The pass must add !noalias metadata to it.
;
; The kernel uses the call result as an i32 element index into %in, loads
; that element and stores it to %out. In the expected output below, the call
; carries !noalias listing both the "out" and "in" scopes; the load is tagged
; !alias.scope "in" / !noalias "out", and the store is the mirror image.
define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
; CHECK-LABEL: define amdgpu_kernel void @call_without_ptr_args(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 8
; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]]
; CHECK-NEXT: [[VAL:%.*]] = call i32 @memory_read_no_ptr_args(), !noalias [[META1:![0-9]+]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[VAL]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META6:![0-9]+]]
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META6]], !noalias [[META5]]
; CHECK-NEXT: ret void
;
%val = call i32 @memory_read_no_ptr_args()
%gep = getelementptr i32, ptr addrspace(1) %in, i32 %val
%load = load i32, ptr addrspace(1) %gep, align 4
store i32 %load, ptr addrspace(1) %out, align 4
ret void
}
; Similar scenario, but the callee is readnone (attribute group #2,
; memory(none)) — the call should NOT get noalias metadata because it doesn't
; access memory at all and is skipped by the pass.
;
; This kernel has a single noalias argument and simply stores the call result
; to %out. In the expected output below, the call line carries no metadata,
; and the store carries only !alias.scope for the "out" scope.
declare i32 @readnone_no_ptr_args() #2
define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noalias %out) #0 {
; CHECK-LABEL: define amdgpu_kernel void @readnone_call_without_ptr_args(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
; CHECK-NEXT: [[VAL:%.*]] = call i32 @readnone_no_ptr_args()
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META7:![0-9]+]]
; CHECK-NEXT: ret void
;
%val = call i32 @readnone_no_ptr_args()
store i32 %val, ptr addrspace(1) %out, align 4
ret void
}
; argmemonly variant: memory(argmem: read) with no pointer arguments
; (attribute group #3).
; This function can only access memory through its pointer arguments, but
; has none — so it effectively cannot access memory at all. The pass must
; still add !noalias metadata since doesNotAccessMemory() returns false.
;
; The kernel body has the same shape as @call_without_ptr_args: the call
; result indexes %in, and the loaded element is stored to %out. In the
; expected output below, the call carries !noalias listing both the "out" and
; "in" scopes of this kernel.
declare i32 @argmemonly_read_no_ptr_args() #3
define amdgpu_kernel void @argmemonly_call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
; CHECK-LABEL: define amdgpu_kernel void @argmemonly_call_without_ptr_args(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 8
; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]]
; CHECK-NEXT: [[VAL:%.*]] = call i32 @argmemonly_read_no_ptr_args(), !noalias [[META10:![0-9]+]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[VAL]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META15]], !noalias [[META14]]
; CHECK-NEXT: ret void
;
%val = call i32 @argmemonly_read_no_ptr_args()
%gep = getelementptr i32, ptr addrspace(1) %in, i32 %val
%load = load i32, ptr addrspace(1) %gep, align 4
store i32 %load, ptr addrspace(1) %out, align 4
ret void
}
; argmemonly with a pointer argument pointing to a noalias kernel arg —
; standard metadata path. The call accesses kernel arg memory through its
; pointer argument and gets both !alias.scope and !noalias as appropriate.
;
; The callee uses attribute group #4 (memory(argmem: readwrite)) and is
; passed %out. The kernel loads from %in first, then makes the call, then
; stores the loaded value to %out. In the expected output below, the call is
; tagged exactly like the store to %out (!alias.scope "out", !noalias "in"),
; while the load from %in gets the opposite pairing.
declare void @argmemonly_with_ptr_arg(ptr addrspace(1)) #4
define amdgpu_kernel void @argmemonly_call_with_ptr_arg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
; CHECK-LABEL: define amdgpu_kernel void @argmemonly_call_with_ptr_arg(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT]], i64 8
; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[IN_LOAD]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META19:![0-9]+]]
; CHECK-NEXT: call void @argmemonly_with_ptr_arg(ptr addrspace(1) [[OUT_LOAD]]), !alias.scope [[META19]], !noalias [[META16]]
; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META19]], !noalias [[META16]]
; CHECK-NEXT: ret void
;
%load = load i32, ptr addrspace(1) %in, align 4
call void @argmemonly_with_ptr_arg(ptr addrspace(1) %out)
store i32 %load, ptr addrspace(1) %out, align 4
ret void
}
; Attribute groups. #0 is used by the kernel definitions above; #1-#4 give
; each declared callee a distinct memory effect:
;   #1 @memory_read_no_ptr_args      reads any memory
;   #2 @readnone_no_ptr_args         accesses no memory
;   #3 @argmemonly_read_no_ptr_args  reads argument memory only
;   #4 @argmemonly_with_ptr_arg      reads/writes argument memory only
attributes #0 = { nounwind }
attributes #1 = { nounwind memory(read) }
attributes #2 = { nounwind memory(none) }
attributes #3 = { nounwind memory(argmem: read) }
attributes #4 = { nounwind memory(argmem: readwrite) }
; Expected metadata nodes (autogenerated). Each kernel gets its own alias
; domain node named after the kernel, plus one scope node per noalias
; argument named after that argument ("out" / "in"); the remaining nodes are
; the scope lists referenced from the instructions above.
;.
; CHECK: [[META0]] = !{}
; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]]}
; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]], !"out"}
; CHECK: [[META3]] = distinct !{[[META3]], !"call_without_ptr_args"}
; CHECK: [[META4]] = distinct !{[[META4]], [[META3]], !"in"}
; CHECK: [[META5]] = !{[[META4]]}
; CHECK: [[META6]] = !{[[META2]]}
; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"out"}
; CHECK: [[META9]] = distinct !{[[META9]], !"readnone_call_without_ptr_args"}
; CHECK: [[META10]] = !{[[META11:![0-9]+]], [[META13:![0-9]+]]}
; CHECK: [[META11]] = distinct !{[[META11]], [[META12:![0-9]+]], !"out"}
; CHECK: [[META12]] = distinct !{[[META12]], !"argmemonly_call_without_ptr_args"}
; CHECK: [[META13]] = distinct !{[[META13]], [[META12]], !"in"}
; CHECK: [[META14]] = !{[[META13]]}
; CHECK: [[META15]] = !{[[META11]]}
; CHECK: [[META16]] = !{[[META17:![0-9]+]]}
; CHECK: [[META17]] = distinct !{[[META17]], [[META18:![0-9]+]], !"in"}
; CHECK: [[META18]] = distinct !{[[META18]], !"argmemonly_call_with_ptr_arg"}
; CHECK: [[META19]] = !{[[META20:![0-9]+]]}
; CHECK: [[META20]] = distinct !{[[META20]], [[META18]], !"out"}
;.