| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s |
| |
| ; Regression test for a bug where addAliasScopeMetadata skipped memory- |
| ; accessing calls with no pointer arguments, leaving them without !noalias |
| ; metadata. This caused AA to conservatively report them as potential |
| ; clobbers of noalias kernel arguments, blocking downstream scalarization |
| ; in AMDGPUAnnotateUniformValues and causing severe performance regressions |
| ; (e.g. in rocFFT). |
| |
| declare i32 @memory_read_no_ptr_args() #1 |
| |
| ; The callee is memory(read) (attribute group #1) but takes no pointer arguments, so it cannot alias |
| ; any noalias kernel argument. The pass must tag the call with !noalias covering both %out and %in. |
| define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @call_without_ptr_args( |
| ; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() |
| ; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0 |
| ; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0:![0-9]+]] |
| ; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 8 |
| ; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]] |
| ; CHECK-NEXT: [[VAL:%.*]] = call i32 @memory_read_no_ptr_args(), !noalias [[META1:![0-9]+]] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[VAL]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META6:![0-9]+]] |
| ; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META6]], !noalias [[META5]] |
| ; CHECK-NEXT: ret void |
| ; |
| %val = call i32 @memory_read_no_ptr_args() |
| %gep = getelementptr i32, ptr addrspace(1) %in, i32 %val |
| %load = load i32, ptr addrspace(1) %gep, align 4 |
| store i32 %load, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Same scenario, but the callee is memory(none) (attribute group #2). The call must NOT get !noalias |
| ; metadata: it does not access memory at all and is skipped by the pass. The store to %out is expected to carry only !alias.scope. |
| declare i32 @readnone_no_ptr_args() #2 |
| |
| define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noalias %out) #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @readnone_call_without_ptr_args( |
| ; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() |
| ; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0 |
| ; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] |
| ; CHECK-NEXT: [[VAL:%.*]] = call i32 @readnone_no_ptr_args() |
| ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META7:![0-9]+]] |
| ; CHECK-NEXT: ret void |
| ; |
| %val = call i32 @readnone_no_ptr_args() |
| store i32 %val, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; argmemonly variant: the callee is memory(argmem: read) (attribute group #3) with no pointer arguments. |
| ; It may only access memory through its pointer arguments, but it has |
| ; none, so it effectively cannot access memory at all. The pass must still |
| ; add !noalias metadata (covering both %out and %in) since doesNotAccessMemory() returns false. |
| declare i32 @argmemonly_read_no_ptr_args() #3 |
| |
| define amdgpu_kernel void @argmemonly_call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @argmemonly_call_without_ptr_args( |
| ; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() |
| ; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0 |
| ; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] |
| ; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 8 |
| ; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]] |
| ; CHECK-NEXT: [[VAL:%.*]] = call i32 @argmemonly_read_no_ptr_args(), !noalias [[META10:![0-9]+]] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[VAL]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]] |
| ; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META15]], !noalias [[META14]] |
| ; CHECK-NEXT: ret void |
| ; |
| %val = call i32 @argmemonly_read_no_ptr_args() |
| %gep = getelementptr i32, ptr addrspace(1) %in, i32 %val |
| %load = load i32, ptr addrspace(1) %gep, align 4 |
| store i32 %load, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; argmemonly callee, memory(argmem: readwrite) (attribute group #4), passed the noalias kernel arg %out — |
| ; the standard metadata path. The call may access %out's memory through its pointer argument, so it |
| ; gets !alias.scope for the "out" scope and !noalias for the "in" scope, the same as the store to %out. |
| declare void @argmemonly_with_ptr_arg(ptr addrspace(1)) #4 |
| |
| define amdgpu_kernel void @argmemonly_call_with_ptr_arg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @argmemonly_call_with_ptr_arg( |
| ; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() |
| ; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT]], i64 0 |
| ; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]] |
| ; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT]], i64 8 |
| ; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[IN_LOAD]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META19:![0-9]+]] |
| ; CHECK-NEXT: call void @argmemonly_with_ptr_arg(ptr addrspace(1) [[OUT_LOAD]]), !alias.scope [[META19]], !noalias [[META16]] |
| ; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META19]], !noalias [[META16]] |
| ; CHECK-NEXT: ret void |
| ; |
| %load = load i32, ptr addrspace(1) %in, align 4 |
| call void @argmemonly_with_ptr_arg(ptr addrspace(1) %out) |
| store i32 %load, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Attribute groups: #0 is applied to the kernels; #1-#4 set the memory effects of the declared callees. |
| attributes #0 = { nounwind } |
| attributes #1 = { nounwind memory(read) } |
| attributes #2 = { nounwind memory(none) } |
| attributes #3 = { nounwind memory(argmem: read) } |
| attributes #4 = { nounwind memory(argmem: readwrite) } |
| ;. |
| ; CHECK: [[META0]] = !{} |
| ; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]]} |
| ; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]], !"out"} |
| ; CHECK: [[META3]] = distinct !{[[META3]], !"call_without_ptr_args"} |
| ; CHECK: [[META4]] = distinct !{[[META4]], [[META3]], !"in"} |
| ; CHECK: [[META5]] = !{[[META4]]} |
| ; CHECK: [[META6]] = !{[[META2]]} |
| ; CHECK: [[META7]] = !{[[META8:![0-9]+]]} |
| ; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"out"} |
| ; CHECK: [[META9]] = distinct !{[[META9]], !"readnone_call_without_ptr_args"} |
| ; CHECK: [[META10]] = !{[[META11:![0-9]+]], [[META13:![0-9]+]]} |
| ; CHECK: [[META11]] = distinct !{[[META11]], [[META12:![0-9]+]], !"out"} |
| ; CHECK: [[META12]] = distinct !{[[META12]], !"argmemonly_call_without_ptr_args"} |
| ; CHECK: [[META13]] = distinct !{[[META13]], [[META12]], !"in"} |
| ; CHECK: [[META14]] = !{[[META13]]} |
| ; CHECK: [[META15]] = !{[[META11]]} |
| ; CHECK: [[META16]] = !{[[META17:![0-9]+]]} |
| ; CHECK: [[META17]] = distinct !{[[META17]], [[META18:![0-9]+]], !"in"} |
| ; CHECK: [[META18]] = distinct !{[[META18]], !"argmemonly_call_with_ptr_arg"} |
| ; CHECK: [[META19]] = !{[[META20:![0-9]+]]} |
| ; CHECK: [[META20]] = distinct !{[[META20]], [[META18]], !"out"} |
| ;. |