| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX9 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX10 %s |
| |
| ; |
| ; None of these functions should have the attribute amdgpu-no-flat-scratch-init, yet in these |
| ; tests we manually set the attribute on every function (attribute group #0). The purpose is to |
| ; test how the amdgpu-attributor pass handles a function that already carries the attribute. |
| ; |
| ;; tests of addrspacecast |
| |
| ; Non-kernel function that casts its addrspace(5) (private) pointer argument to a |
| ; generic pointer with an addrspacecast instruction and does a volatile store of 0 |
| ; through the result. It is annotated with attribute group #0. The expected output |
| ; keeps "amdgpu-no-flat-scratch-init" in the function's attribute group and shows |
| ; !noalias.addrspace metadata attached to the store. |
| define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { |
| ; GFX9-LABEL: define void @with_private_to_flat_addrspacecast( |
| ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr |
| ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0:![0-9]+]] |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define void @with_private_to_flat_addrspacecast( |
| ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr |
| ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0:![0-9]+]] |
| ; GFX10-NEXT: ret void |
| ; |
| %stof = addrspacecast ptr addrspace(5) %ptr to ptr |
| store volatile i32 0, ptr %stof |
| ret void |
| } |
| |
| ; Kernel (amdgpu_kernel calling convention) variant of the direct cast test: it |
| ; casts its addrspace(5) argument to a generic pointer and does a volatile store of |
| ; 0 through it. It is annotated with attribute group #0; the expected output uses |
| ; the same attribute group and metadata node as the non-kernel variant. |
| define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) #0 { |
| ; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel( |
| ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr |
| ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0]] |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel( |
| ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr |
| ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4, !noalias.addrspace [[META0]] |
| ; GFX10-NEXT: ret void |
| ; |
| %stof = addrspacecast ptr addrspace(5) %ptr to ptr |
| store volatile i32 0, ptr %stof |
| ret void |
| } |
| |
| ; Non-kernel function with no cast in its own body: it only forwards its |
| ; addrspace(5) argument to @with_private_to_flat_addrspacecast, so the cast is |
| ; reached indirectly through a call. It is annotated with attribute group #0; the |
| ; expected output leaves the call unchanged. |
| define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { |
| ; GFX9-LABEL: define void @call_with_private_to_flat_addrspacecast( |
| ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define void @call_with_private_to_flat_addrspacecast( |
| ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) |
| ; GFX10-NEXT: ret void |
| ; |
| call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) |
| ret void |
| } |
| |
| ; Kernel (amdgpu_kernel calling convention) that contains no cast itself and only |
| ; calls the non-kernel function @with_private_to_flat_addrspacecast with its |
| ; addrspace(5) argument. It is annotated with attribute group #0. |
| define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) #0 { |
| ; GFX9-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel( |
| ; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel( |
| ; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) |
| ; GFX10-NEXT: ret void |
| ; |
| call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) |
| ret void |
| } |
| |
| ;; tests of addrspacecast in a constant |
| |
| ; Kernel in which the addrspace(5)-to-generic cast appears only as a constant |
| ; expression (an addrspacecast of inttoptr 123), not as an instruction. The |
| ; resulting pointer value is stored, volatile, to the addrspace(1) argument %out. |
| ; It is annotated with attribute group #0. In the expected output the store is |
| ; unchanged and the nocapture parameter attribute is printed as captures(none). |
| define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) nocapture %out) #0 { |
| ; GFX9-LABEL: define amdgpu_kernel void @private_constant_expression_use( |
| ; GFX9-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8 |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define amdgpu_kernel void @private_constant_expression_use( |
| ; GFX10-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8 |
| ; GFX10-NEXT: ret void |
| ; |
| store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| ;; tests of intrinsics |
| |
| ; Kernel that performs the cast through the llvm.amdgcn.addrspacecast.nonnull |
| ; intrinsic (p0.p3 overload: addrspace(3) source, generic result) instead of an |
| ; addrspacecast instruction, then does a volatile store of 7 through the result. |
| ; It is annotated with attribute group #0. The expected output leaves the body |
| ; unchanged; no !noalias.addrspace metadata is expected on this store. |
| ; NOTE(review): the source pointer here is addrspace(3), whereas the |
| ; addrspacecast-instruction tests earlier in this file use addrspace(5) -- confirm |
| ; this is the intended case for the flat-scratch-init attribute. |
| define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) #0 { |
| ; GFX9-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel( |
| ; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) |
| ; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel( |
| ; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) |
| ; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 |
| ; GFX10-NEXT: ret void |
| ; |
| %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr) |
| store volatile i32 7, ptr %1, align 4 |
| ret void |
| } |
| |
| ; Non-kernel function that casts its addrspace(3) argument to a generic pointer |
| ; through the llvm.amdgcn.addrspacecast.nonnull.p0.p3 intrinsic and does a |
| ; volatile store of 7 through the result. It is annotated with attribute group #0; |
| ; the expected output leaves the body unchanged. |
| define void @calls_intrin_ascast(ptr addrspace(3) %ptr) #0 { |
| ; GFX9-LABEL: define void @calls_intrin_ascast( |
| ; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) |
| ; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define void @calls_intrin_ascast( |
| ; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) |
| ; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 |
| ; GFX10-NEXT: ret void |
| ; |
| %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr) |
| store volatile i32 7, ptr %1, align 4 |
| ret void |
| } |
| |
| ; Kernel that does not use the cast intrinsic itself and only calls the non-kernel |
| ; function @calls_intrin_ascast with its addrspace(3) argument, so the intrinsic |
| ; is reached indirectly through a call. It is annotated with attribute group #0. |
| define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) #0 { |
| ; GFX9-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel( |
| ; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX9-NEXT: call void @calls_intrin_ascast(ptr addrspace(3) [[PTR]]) |
| ; GFX9-NEXT: ret void |
| ; |
| ; GFX10-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel( |
| ; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: call void @calls_intrin_ascast(ptr addrspace(3) [[PTR]]) |
| ; GFX10-NEXT: ret void |
| ; |
| call void @calls_intrin_ascast(ptr addrspace(3) %ptr) |
| ret void |
| } |
| |
| ; Attribute group shared by every function in this file: the manually applied |
| ; "amdgpu-no-flat-scratch-init" attribute whose handling by the pass is under test. |
| attributes #0 = { "amdgpu-no-flat-scratch-init" } |
| ;. |
| ; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } |
| ; GFX9: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" } |
| ;. |
| ; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } |
| ; GFX10: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" } |
| ;. |
| ; GFX9: [[META0]] = !{i32 1, i32 5, i32 6, i32 10} |
| ;. |
| ; GFX10: [[META0]] = !{i32 1, i32 5, i32 6, i32 10} |
| ;. |