| ; Note: uses a randomly selected assumed external call stack size so that the |
| ; test assertions are unlikely to succeed by accident. |
| |
| ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s |
| ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s |
| ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s |
| ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s |
| |
| ; CHECK-LABEL: amdhsa.kernels |
| |
| ; test a kernel without an external call that occurs before its callee in the module |
| ; CHECK-LABEL: test1 |
| ; CHECK: .private_segment_fixed_size: 20 |
| |
| ; GFX7: .sgpr_count: 37 |
| ; GFX7: .sgpr_spill_count: 0 |
| ; GFX7: .vgpr_count: 4 |
| ; GFX7: .vgpr_spill_count: 0 |
| |
| ; GFX8: .sgpr_count: 39 |
| ; GFX8: .sgpr_spill_count: 0 |
| ; GFX8: .vgpr_count: 4 |
| ; GFX8: .vgpr_spill_count: 0 |
| |
| ; GFX9: .sgpr_count: 39 |
| ; GFX9: .sgpr_spill_count: 0 |
| ; GFX9: .vgpr_count: 4 |
| ; GFX9: .vgpr_spill_count: 0 |
| |
| ; GFX10: .sgpr_count: 33 |
| ; GFX10: .sgpr_spill_count: 0 |
| ; GFX10: .vgpr_count: 4 |
| ; GFX10: .vgpr_spill_count: 0 |
| define amdgpu_kernel void @test1(ptr %x) { |
| %1 = load volatile float, ptr %x |
| %2 = call float @f(float %1) |
| store volatile float %2, ptr %x |
| ret void |
| } |
| |
| define internal float @f(float %arg0) #0 { |
| %stack = alloca float, i32 4, align 4, addrspace(5) |
| store volatile float 3.0, ptr addrspace(5) %stack |
| %val = load volatile float, ptr addrspace(5) %stack |
| %add = fadd float %arg0, %val |
| ret float %add |
| } |
| |
| ; test a kernel without an external call that occurs after its callee in the module |
| ; CHECK-LABEL: test2 |
| ; CHECK: .private_segment_fixed_size: 20 |
| |
| ; GFX7: .sgpr_count: 37 |
| ; GFX7: .sgpr_spill_count: 0 |
| ; GFX7: .vgpr_count: 4 |
| ; GFX7: .vgpr_spill_count: 0 |
| |
| ; GFX8: .sgpr_count: 39 |
| ; GFX8: .sgpr_spill_count: 0 |
| ; GFX8: .vgpr_count: 4 |
| ; GFX8: .vgpr_spill_count: 0 |
| |
| ; GFX9: .sgpr_count: 39 |
| ; GFX9: .sgpr_spill_count: 0 |
| ; GFX9: .vgpr_count: 4 |
| ; GFX9: .vgpr_spill_count: 0 |
| |
| ; GFX10: .sgpr_count: 33 |
| ; GFX10: .sgpr_spill_count: 0 |
| ; GFX10: .vgpr_count: 4 |
| ; GFX10: .vgpr_spill_count: 0 |
| define amdgpu_kernel void @test2(ptr %x) { |
| %1 = load volatile float, ptr %x |
| %2 = call float @f(float %1) |
| store volatile float %2, ptr %x |
| ret void |
| } |
| |
| ; test a kernel with an external call that occurs before its callee in the module |
| ; CHECK-LABEL: test3 |
| ; CHECK: .private_segment_fixed_size: 5310 |
| |
| ; GFX7: .sgpr_count: 37 |
| ; GFX7: .sgpr_spill_count: 0 |
| ; GFX7: .vgpr_count: 32 |
| ; GFX7: .vgpr_spill_count: 0 |
| |
| ; GFX8: .sgpr_count: 39 |
| ; GFX8: .sgpr_spill_count: 0 |
| ; GFX8: .vgpr_count: 32 |
| ; GFX8: .vgpr_spill_count: 0 |
| |
| ; GFX9: .sgpr_count: 39 |
| ; GFX9: .sgpr_spill_count: 0 |
| ; GFX9: .vgpr_count: 32 |
| ; GFX9: .vgpr_spill_count: 0 |
| |
| ; GFX10: .sgpr_count: 35 |
| ; GFX10: .sgpr_spill_count: 0 |
| ; GFX10: .vgpr_count: 32 |
| ; GFX10: .vgpr_spill_count: 0 |
| define amdgpu_kernel void @test3() { |
| call void @g() |
| ret void |
| } |
| |
| declare void @g() #0 |
| |
| ; test a kernel without an external call that occurs after its callee in the module |
| ; CHECK-LABEL: test4 |
| ; CHECK: .private_segment_fixed_size: 5310 |
| |
| ; GFX7: .sgpr_count: 37 |
| ; GFX7: .sgpr_spill_count: 0 |
| ; GFX7: .vgpr_count: 32 |
| ; GFX7: .vgpr_spill_count: 0 |
| |
| ; GFX8: .sgpr_count: 39 |
| ; GFX8: .sgpr_spill_count: 0 |
| ; GFX8: .vgpr_count: 32 |
| ; GFX8: .vgpr_spill_count: 0 |
| |
| ; GFX9: .sgpr_count: 39 |
| ; GFX9: .sgpr_spill_count: 0 |
| ; GFX9: .vgpr_count: 32 |
| ; GFX9: .vgpr_spill_count: 0 |
| |
| ; GFX10: .sgpr_count: 35 |
| ; GFX10: .sgpr_spill_count: 0 |
| ; GFX10: .vgpr_count: 32 |
| ; GFX10: .vgpr_spill_count: 0 |
| define amdgpu_kernel void @test4() { |
| call void @g() |
| ret void |
| } |
| |
| attributes #0 = { norecurse } |
| |
| !llvm.module.flags = !{!0} |
| !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} |