| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm < %s | FileCheck %s --check-prefix=DEFAULT |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-enable-object-linking -filetype=asm < %s | FileCheck %s --check-prefix=OL |
| |
| declare void @extern_callee() |
| |
| define void @calls_extern() { |
| call void @extern_callee() |
| ret void |
| } |
| |
| define void @calls_indirect(ptr %fptr) { |
| call void %fptr() |
| ret void |
| } |
| |
| define void @calls_local() { |
| ret void |
| } |
| |
| define amdgpu_kernel void @my_kernel(ptr %fptr) { |
| call void @calls_extern() |
| call void @calls_indirect(ptr %fptr) |
| call void @calls_local() |
| ret void |
| } |
| |
| ; COM: Default mode: direct-to-extern triggers the conservative "unknown |
| ; COM: callee" path. Register/stack-size symbols include the module-level |
| ; COM: sinks; boolean flags are all forced to 1; HasIndirectCall is set too |
| ; COM: (IsIndirect covers calls to declarations). |
| ; DEFAULT: .set .Lcalls_extern.num_vgpr, max({{[0-9]+}}, amdgpu.max_num_vgpr) |
| ; DEFAULT: .set .Lcalls_extern.num_agpr, max({{[0-9]+}}, amdgpu.max_num_agpr) |
| ; DEFAULT: .set .Lcalls_extern.numbered_sgpr, max({{[0-9]+}}, amdgpu.max_num_sgpr) |
| ; DEFAULT: .set .Lcalls_extern.num_named_barrier, max({{[0-9]+}}, amdgpu.max_num_named_barrier) |
| ; DEFAULT: .set .Lcalls_extern.uses_vcc, 1 |
| ; DEFAULT: .set .Lcalls_extern.uses_flat_scratch, 1 |
| ; DEFAULT: .set .Lcalls_extern.has_dyn_sized_stack, 1 |
| ; DEFAULT: .set .Lcalls_extern.has_recursion, 1 |
| ; DEFAULT: .set .Lcalls_extern.has_indirect_call, 1 |
| |
| ; COM: Object linking: the same function reports only its own local usage. |
| ; COM: The sinks drop out of the register/stack-size expressions and the |
| ; COM: pessimized boolean flags collapse to the true local values (UsesVCC is |
| ; COM: still 1 here because the call-site lowering on gfx900 genuinely uses |
| ; COM: VCC). |
| ; OL: .set .Lcalls_extern.num_vgpr, {{[0-9]+}} |
| ; OL: .set .Lcalls_extern.num_agpr, {{[0-9]+}} |
| ; OL: .set .Lcalls_extern.numbered_sgpr, {{[0-9]+}} |
| ; OL: .set .Lcalls_extern.num_named_barrier, {{[0-9]+}} |
| ; OL: .set .Lcalls_extern.uses_vcc, 1 |
| ; OL: .set .Lcalls_extern.uses_flat_scratch, 0 |
| ; OL: .set .Lcalls_extern.has_dyn_sized_stack, 0 |
| ; OL: .set .Lcalls_extern.has_recursion, 0 |
| ; OL: .set .Lcalls_extern.has_indirect_call, 1 |
| |
| ; COM: True indirect call: same DEFAULT-vs-OL behavior as the direct-to-extern |
| ; COM: case above. In DEFAULT mode all the flags are pessimized; with object |
| ; COM: linking only HasIndirectCall is preserved (the linker sees the call |
| ; COM: site's typeid and address-taken set and handles propagation). |
| ; DEFAULT: .set .Lcalls_indirect.uses_vcc, 1 |
| ; DEFAULT: .set .Lcalls_indirect.uses_flat_scratch, 1 |
| ; DEFAULT: .set .Lcalls_indirect.has_dyn_sized_stack, 1 |
| ; DEFAULT: .set .Lcalls_indirect.has_recursion, 1 |
| ; DEFAULT: .set .Lcalls_indirect.has_indirect_call, 1 |
| |
| ; OL: .set .Lcalls_indirect.uses_vcc, 1 |
| ; OL: .set .Lcalls_indirect.uses_flat_scratch, 0 |
| ; OL: .set .Lcalls_indirect.has_dyn_sized_stack, 0 |
| ; OL: .set .Lcalls_indirect.has_recursion, 0 |
| ; OL: .set .Lcalls_indirect.has_indirect_call, 1 |
| |
| ; COM: Baseline: a function that makes no calls outside itself reports the |
| ; COM: same all-zero local flags in both modes. |
| ; DEFAULT: .set .Lcalls_local.uses_vcc, 0 |
| ; DEFAULT: .set .Lcalls_local.uses_flat_scratch, 0 |
| ; DEFAULT: .set .Lcalls_local.has_dyn_sized_stack, 0 |
| ; DEFAULT: .set .Lcalls_local.has_recursion, 0 |
| ; DEFAULT: .set .Lcalls_local.has_indirect_call, 0 |
| |
| ; OL: .set .Lcalls_local.uses_vcc, 0 |
| ; OL: .set .Lcalls_local.uses_flat_scratch, 0 |
| ; OL: .set .Lcalls_local.has_dyn_sized_stack, 0 |
| ; OL: .set .Lcalls_local.has_recursion, 0 |
| ; OL: .set .Lcalls_local.has_indirect_call, 0 |
| |
| ; COM: Kernel side of the DEFAULT-vs-OL comparison. DEFAULT mode emits |
| ; COM: call-graph-propagation expressions (max()/or() over every callee's |
| ; COM: symbols) so the kernel picks up its callees' pessimized values; object |
| ; COM: linking emits concrete literals and leaves cross-TU aggregation to the |
| ; COM: linker. |
| ; DEFAULT: .set .Lmy_kernel.num_vgpr, max({{[0-9]+}}, .Lcalls_extern.num_vgpr, .Lcalls_indirect.num_vgpr, .Lcalls_local.num_vgpr) |
| ; DEFAULT: .set .Lmy_kernel.num_agpr, max({{[0-9]+}}, .Lcalls_extern.num_agpr, .Lcalls_indirect.num_agpr, .Lcalls_local.num_agpr) |
| ; DEFAULT: .set .Lmy_kernel.num_named_barrier, max({{[0-9]+}}, .Lcalls_extern.num_named_barrier, .Lcalls_indirect.num_named_barrier, .Lcalls_local.num_named_barrier) |
| ; DEFAULT: .set .Lmy_kernel.private_seg_size, {{[0-9]+}}+max(.Lcalls_extern.private_seg_size, .Lcalls_indirect.private_seg_size, .Lcalls_local.private_seg_size) |
| ; DEFAULT: .set .Lmy_kernel.uses_vcc, or({{[0-9]+}}, .Lcalls_extern.uses_vcc, .Lcalls_indirect.uses_vcc, .Lcalls_local.uses_vcc) |
| ; DEFAULT: .set .Lmy_kernel.uses_flat_scratch, or({{[0-9]+}}, .Lcalls_extern.uses_flat_scratch, .Lcalls_indirect.uses_flat_scratch, .Lcalls_local.uses_flat_scratch) |
| ; DEFAULT: .set .Lmy_kernel.has_dyn_sized_stack, or({{[0-9]+}}, .Lcalls_extern.has_dyn_sized_stack, .Lcalls_indirect.has_dyn_sized_stack, .Lcalls_local.has_dyn_sized_stack) |
| ; DEFAULT: .set .Lmy_kernel.has_recursion, or({{[0-9]+}}, .Lcalls_extern.has_recursion, .Lcalls_indirect.has_recursion, .Lcalls_local.has_recursion) |
| ; DEFAULT: .set .Lmy_kernel.has_indirect_call, or({{[0-9]+}}, .Lcalls_extern.has_indirect_call, .Lcalls_indirect.has_indirect_call, .Lcalls_local.has_indirect_call) |
| |
| ; OL: .set .Lmy_kernel.num_vgpr, {{[0-9]+}} |
| ; OL: .set .Lmy_kernel.num_agpr, {{[0-9]+}} |
| ; OL: .set .Lmy_kernel.num_named_barrier, {{[0-9]+}} |
| ; OL: .set .Lmy_kernel.private_seg_size, {{[0-9]+}} |
| ; OL: .set .Lmy_kernel.uses_vcc, {{[01]}} |
| ; OL: .set .Lmy_kernel.uses_flat_scratch, {{[01]}} |
| ; OL: .set .Lmy_kernel.has_dyn_sized_stack, 0 |
| ; OL: .set .Lmy_kernel.has_recursion, 0 |
| ; OL: .set .Lmy_kernel.has_indirect_call, 0 |