blob: 95214bcf7c06de34ad73bcfffceb9addc6064ade [file]
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm < %s | FileCheck %s --check-prefix=DEFAULT
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-enable-object-linking -filetype=asm < %s | FileCheck %s --check-prefix=OL
; COM: External declaration: no body for @extern_callee is visible in this
; COM: module.
declare void @extern_callee()
; COM: Makes one direct call to the declaration @extern_callee and returns.
define void @calls_extern() {
call void @extern_callee()
ret void
}
; COM: Calls through the function pointer passed in %fptr, so the callee is
; COM: not named at the call site.
define void @calls_indirect(ptr %fptr) {
call void %fptr()
ret void
}
; COM: Empty function: returns immediately without making any calls.
define void @calls_local() {
ret void
}
; COM: Kernel entry point (amdgpu_kernel calling convention) that directly
; COM: calls each of the three functions defined above, forwarding %fptr to
; COM: @calls_indirect.
define amdgpu_kernel void @my_kernel(ptr %fptr) {
call void @calls_extern()
call void @calls_indirect(ptr %fptr)
call void @calls_local()
ret void
}
; COM: Default mode: direct-to-extern triggers the conservative "unknown
; COM: callee" path. Register/stack-size symbols include the module-level
; COM: sinks; boolean flags are all forced to 1; HasIndirectCall is set too
; COM: (IsIndirect covers calls to declarations).
; DEFAULT: .set .Lcalls_extern.num_vgpr, max({{[0-9]+}}, amdgpu.max_num_vgpr)
; DEFAULT: .set .Lcalls_extern.num_agpr, max({{[0-9]+}}, amdgpu.max_num_agpr)
; DEFAULT: .set .Lcalls_extern.numbered_sgpr, max({{[0-9]+}}, amdgpu.max_num_sgpr)
; DEFAULT: .set .Lcalls_extern.num_named_barrier, max({{[0-9]+}}, amdgpu.max_num_named_barrier)
; DEFAULT: .set .Lcalls_extern.uses_vcc, 1
; DEFAULT: .set .Lcalls_extern.uses_flat_scratch, 1
; DEFAULT: .set .Lcalls_extern.has_dyn_sized_stack, 1
; DEFAULT: .set .Lcalls_extern.has_recursion, 1
; DEFAULT: .set .Lcalls_extern.has_indirect_call, 1
; COM: Object linking: the same function reports only its own local usage.
; COM: The sinks drop out of the register/stack-size expressions and the
; COM: pessimized boolean flags collapse to the true local values. Two flags
; COM: stay at 1: UsesVCC, because the call-site lowering on gfx900 genuinely
; COM: uses VCC, and HasIndirectCall, because the call to a declaration is
; COM: still classified as indirect.
; OL: .set .Lcalls_extern.num_vgpr, {{[0-9]+}}
; OL: .set .Lcalls_extern.num_agpr, {{[0-9]+}}
; OL: .set .Lcalls_extern.numbered_sgpr, {{[0-9]+}}
; OL: .set .Lcalls_extern.num_named_barrier, {{[0-9]+}}
; OL: .set .Lcalls_extern.uses_vcc, 1
; OL: .set .Lcalls_extern.uses_flat_scratch, 0
; OL: .set .Lcalls_extern.has_dyn_sized_stack, 0
; OL: .set .Lcalls_extern.has_recursion, 0
; OL: .set .Lcalls_extern.has_indirect_call, 1
; COM: True indirect call: same DEFAULT-vs-OL behavior as the direct-to-extern
; COM: case above. In DEFAULT mode all the flags are pessimized; with object
; COM: linking only HasIndirectCall is preserved (the linker sees the call
; COM: site's typeid and address-taken set and handles propagation).
; DEFAULT: .set .Lcalls_indirect.uses_vcc, 1
; DEFAULT: .set .Lcalls_indirect.uses_flat_scratch, 1
; DEFAULT: .set .Lcalls_indirect.has_dyn_sized_stack, 1
; DEFAULT: .set .Lcalls_indirect.has_recursion, 1
; DEFAULT: .set .Lcalls_indirect.has_indirect_call, 1
; OL: .set .Lcalls_indirect.uses_vcc, 1
; OL: .set .Lcalls_indirect.uses_flat_scratch, 0
; OL: .set .Lcalls_indirect.has_dyn_sized_stack, 0
; OL: .set .Lcalls_indirect.has_recursion, 0
; OL: .set .Lcalls_indirect.has_indirect_call, 1
; COM: Baseline: a function that makes no calls at all reports the same
; COM: all-zero local flags in both modes.
; DEFAULT: .set .Lcalls_local.uses_vcc, 0
; DEFAULT: .set .Lcalls_local.uses_flat_scratch, 0
; DEFAULT: .set .Lcalls_local.has_dyn_sized_stack, 0
; DEFAULT: .set .Lcalls_local.has_recursion, 0
; DEFAULT: .set .Lcalls_local.has_indirect_call, 0
; OL: .set .Lcalls_local.uses_vcc, 0
; OL: .set .Lcalls_local.uses_flat_scratch, 0
; OL: .set .Lcalls_local.has_dyn_sized_stack, 0
; OL: .set .Lcalls_local.has_recursion, 0
; OL: .set .Lcalls_local.has_indirect_call, 0
; COM: Kernel side of the DEFAULT-vs-OL comparison. DEFAULT mode emits
; COM: call-graph-propagation expressions (max()/or() over every callee's
; COM: symbols) so the kernel picks up its callees' pessimized values; object
; COM: linking emits concrete literals and leaves cross-TU aggregation to the
; COM: linker.
; DEFAULT: .set .Lmy_kernel.num_vgpr, max({{[0-9]+}}, .Lcalls_extern.num_vgpr, .Lcalls_indirect.num_vgpr, .Lcalls_local.num_vgpr)
; DEFAULT: .set .Lmy_kernel.num_agpr, max({{[0-9]+}}, .Lcalls_extern.num_agpr, .Lcalls_indirect.num_agpr, .Lcalls_local.num_agpr)
; DEFAULT: .set .Lmy_kernel.num_named_barrier, max({{[0-9]+}}, .Lcalls_extern.num_named_barrier, .Lcalls_indirect.num_named_barrier, .Lcalls_local.num_named_barrier)
; DEFAULT: .set .Lmy_kernel.private_seg_size, {{[0-9]+}}+max(.Lcalls_extern.private_seg_size, .Lcalls_indirect.private_seg_size, .Lcalls_local.private_seg_size)
; DEFAULT: .set .Lmy_kernel.uses_vcc, or({{[0-9]+}}, .Lcalls_extern.uses_vcc, .Lcalls_indirect.uses_vcc, .Lcalls_local.uses_vcc)
; DEFAULT: .set .Lmy_kernel.uses_flat_scratch, or({{[0-9]+}}, .Lcalls_extern.uses_flat_scratch, .Lcalls_indirect.uses_flat_scratch, .Lcalls_local.uses_flat_scratch)
; DEFAULT: .set .Lmy_kernel.has_dyn_sized_stack, or({{[0-9]+}}, .Lcalls_extern.has_dyn_sized_stack, .Lcalls_indirect.has_dyn_sized_stack, .Lcalls_local.has_dyn_sized_stack)
; DEFAULT: .set .Lmy_kernel.has_recursion, or({{[0-9]+}}, .Lcalls_extern.has_recursion, .Lcalls_indirect.has_recursion, .Lcalls_local.has_recursion)
; DEFAULT: .set .Lmy_kernel.has_indirect_call, or({{[0-9]+}}, .Lcalls_extern.has_indirect_call, .Lcalls_indirect.has_indirect_call, .Lcalls_local.has_indirect_call)
; COM: The private_seg_size literal is anchored with {{$}}. FileCheck
; COM: patterns match substrings, so an unanchored number would also accept
; COM: the propagated "N+max(...)" form expected in default mode and the
; COM: check would not tell the two modes apart. The other literals need no
; COM: anchor because "max(" and "or(" do not begin with a digit.
; OL: .set .Lmy_kernel.num_vgpr, {{[0-9]+}}
; OL: .set .Lmy_kernel.num_agpr, {{[0-9]+}}
; OL: .set .Lmy_kernel.num_named_barrier, {{[0-9]+}}
; OL: .set .Lmy_kernel.private_seg_size, {{[0-9]+}}{{$}}
; OL: .set .Lmy_kernel.uses_vcc, {{[01]}}
; OL: .set .Lmy_kernel.uses_flat_scratch, {{[01]}}
; OL: .set .Lmy_kernel.has_dyn_sized_stack, 0
; OL: .set .Lmy_kernel.has_recursion, 0
; OL: .set .Lmy_kernel.has_indirect_call, 0