blob: 7b01eec2698c3cbe074460ad5333cce0d74d8994 [file] [edit]
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=DVGPR %s
; RUN: sed 's/"amdgpu-dynamic-vgpr-block-size"="16"/"amdgpu-dynamic-vgpr-block-size"="0"/' %s \
; RUN: | llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 | FileCheck -check-prefix=NODVGPR %s
; Test a set of functions with mixed use of DVGPR (enabled or disabled). Also
; test chain functions directly calling other chain functions without going
; through `first_retry_wrapper` helper. A chain function will not propagate VGPR
; counts to a directly called chain function, regardless of whether either has
; DVGPRs enabled or disabled.
; func.2 and func.4 have DVGPRs disabled
; DVGPR: .set .Lgfx_func_a.num_vgpr, 40
; DVGPR: .set .Lamdgpu_cs_main.num_vgpr, max(42, .Lgfx_func_a.num_vgpr)
; DVGPR: .set .Lfunc.0.num_vgpr, max(11, .Lgfx_func_a.num_vgpr)
; DVGPR: .set .Lfunc.1.num_vgpr, 82
; DVGPR: .set .Lfunc.2.num_vgpr, max(11, amdgpu.max_num_vgpr)
; DVGPR: .set .Lfunc.3.num_vgpr, 11
; DVGPR: .set .Lfunc.4.num_vgpr, max(82, amdgpu.max_num_vgpr)
; DVGPR: .set .Lretry_vgpr_alloc.num_vgpr, 11
; DVGPR: .set .Lfirst_retry_wrapper.num_vgpr, 11
; DVGPR: .set amdgpu.max_num_vgpr, 82
; NODVGPR: .set .Lgfx_func_a.num_vgpr, 40
; NODVGPR: .set .Lamdgpu_cs_main.num_vgpr, max(42, amdgpu.max_num_vgpr)
; NODVGPR: .set .Lfunc.0.num_vgpr, max(11, amdgpu.max_num_vgpr)
; NODVGPR: .set .Lfunc.1.num_vgpr, max(82, amdgpu.max_num_vgpr)
; NODVGPR: .set .Lfunc.2.num_vgpr, max(11, amdgpu.max_num_vgpr)
; NODVGPR: .set .Lfunc.3.num_vgpr, max(11, amdgpu.max_num_vgpr)
; NODVGPR: .set .Lfunc.4.num_vgpr, max(82, amdgpu.max_num_vgpr)
; NODVGPR: .set .Lretry_vgpr_alloc.num_vgpr, max(11, amdgpu.max_num_vgpr)
; NODVGPR: .set .Lfirst_retry_wrapper.num_vgpr, max(11, amdgpu.max_num_vgpr)
; NODVGPR: .set amdgpu.max_num_vgpr, 82
; DVGPR: - .hardware_stages:
; DVGPR: .vgpr_count: 0x2a
; DVGPR: .shader_functions:
; DVGPR: func.0:
; DVGPR: .vgpr_count: 0x28
; DVGPR: func.1:
; DVGPR: .vgpr_count: 0x52
; DVGPR: func.2:
; DVGPR: .vgpr_count: 0x52
; DVGPR: func.3:
; DVGPR: .vgpr_count: 0xb
; DVGPR: func.4:
; DVGPR: .vgpr_count: 0x52
; DVGPR: gfx_func_a:
; DVGPR: .vgpr_count: 0x28
; NODVGPR: - .hardware_stages:
; NODVGPR: .vgpr_count: 0x52
; NODVGPR: .shader_functions:
; NODVGPR: func.0:
; NODVGPR: .vgpr_count: 0x52
; NODVGPR: func.1:
; NODVGPR: .vgpr_count: 0x52
; NODVGPR: func.2:
; NODVGPR: .vgpr_count: 0x52
; NODVGPR: func.3:
; NODVGPR: .vgpr_count: 0x52
; NODVGPR: func.4:
; NODVGPR: .vgpr_count: 0x52
; NODVGPR: gfx_func_a:
; NODVGPR: .vgpr_count: 0x28
define amdgpu_gfx void @gfx_func_a() #0 {
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"()
ret void
}
define amdgpu_cs void @amdgpu_cs_main(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 {
%fptr = load ptr, ptr inttoptr(i64 0 to ptr)
call amdgpu_gfx void @gfx_func_a()
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @func.1)
unreachable
}
define amdgpu_cs_chain void @func.0(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 {
call amdgpu_gfx void @gfx_func_a()
%fptr = load ptr, ptr inttoptr(i64 0 to ptr)
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper)
unreachable
}
; Has direct chain call to func.2 instead of using first_retry_wrapper
define amdgpu_cs_chain void @func.1(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 {
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39},~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49},~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59},~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69},~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}"()
%fptr = load ptr, ptr inttoptr(i64 0 to ptr)
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @func.2)
unreachable
}
; amdgpu-dynamic-vgpr-block-size = 0
define amdgpu_cs_chain void @func.2(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #1 {
%fptr = load ptr, ptr inttoptr(i64 0 to ptr)
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper)
unreachable
}
; Has direct chain call to func.4 instead of using first_retry_wrapper
define amdgpu_cs_chain void @func.3(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 {
%fptr = load ptr, ptr inttoptr(i64 0 to ptr)
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @func.4)
unreachable
}
; amdgpu-dynamic-vgpr-block-size = 0
define amdgpu_cs_chain void @func.4(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #1 {
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39},~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49},~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59},~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69},~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}"()
%fptr = load ptr, ptr inttoptr(i64 0 to ptr)
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper)
unreachable
}
define amdgpu_cs_chain_preserve void @retry_vgpr_alloc(<3 x i32> inreg %sgprs) #0 {
%fptr = load ptr, ptr inttoptr(i64 0 to ptr)
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @retry_vgpr_alloc)
unreachable
}
define amdgpu_cs_chain_preserve void @first_retry_wrapper(<3 x i32> inreg %sgprs) #0 {
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg @retry_vgpr_alloc, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @retry_vgpr_alloc)
unreachable
}
attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #1 = { "amdgpu-dynamic-vgpr-block-size"="0" }