blob: fe031096479ad98b1a1d49b434af5a705671af13 [file] [log] [blame]
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s
; GCN-LABEL: {{^}}kernel_32_agprs:
; GFX908: .amdhsa_next_free_vgpr 32
; GFX90A: .amdhsa_next_free_vgpr 44
; GFX90A: .amdhsa_accum_offset 12
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
; GFX908: VGPRBlocks: 7
; GFX90A: VGPRBlocks: 5
; GFX908: NumVGPRsForWavesPerEU: 32
; GFX90A: NumVGPRsForWavesPerEU: 44
; GFX90A: AccumOffset: 12
; GCN: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
define amdgpu_kernel void @kernel_32_agprs() #0 {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
ret void
}
; GCN-LABEL: {{^}}kernel_0_agprs:
; GCN: .amdhsa_next_free_vgpr 1
; GFX90A: .amdhsa_accum_offset 4
; GCN: NumVgprs: 1
; GCN: NumAgprs: 0
; GCN: TotalNumVgprs: 1
; GCN: VGPRBlocks: 0
; GCN: NumVGPRsForWavesPerEU: 1
; GFX90A: AccumOffset: 4
; GFX908: Occupancy: 10
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 0
define amdgpu_kernel void @kernel_0_agprs() #0 {
bb:
call void asm sideeffect "", "~{v0}" ()
ret void
}
; GCN-LABEL: {{^}}kernel_40_vgprs:
; GFX908: .amdhsa_next_free_vgpr 40
; GFX90A: .amdhsa_next_free_vgpr 56
; GFX90A: .amdhsa_accum_offset 40
; GCN: NumVgprs: 40
; GCN: NumAgprs: 16
; GFX908: TotalNumVgprs: 40
; GFX90A: TotalNumVgprs: 56
; GFX908: VGPRBlocks: 9
; GFX90A: VGPRBlocks: 6
; GFX908: NumVGPRsForWavesPerEU: 40
; GFX90A: NumVGPRsForWavesPerEU: 56
; GFX90A: AccumOffset: 40
; GFX908: Occupancy: 6
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 9
define amdgpu_kernel void @kernel_40_vgprs() #0 {
bb:
call void asm sideeffect "", "~{v39}" ()
call void asm sideeffect "", "~{a15}" ()
ret void
}
; GCN-LABEL: {{^}}func_32_agprs:
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
define void @func_32_agprs() #0 {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
ret void
}
; GCN-LABEL: {{^}}func_32_vgprs:
; GCN: NumVgprs: 32
; GCN: NumAgprs: 9
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 41
define void @func_32_vgprs() #0 {
bb:
call void asm sideeffect "", "~{v31}" ()
call void asm sideeffect "", "~{a8}" ()
ret void
}
; GCN-LABEL: {{^}}func_0_agprs:
; GCN: NumVgprs: 1
; GCN: NumAgprs: 0
; GCN: TotalNumVgprs: 1
define amdgpu_kernel void @func_0_agprs() #0 {
bb:
call void asm sideeffect "", "~{v0}" ()
ret void
}
; GCN-LABEL: {{^}}kernel_max_gprs:
; GFX908: .amdhsa_next_free_vgpr 256
; GFX90A: .amdhsa_next_free_vgpr 512
; GFX90A: .amdhsa_accum_offset 256
; GCN: NumVgprs: 256
; GCN: NumAgprs: 256
; GFX908: TotalNumVgprs: 256
; GFX90A: TotalNumVgprs: 512
; GFX908: VGPRBlocks: 63
; GFX90A: VGPRBlocks: 63
; GFX908: NumVGPRsForWavesPerEU: 256
; GFX90A: NumVGPRsForWavesPerEU: 512
; GFX90A: AccumOffset: 256
; GCN: Occupancy: 1
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
define amdgpu_kernel void @kernel_max_gprs() #0 {
bb:
call void asm sideeffect "", "~{v255}" ()
call void asm sideeffect "", "~{a255}" ()
ret void
}
; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
; GFX908: .amdhsa_next_free_vgpr 32
; GFX90A: .amdhsa_accum_offset 12
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
; GFX908: VGPRBlocks: 7
; GFX90A: VGPRBlocks: 5
; GFX908: NumVGPRsForWavesPerEU: 32
; GFX90A: NumVGPRsForWavesPerEU: 44
; GFX90A: AccumOffset: 12
; GCN: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
define amdgpu_kernel void @kernel_call_func_32_agprs() #0 {
bb:
call void @func_32_agprs() #0
ret void
}
; GCN-LABEL: {{^}}func_call_func_32_agprs:
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
define void @func_call_func_32_agprs() #0 {
bb:
call void @func_32_agprs() #0
ret void
}
declare void @undef_func()
; GCN-LABEL: {{^}}kernel_call_undef_func:
; GFX908: .amdhsa_next_free_vgpr 24
; GFX90A: .amdhsa_next_free_vgpr 48
; GFX90A: .amdhsa_accum_offset 24
; GCN: NumVgprs: 24
; GCN: NumAgprs: 24
; GFX908: TotalNumVgprs: 24
; GFX90A: TotalNumVgprs: 48
; GFX908: VGPRBlocks: 5
; GFX90A: VGPRBlocks: 5
; GFX908: NumVGPRsForWavesPerEU: 24
; GFX90A: NumVGPRsForWavesPerEU: 48
; GFX90A: AccumOffset: 24
; GFX908: Occupancy: 10
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 5
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
ret void
}
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }