# Source blob: a72522b784022480b3d14631930530349239bc7d
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s
---
# Fully specified entry kernel: every value given in machineFunctionInfo below
# must be printed back unchanged. The -simplify-mir checks additionally expect
# fields whose value equals the default (dynLDSAlign, noSignedZerosFPMath, the
# spill flags, mode, highBitsOf32BitAddress) to be omitted from the output.
# ALL-LABEL: name: kernel0
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: true
# FULL-NEXT: waveLimiter: true
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# FULL-NEXT: frameOffsetReg: '$sgpr12'
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
# SIMPLE-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# SIMPLE-NEXT: frameOffsetReg: '$sgpr12'
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
  explicitKernArgSize: 128
  maxKernArgAlign: 64
  ldsSize: 2048
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: true
  waveLimiter: true
  scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
  frameOffsetReg: '$sgpr12'
  stackPtrOffsetReg: '$sgpr13'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
    workGroupIDX: { reg: '$sgpr6' }
    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
    workItemIDX: { reg: '$vgpr0' }
body: |
  bb.0:
    S_ENDPGM 0
...
# FIXME: With -simplify-mir, it should be possible to omit the
# machineFunctionInfo section entirely for the function below.
---
# No machineFunctionInfo key in the input at all: the printer must still emit
# the section, filled with the values checked below.
# ALL-LABEL: name: no_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: no_mfi
body: |
  bb.0:
    S_ENDPGM 0
...
---
# machineFunctionInfo is present but has no fields: the expected output is the
# same as for a function that has no machineFunctionInfo key at all.
# ALL-LABEL: name: empty_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi
# The value is intentionally left empty; that is the case under test.
machineFunctionInfo:
body: |
  bb.0:
    S_ENDPGM 0
...
---
# Only isEntryFunction is set. It must be printed as true in both outputs,
# while every other field keeps the value seen for an empty section.
# ALL-LABEL: name: empty_mfi_entry_func
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    S_ENDPGM 0
...
---
# scratchRSrcReg is written out explicitly, but with the same value the printer
# shows when the field is unspecified. The full output must list all three
# special registers; the -simplify-mir output must omit all three of them.
# ALL-LABEL: name: default_regs_mfi
# FULL: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'

# SIMPLE-NOT: scratchRSrcReg
# SIMPLE-NOT: frameOffsetReg
# SIMPLE-NOT: stackPtrOffsetReg
name: default_regs_mfi
machineFunctionInfo:
  scratchRSrcReg: '$private_rsrc_reg'
body: |
  bb.0:
    S_ENDPGM 0
...
---
# argumentInfo entries may carry a stack offset instead of a register, and a
# register entry may carry a mask. The hexadecimal mask 0xff00 in the input is
# expected back in decimal (65280), and privateSegmentBuffer is expected in the
# output even though the input does not list it.
# ALL-LABEL: name: fake_stack_arginfo
# FULL: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: flatScratchInit: { offset: 4 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
name: fake_stack_arginfo
machineFunctionInfo:
  argumentInfo:
    flatScratchInit: { offset: 4 }
    workItemIDY: { reg: '$vgpr0' , mask: 0xff00 }
body: |
  bb.0:
    S_ENDPGM 0
...
---
# Every field of the mode sub-section is set to false. Both the full and the
# -simplify-mir output must print the whole sub-section with these values.
# ALL-LABEL: name: parse_mode
# ALL: mode:
# ALL-NEXT: ieee: false
# ALL-NEXT: dx10-clamp: false
# ALL-NEXT: fp32-input-denormals: false
# ALL-NEXT: fp32-output-denormals: false
# ALL-NEXT: fp64-fp16-input-denormals: false
# ALL-NEXT: fp64-fp16-output-denormals: false
name: parse_mode
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: false
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: false
    fp64-fp16-output-denormals: false
body: |
  bb.0:
    S_ENDPGM 0
...
---
# Both spill flags are set to true and must be printed, adjacent and in this
# order, by both the full and the -simplify-mir output.
# ALL-LABEL: name: parse_spilled_regs
# ALL: machineFunctionInfo:
# ALL: hasSpilledSGPRs: true
# ALL-NEXT: hasSpilledVGPRs: true
name: parse_spilled_regs
machineFunctionInfo:
  hasSpilledSGPRs: true
  hasSpilledVGPRs: true
body: |
  bb.0:
    S_ENDPGM 0
...
---
# dynLDSAlign is set to 8 while ldsSize is left unspecified. The full output
# prints it directly after ldsSize; the -simplify-mir output must keep it too.
# ALL-LABEL: name: dyn_lds_with_alignment
# FULL: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 8

# SIMPLE: dynLDSAlign: 8
name: dyn_lds_with_alignment
machineFunctionInfo:
  dynLDSAlign: 8
body: |
  bb.0:
    S_ENDPGM 0
...
---
# An input occupancy of 0 is not printed back as 0: the expected output is 10,
# the same value the functions in this file get when occupancy is unspecified.
# ALL-LABEL: name: occupancy_0
# ALL: occupancy: 10
name: occupancy_0
machineFunctionInfo:
  occupancy: 0
body: |
  bb.0:
    S_ENDPGM 0
...
---
# An explicit occupancy of 3 must be printed back unchanged in both outputs.
# ALL-LABEL: name: occupancy_3
# ALL: occupancy: 3
name: occupancy_3
machineFunctionInfo:
  occupancy: 3
body: |
  bb.0:
    S_ENDPGM 0
...
---
# scavengeFI names a frame index by reference ('%stack.0'); the referenced
# object is the spill slot declared in the stack list of this function. The
# reference must be printed back unchanged in both outputs.
# ALL-LABEL: name: scavenge_fi
# ALL: scavengeFI: '%stack.0'
name: scavenge_fi
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
machineFunctionInfo:
  scavengeFI: '%stack.0'
body: |
  bb.0:
    S_ENDPGM 0
...