# blob: d06a2875345c9b58b5e7d81b50fb9982d7ffd787 [file] [edit]
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s
# Regression for the gfx90a waterfall lane reader: when an inreg call
# argument's source virtual register lives in an AV_* class (which
# allows AGPR), emitLoadScalarOpsFromVGPRLoop must legalize the
# ScalarOp into a class compatible with V_READFIRSTLANE_B32 by
# inserting a COPY to the common subclass (e.g. VGPR_32). Without the
# COPY, the loop emits `V_READFIRSTLANE_B32 %vreg:av_32`, and
# `-verify-machineinstrs` fails with:
#
#   *** Bad machine code: Illegal virtual register for instruction ***
#   Expected a VGPR_32 register, but got a AV_32 register
#
# AV_32 case: when the ScalarOp is in AV_32, a COPY to VGPR_32 must be
# inserted in the loop header before V_READFIRSTLANE_B32 reads the lane,
# so that the readfirstlane source is in a pure VGPR class.
---
# AV_32 case. The input function holds the call argument in %0, an av_32
# virtual register, and copies it into $sgpr16, which SI_CALL_ISEL reads
# as an implicit operand. The check lines in the body describe the
# waterfall loop expected after si-fix-sgpr-copies has run.
name: waterfall_si_call_inreg_av32
tracksRegLiveness: true
frameInfo:
  adjustsStack: true
  hasCalls: true
machineFunctionInfo:
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  occupancy: 8
body: |
  ; Entry block: the two incoming copies are expected to stay as written;
  ; SRC keeps its av_32 class.
  ; CHECK-LABEL: name: waterfall_si_call_inreg_av32
  ; CHECK: bb.0:
  ; CHECK: [[SRC:%[0-9]+]]:av_32 = COPY $vgpr0
  ; CHECK: [[DEST:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
  ;
  ; Loop header: SRC must first be copied into a vgpr_32 register, and both
  ; the readfirstlane and the lane comparison must use that copy rather
  ; than the av_32 register.
  ; CHECK: bb.1:
  ; CHECK: [[VCOPY:%[0-9]+]]:vgpr_32 = COPY [[SRC]]
  ; CHECK: [[LANE:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[VCOPY]], implicit $exec
  ; CHECK: {{.*}}:sreg_64_xexec = V_CMP_EQ_U32_e64 [[LANE]], [[VCOPY]], implicit $exec
  ; CHECK: {{.*}}:sreg_64_xexec = S_AND_SAVEEXEC_B64
  ;
  ; Loop body: the whole call sequence sits in bb.2, $sgpr16 is fed from
  ; the value read by V_READFIRSTLANE_B32, and the block ends by looping
  ; back to bb.1.
  ; CHECK: bb.2:
  ; CHECK: ADJCALLSTACKUP
  ; CHECK: $sgpr16 = COPY [[LANE]]
  ; CHECK: SI_CALL_ISEL [[DEST]], 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr16
  ; CHECK: ADJCALLSTACKDOWN
  ; CHECK: SI_WATERFALL_LOOP %bb.1
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5
    ; %0 is deliberately av_32: this is the register class that is not a
    ; legal V_READFIRSTLANE_B32 source.
    %0:av_32 = COPY $vgpr0
    %1:sreg_64_xexec = COPY $sgpr4_sgpr5
    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    ; Copy from a vector-class virtual register into a physical SGPR; this
    ; is the copy the pass has to legalize.
    $sgpr16 = COPY %0
    SI_CALL_ISEL killed %1, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr16
    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    S_ENDPGM 0
...
# Baseline: a VGPR_32 ScalarOp is already a legal V_READFIRSTLANE_B32
# source, so no extra COPY should appear in the loop header and
# V_READFIRSTLANE_B32 should read the original VGPR directly.
---
# VGPR_32 baseline. Same input shape as the AV_32 function in this file,
# except that the call argument %0 is already vgpr_32. Only the entry
# block and the loop header are checked.
name: waterfall_si_call_inreg_vgpr32
tracksRegLiveness: true
frameInfo:
  adjustsStack: true
  hasCalls: true
machineFunctionInfo:
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  occupancy: 8
body: |
  ; Entry block: the incoming copies are expected to stay as written.
  ; CHECK-LABEL: name: waterfall_si_call_inreg_vgpr32
  ; CHECK: bb.0:
  ; CHECK: [[SRC:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[DEST:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
  ;
  ; Loop header: no vgpr_32 copy of SRC may appear between the block label
  ; and the readfirstlane; the readfirstlane and the lane comparison must
  ; use SRC directly.
  ; CHECK: bb.1:
  ; CHECK-NOT: vgpr_32 = COPY [[SRC]]
  ; CHECK: [[LANE:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[SRC]], implicit $exec
  ; CHECK: {{.*}}:sreg_64_xexec = V_CMP_EQ_U32_e64 [[LANE]], [[SRC]], implicit $exec
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5
    ; %0 is vgpr_32 here, so it can be read by V_READFIRSTLANE_B32 as is.
    %0:vgpr_32 = COPY $vgpr0
    %1:sreg_64_xexec = COPY $sgpr4_sgpr5
    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    ; Copy from a VGPR virtual register into a physical SGPR that the call
    ; reads implicitly.
    $sgpr16 = COPY %0
    SI_CALL_ISEL killed %1, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr16
    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    S_ENDPGM 0
...