[AMDGPU] Select AGPR in PHI operand legalization If a PHI defines AGPR legalize its operands to AGPR. At the moment we can get an AGPR PHI with VGPR operands. I am not aware of any problems as it seems to be handled gracefully in RA, but this is not right anyway. It also slightly decreases VGPR pressure in some cases because we do not have to a copy via VGPR. Differential Revision: https://reviews.llvm.org/D69206 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375446 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 6e6c5e79c99dc0eca108cd4bcaa4e31b9c64ea35 [log] [tgz]
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Mon Oct 21 19:25:27 2019 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Mon Oct 21 19:25:27 2019 +0000
tree: 33b3712ec7c13b12111df5ebffd301c7aa7b4357
parent: b1407138d45ba368aefd73f33ecb25f09432531a [diff]
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 88dc938..57c271a 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -4576,6 +4576,10 @@
           VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
                     ? RI.getEquivalentAGPRClass(SRC)
                     : RI.getEquivalentVGPRClass(SRC);
+      } else {
+          VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+                    ? RI.getEquivalentAGPRClass(VRC)
+                    : RI.getEquivalentVGPRClass(VRC);
       }
       RC = VRC;
     } else {

diff --git a/test/CodeGen/AMDGPU/mfma-loop.ll b/test/CodeGen/AMDGPU/mfma-loop.ll
index 02f7c9b..a67aadf 100644
--- a/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/test/CodeGen/AMDGPU/mfma-loop.ll

@@ -1,13 +1,64 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_mfma_loop_zeroinit:
-; GCN-COUNT32: v_accvgpr_write_b32
+
+; Check that we do not use 32 temp vgprs, but rotate 3 vgprs only.
+; 3 vgprs are needed to avoid wait states between writes.
+
+; FIXME: We should not be using and temporary registers at all.
+; At the moment we initialize an sgpr, then copy it via vgprs.
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2:v[0-9]+]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3:v[0-9]+]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1:v[0-9]+]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; Check that we do not copy agprs to vgprs and back inside the loop.
+
 ; GCN: [[LOOP:BB[0-9_]+]]:
 ; GCN-NOT: v_accvgpr
 ; GCN: v_mfma_f32_32x32x1f32
 ; GCN-NOT: v_accvgpr
 ; GCN: s_cbranch_scc1 [[LOOP]]
+
+; Final result should be read only once after the loop.
+
 ; GCN-COUNT32: v_accvgpr_read_b32
+
 define amdgpu_kernel void @test_mfma_loop_zeroinit(<32 x float> addrspace(1)* %arg) {
 entry:
   br label %for.cond.preheader
commit	6e6c5e79c99dc0eca108cd4bcaa4e31b9c64ea35	[log] [tgz]
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Mon Oct 21 19:25:27 2019 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Mon Oct 21 19:25:27 2019 +0000
tree	33b3712ec7c13b12111df5ebffd301c7aa7b4357
parent	b1407138d45ba368aefd73f33ecb25f09432531a [diff]