//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after the register coalescer to allow it to do its job
/// and before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64-bit SGPRs with immediate initializers,
/// although the same should be possible with other register classes and
/// instructions if necessary.
///
//===----------------------------------------------------------------------===//

| #include "AMDGPU.h" |
| #include "GCNSubtarget.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "llvm/CodeGen/LiveIntervals.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include "llvm/InitializePasses.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" |
| |
namespace {

class GCNPreRAOptimizations : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  bool processReg(Register Reg);

public:
  static char ID;

  GCNPreRAOptimizations() : MachineFunctionPass(ID) {
    initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
                      "AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE,
                    "AMDGPU Pre-RA optimizations", false, false)

char GCNPreRAOptimizations::ID = 0;

char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;

FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  return new GCNPreRAOptimizations();
}

bool GCNPreRAOptimizations::processReg(Register Reg) {
  MachineInstr *Def0 = nullptr;
  MachineInstr *Def1 = nullptr;
  uint64_t Init = 0;
  bool Changed = false;
  SmallSet<Register, 32> ModifiedRegs;
  bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));

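  // Walk every def of Reg. S_MOV_B32 halves of an SGPR tuple are recorded in
  // Def0/Def1 and their immediates accumulated into Init; plain
  // V_ACCVGPR_WRITE_B32_e64 defs are fine as-is; COPY defs of an AGPR may get
  // their source rewritten. Any other defining opcode disqualifies Reg.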
  for (MachineInstr &I : MRI->def_instructions(Reg)) {
    switch (I.getOpcode()) {
    default:
      return false;
    case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
      break;
    case AMDGPU::COPY: {
      // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
      // intermediate temporary VGPR register. Try to find the defining
      // accvgpr_write to avoid temporary registers.
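      //
      // Illustrative MIR (register names are placeholders, not tied to any
      // particular test):
      //   %vgpr:vgpr_32 = ...
      //   %agpr:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %vgpr
      //   %dst:agpr_32  = COPY %agpr
      // =>
      //   %dst:agpr_32  = COPY %vgpr
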
      if (!IsAGPRDst)
        return false;

      Register SrcReg = I.getOperand(1).getReg();

      if (!SrcReg.isVirtual())
        break;

      // Check if the source of the copy is another AGPR.
      bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
      if (!IsAGPRSrc)
        break;

      // def_instructions() does not look at subregs, so it may give us a
      // different instruction that defines the same vreg but a different
      // subreg, so we have to check the subreg manually.
      Register SrcSubReg = I.getOperand(1).getSubReg();
      for (auto &Def : MRI->def_instructions(SrcReg)) {
        if (SrcSubReg != Def.getOperand(0).getSubReg())
          continue;

        if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
          MachineOperand DefSrcMO = Def.getOperand(1);

          // Immediates are not an issue and can be propagated in the
          // postrapseudos pass. Only handle cases where the defining
          // accvgpr_write source is a vreg.
          if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
            // Propagate the source reg of the accvgpr_write to this copy
            // instruction.
            I.getOperand(1).setReg(DefSrcMO.getReg());
            I.getOperand(1).setSubReg(DefSrcMO.getSubReg());

            // Reg uses were changed; collect the unique set of registers to
            // update live intervals at the end.
            ModifiedRegs.insert(DefSrcMO.getReg());
            ModifiedRegs.insert(SrcReg);

            Changed = true;
          }

          // Found the defining accvgpr_write, stop looking any further.
          break;
        }
      }
      break;
    }
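    // A 64-bit SGPR tuple is initialized with one S_MOV_B32 per 32-bit half;
    // record each half's immediate into the matching half of Init so the two
    // moves can later be fused into a single 64-bit pseudo move.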
    case AMDGPU::S_MOV_B32:
      if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
          I.getNumOperands() != 2)
        return false;

      switch (I.getOperand(0).getSubReg()) {
      default:
        return false;
      case AMDGPU::sub0:
        if (Def0)
          return false;
        Def0 = &I;
        Init |= I.getOperand(1).getImm() & 0xffffffff;
        break;
      case AMDGPU::sub1:
        if (Def1)
          return false;
        Def1 = &I;
        Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
        break;
      }
      break;
    }
  }

  // For an AGPR reg, check if live intervals need to be updated.
  if (IsAGPRDst) {
    if (Changed) {
      for (Register RegToUpdate : ModifiedRegs) {
        LIS->removeInterval(RegToUpdate);
        LIS->createAndComputeVirtRegInterval(RegToUpdate);
      }
    }

    return Changed;
  }

  // For an SGPR reg, check if we can combine the instructions.
  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
    return Changed;

  LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
                    << " =>\n");

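  // Order the pair so Def0 is the earlier instruction; the fused 64-bit move
  // is inserted at that point, so it dominates every use of either half.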
  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
                                LIS->getInstructionIndex(*Def0)))
    std::swap(Def0, Def1);

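  // Drop the old moves from the slot index maps before erasing them, then
  // register the new instruction and recompute Reg's live interval.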
  LIS->RemoveMachineInstrFromMaps(*Def0);
  LIS->RemoveMachineInstrFromMaps(*Def1);
  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
                      TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
                  .addImm(Init);

  Def0->eraseFromParent();
  Def1->eraseFromParent();
  LIS->InsertMachineInstrInMaps(*NewI);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << "  " << *NewI);

  return true;
}

bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  TRI = ST.getRegisterInfo();

  bool Changed = false;

  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS->hasInterval(Reg))
      continue;
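    // Candidates are 64-bit SGPR tuples (for S_MOV fusing) and, on subtargets
    // without gfx90a instructions, AGPR classes whose AGPR-to-AGPR copies
    // would otherwise need a temporary VGPR.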
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
        (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
      continue;

    Changed |= processReg(Reg);
  }

  return Changed;
}