//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction.
| // |
| // mov %SPL, %depot |
| // cvta.local %SP, %SPL |
| // |
// Because a frame index is a generic address and alloca can only return a
// generic pointer, without this pass the instructions producing an alloca'ed
// address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
// and loads on this address with their .local versions, but this may introduce
// a lot of cvta.to.local instructions. Performance can be improved if we avoid
// casting the address back and forth and directly calculate the local address
// based on %SPL. This peephole pass optimizes these cases.
//
// For example, it will transform the following pattern
| // %0 = LEA_ADDRi64 %VRFrame64, 4 |
| // %1 = cvta_to_local_yes_64 %0 |
| // |
| // into |
| // %1 = LEA_ADDRi64 %VRFrameLocal64, 4 |
| // |
| // %VRFrameLocal64 is the virtual register name of %SPL |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "NVPTX.h" |
| #include "NVPTXRegisterInfo.h" |
| #include "NVPTXSubtarget.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/TargetInstrInfo.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "nvptx-peephole" |
| |
| namespace llvm { |
| void initializeNVPTXPeepholePass(PassRegistry &); |
| } |
| |
| namespace { |
| struct NVPTXPeephole : public MachineFunctionPass { |
| public: |
| static char ID; |
| NVPTXPeephole() : MachineFunctionPass(ID) { |
| initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry()); |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| StringRef getPassName() const override { |
| return "NVPTX optimize redundant cvta.to.local instruction"; |
| } |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| }; |
| } |
| |
| char NVPTXPeephole::ID = 0; |
| |
| INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false) |
| |
| static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { |
| auto &MBB = *Root.getParent(); |
| auto &MF = *MBB.getParent(); |
| // Check current instruction is cvta.to.local |
| if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 && |
| Root.getOpcode() != NVPTX::cvta_to_local_yes) |
| return false; |
| |
| auto &Op = Root.getOperand(1); |
| const auto &MRI = MF.getRegInfo(); |
| MachineInstr *GenericAddrDef = nullptr; |
| if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) { |
| GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg()); |
| } |
| |
| // Check the register operand is uniquely defined by LEA_ADDRi instruction |
| if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB || |
| (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 && |
| GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) { |
| return false; |
| } |
| |
| const NVPTXRegisterInfo *NRI = |
| MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); |
| |
| // Check the LEA_ADDRi operand is Frame index |
| auto &BaseAddrOp = GenericAddrDef->getOperand(1); |
| if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) { |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static void CombineCVTAToLocal(MachineInstr &Root) { |
| auto &MBB = *Root.getParent(); |
| auto &MF = *MBB.getParent(); |
| const auto &MRI = MF.getRegInfo(); |
| const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
| auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); |
| |
| const NVPTXRegisterInfo *NRI = |
| MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); |
| |
| MachineInstrBuilder MIB = |
| BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()), |
| Root.getOperand(0).getReg()) |
| .addReg(NRI->getFrameLocalRegister(MF)) |
| .add(Prev.getOperand(2)); |
| |
| MBB.insert((MachineBasicBlock::iterator)&Root, MIB); |
| |
| // Check if MRI has only one non dbg use, which is Root |
| if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) { |
| Prev.eraseFromParentAndMarkDBGValuesForRemoval(); |
| } |
| Root.eraseFromParentAndMarkDBGValuesForRemoval(); |
| } |
| |
| bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { |
| if (skipFunction(MF.getFunction())) |
| return false; |
| |
| bool Changed = false; |
| // Loop over all of the basic blocks. |
| for (auto &MBB : MF) { |
| // Traverse the basic block. |
| auto BlockIter = MBB.begin(); |
| |
| while (BlockIter != MBB.end()) { |
| auto &MI = *BlockIter++; |
| if (isCVTAToLocalCombinationCandidate(MI)) { |
| CombineCVTAToLocal(MI); |
| Changed = true; |
| } |
| } // Instruction |
| } // Basic Block |
| |
| const NVPTXRegisterInfo *NRI = |
| MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); |
| |
| // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal |
| const auto &MRI = MF.getRegInfo(); |
| if (MRI.use_empty(NRI->getFrameRegister(MF))) { |
| if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) { |
| MI->eraseFromParentAndMarkDBGValuesForRemoval(); |
| } |
| } |
| |
| return Changed; |
| } |
| |
| MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); } |