[PowerPC Peephole] Constants into a join add, use ADDI over LI/ADD.

Two blocks prior to the join each perform an li and the the join block has an
add using the initialized register. Optimize each predecessor block to instead
use addi and delete the li's and add.

Differential Revision: https://reviews.llvm.org/D36734

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313639 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 5c50d02..7a5091b 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -23,6 +23,8 @@
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +35,8 @@
 
 #define DEBUG_TYPE "ppc-mi-peepholes"
 
+STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
+
 namespace llvm {
   void initializePPCMIPeepholePass(PassRegistry&);
 }
@@ -51,6 +55,8 @@
   }
 
 private:
+  MachineDominatorTree *MDT;
+
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
 
@@ -65,6 +71,13 @@
   unsigned lookThruCopyLike(unsigned SrcReg);
 
 public:
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
   // Main entry point for this pass.
   bool runOnMachineFunction(MachineFunction &MF) override {
     if (skipFunction(*MF.getFunction()))
@@ -78,11 +91,25 @@
 void PPCMIPeephole::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
   MRI = &MF->getRegInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
   DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
   DEBUG(MF->dump());
 }
 
+static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
+                                      MachineRegisterInfo *MRI) {
+  assert(Op && "Invalid Operand!");
+  if (!Op->isReg())
+    return nullptr;
+
+  unsigned Reg = Op->getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return nullptr;
+
+  return MRI->getVRegDef(Reg);
+}
+
 // Perform peephole optimizations.
 bool PPCMIPeephole::simplifyCode(void) {
   bool Simplified = false;
@@ -340,8 +367,97 @@
         }
         break;
       }
+
+      // TODO: Any instruction that has an immediate form fed only by a PHI
+      // whose operands are all load immediate can be folded away. We currently
+      // do this for ADD instructions, but should expand it to arithmetic and
+      // binary instructions with immediate forms in the future.
+      case PPC::ADD4:
+      case PPC::ADD8: {
+        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
+          assert(PhiOp && "Invalid Operand!");
+          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
+
+          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
+                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
+        };
+
+        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
+                                            MachineOperand *PhiOp) {
+          assert(PhiOp && "Invalid Operand!");
+          assert(DominatorOp && "Invalid Operand!");
+          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
+          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
+
+          // Note: the vregs only show up at odd indices position of PHI Node,
+          // the even indices position save the BB info.
+          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+            MachineInstr *LiMI =
+                getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
+            if (!LiMI || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
+                !MDT->dominates(DefDomMI, LiMI) ||
+                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8))
+              return false;
+          }
+
+          return true;
+        };
+
+        MachineOperand Op1 = MI.getOperand(1);
+        MachineOperand Op2 = MI.getOperand(2);
+        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
+          std::swap(Op1, Op2);
+        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
+          break; // We don't have an ADD fed by LI's that can be transformed
+
+        // Now we know that Op1 is the PHI node and Op2 is the dominator
+        unsigned DominatorReg = Op2.getReg();
+
+        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
+                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
+                                             : &PPC::GPRC_and_GPRC_NOR0RegClass;
+        MRI->setRegClass(DominatorReg, TRC);
+
+        // replace LIs with ADDIs
+        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
+        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
+          DEBUG(dbgs() << "Optimizing LI to ADDI: ");
+          DEBUG(LiMI->dump());
+
+          // There could be repeated registers in the PHI, e.g: %vreg1<def> =
+          // PHI %vreg6, <BB#2>, %vreg8, <BB#3>, %vreg8, <BB#6>; So if we've
+          // already replaced the def instruction, skip.
+          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
+            continue;
+
+          assert((LiMI->getOpcode() == PPC::LI ||
+                  LiMI->getOpcode() == PPC::LI8) &&
+                 "Invalid Opcode!");
+          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
+          LiMI->RemoveOperand(1);                    // remove the imm of LI
+          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
+                                                              : PPC::ADDI8));
+          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
+              .addReg(DominatorReg)
+              .addImm(LiImm); // restore the imm of LI
+          DEBUG(LiMI->dump());
+        }
+
+        // Replace ADD with COPY
+        DEBUG(dbgs() << "Optimizing ADD to COPY: ");
+        DEBUG(MI.dump());
+        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+                MI.getOperand(0).getReg())
+            .add(Op1);
+        ToErase = &MI;
+        Simplified = true;
+        NumOptADDLIs++;
+        break;
+      }
       }
     }
+
     // If the last instruction was marked for elimination,
     // remove it now.
     if (ToErase) {