[PowerPC] Convert r+r instructions to r+i (pre and post RA)

This patch adds the necessary infrastructure to convert instructions that
take two register operands to those that take a register and immediate if
the necessary operand is produced by a load-immediate. Furthermore, it uses
this infrastructure to perform such conversions twice - first at MachineSSA
and then pre-emit.

There are a number of reasons we may end up with opportunities for this
transformation, including but not limited to:
- X-Form instructions chosen since the exact offset isn't available at ISEL time
- Atomic instructions with constant operands (we will add patterns for this
  in the future)
- Tail duplication may duplicate code where one block contains this redundancy
- When emitting compare-free code in PPCDAGToDAGISel, we don't handle constant
  comparands specially

Furthermore, this patch moves the initialization of PPCMIPeepholePass so that
it can be used for MIR tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320791 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 1a11c95..3f17378 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -43,6 +43,7 @@
   PPCVSXFMAMutate.cpp
   PPCVSXSwapRemoval.cpp
   PPCExpandISEL.cpp
+  PPCPreEmitPeephole.cpp
   )
 
 add_subdirectory(AsmParser)
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 199455d..dfdec24 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -50,6 +50,7 @@
   FunctionPass *createPPCTLSDynamicCallPass();
   FunctionPass *createPPCBoolRetToIntPass();
   FunctionPass *createPPCExpandISELPass();
+  FunctionPass *createPPCPreEmitPeepholePass();
   void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP, bool isDarwin);
   bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
@@ -59,7 +60,9 @@
   void initializePPCVSXFMAMutatePass(PassRegistry&);
   void initializePPCBoolRetToIntPass(PassRegistry&);
   void initializePPCExpandISELPass(PassRegistry &);
+  void initializePPCPreEmitPeepholePass(PassRegistry &);
   void initializePPCTLSDynamicCallPass(PassRegistry &);
+  void initializePPCMIPeepholePass(PassRegistry&);
   extern char &PPCVSXFMAMutateID;
 
   namespace PPCII {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index d5b5f69..fdd28c2 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -194,6 +194,11 @@
           (BL8_NOP texternalsym:$dst)>;
 
 // Atomic operations
+// FIXME: some of these might be used with constant operands. This will result
+// in constant materialization instructions that may be redundant. We currently
+// clean this up in PPCMIPeephole with calls to
+// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
+// in the first place.
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I64 : Pseudo<
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 15e4575..a9060b4 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -51,6 +51,10 @@
 STATISTIC(NumStoreSPILLVSRRCAsGpr,
           "Number of spillvsrrc spilled to stack as gpr");
 STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
+STATISTIC(CmpIselsConverted,
+          "Number of ISELs that depend on comparison of constants converted");
+STATISTIC(MissedConvertibleImmediateInstrs,
+          "Number of compare-immediate instructions fed by constants");
 
 static cl::
 opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
@@ -2147,6 +2151,816 @@
   return false;
 }
 
+unsigned PPCInstrInfo::lookThruCopyLike(unsigned SrcReg,
+                                        const MachineRegisterInfo *MRI) {
+  while (true) {
+    MachineInstr *MI = MRI->getVRegDef(SrcReg);
+    if (!MI->isCopyLike())
+      return SrcReg;
+
+    unsigned CopySrcReg;
+    if (MI->isCopy())
+      CopySrcReg = MI->getOperand(1).getReg();
+    else {
+      assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
+      CopySrcReg = MI->getOperand(2).getReg();
+    }
+
+    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+      return CopySrcReg;
+
+    SrcReg = CopySrcReg;
+  }
+}
+
+// Essentially a compile-time implementation of a compare->isel sequence.
+// It takes two constants to compare, along with the true/false registers
+// and the comparison type (as a subreg to a CR field) and returns one
+// of the true/false registers, depending on the comparison results.
+static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
+                          unsigned TrueReg, unsigned FalseReg,
+                          unsigned CRSubReg) {
+  // Signed comparisons. The immediates are assumed to be sign-extended.
+  if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
+    switch (CRSubReg) {
+    default: llvm_unreachable("Unknown integer comparison type.");
+    case PPC::sub_lt:
+      return Imm1 < Imm2 ? TrueReg : FalseReg;
+    case PPC::sub_gt:
+      return Imm1 > Imm2 ? TrueReg : FalseReg;
+    case PPC::sub_eq:
+      return Imm1 == Imm2 ? TrueReg : FalseReg;
+    }
+  }
+  // Unsigned comparisons.
+  else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
+    switch (CRSubReg) {
+    default: llvm_unreachable("Unknown integer comparison type.");
+    case PPC::sub_lt:
+      return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
+    case PPC::sub_gt:
+      return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
+    case PPC::sub_eq:
+      return Imm1 == Imm2 ? TrueReg : FalseReg;
+    }
+  }
+  return PPC::NoRegister;
+}
+
+// Replace an instruction with one that materializes a constant (and sets
+// CR0 if the original instruction was a record-form instruction).
+void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
+                                      const LoadImmediateInfo &LII) const {
+  // Remove existing operands.
+  int OperandToKeep = LII.SetCR ? 1 : 0;
+  for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
+    MI.RemoveOperand(i);
+
+  // Replace the instruction.
+  if (LII.SetCR)
+    MI.setDesc(get(LII.Is64Bit ? PPC::ANDIo8 : PPC::ANDIo));
+  else
+    MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
+
+  // Set the immediate.
+  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+      .addImm(LII.Imm);
+}
+
+MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
+                                             unsigned &ConstOp,
+                                             bool &SeenIntermediateUse) const {
+  ConstOp = ~0U;
+  MachineInstr *DefMI = nullptr;
+  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+  // If we'ere in SSA, get the defs through the MRI. Otherwise, only look
+  // within the basic block to see if the register is defined using an LI/LI8.
+  if (MRI->isSSA()) {
+    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
+      if (!MI.getOperand(i).isReg())
+        continue;
+      unsigned Reg = MI.getOperand(i).getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+      unsigned TrueReg = lookThruCopyLike(Reg, MRI);
+      if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
+        DefMI = MRI->getVRegDef(TrueReg);
+        if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
+          ConstOp = i;
+          break;
+        }
+      }
+    }
+  } else {
+    // Looking back through the definition for each operand could be expensive,
+    // so exit early if this isn't an instruction that either has an immediate
+    // form or is already an immediate form that we can handle.
+    ImmInstrInfo III;
+    unsigned Opc = MI.getOpcode();
+    bool ConvertibleImmForm =
+      Opc == PPC::CMPWI || Opc == PPC::CMPLWI ||
+      Opc == PPC::CMPDI || Opc == PPC::CMPLDI ||
+      Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
+      Opc == PPC::ORI || Opc == PPC::ORI8 ||
+      Opc == PPC::XORI || Opc == PPC::XORI8 ||
+      Opc == PPC::RLDICL || Opc == PPC::RLDICLo ||
+      Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
+      Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
+      Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
+    if (!instrHasImmForm(MI, III) && !ConvertibleImmForm)
+      return nullptr;
+
+    // Don't convert or %X, %Y, %Y since that's just a register move.
+    if ((Opc == PPC::OR || Opc == PPC::OR8) &&
+        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
+      return nullptr;
+    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
+      MachineOperand &MO = MI.getOperand(i);
+      SeenIntermediateUse = false;
+      if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
+        MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
+        It++;
+        unsigned Reg = MI.getOperand(i).getReg();
+
+        // Is this register defined by a load-immediate in this block?
+        for ( ; It != E; ++It) {
+          if (It->modifiesRegister(Reg, &getRegisterInfo())) {
+            if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) {
+              ConstOp = i;
+              return &*It;
+            } else
+              break;
+          } else if (It->readsRegister(Reg, &getRegisterInfo()))
+            // If we see another use of this reg between the def and the MI,
+            // we want to flat it so the def isn't deleted.
+            SeenIntermediateUse = true;
+        }
+      }
+    }
+  }
+  return ConstOp == ~0U ? nullptr : DefMI;
+}
+
+// If this instruction has an immediate form and one of its operands is a
+// result of a load-immediate, convert it to the immediate form if the constant
+// is in range.
+bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
+                                          MachineInstr **KilledDef) const {
+  MachineFunction *MF = MI.getParent()->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  bool PostRA = !MRI->isSSA();
+  bool SeenIntermediateUse = true;
+  unsigned ConstantOperand = ~0U;
+  MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand,
+                                         SeenIntermediateUse);
+  if (!DefMI || !DefMI->getOperand(1).isImm())
+    return false;
+  assert(ConstantOperand < MI.getNumOperands() &&
+         "The constant operand needs to be valid at this point");
+
+  int64_t Immediate = DefMI->getOperand(1).getImm();
+  // Sign-extend to 64-bits.
+  int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
+    (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
+
+  if (KilledDef && MI.getOperand(ConstantOperand).isKill() &&
+      !SeenIntermediateUse)
+    *KilledDef = DefMI;
+
+  // If this is a reg+reg instruction that has a reg+imm form, convert it now.
+  ImmInstrInfo III;
+  if (instrHasImmForm(MI, III))
+    return transformToImmForm(MI, III, ConstantOperand, SExtImm);
+
+  bool ReplaceWithLI = false;
+  bool Is64BitLI = false;
+  int64_t NewImm = 0;
+  bool SetCR = false;
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  default: return false;
+
+  // FIXME: Any branches conditional on such a comparison can be made
+  // unconditional. At this time, this happens too infrequently to be worth
+  // the implementation effort, but if that ever changes, we could convert
+  // such a pattern here.
+  case PPC::CMPWI:
+  case PPC::CMPLWI:
+  case PPC::CMPDI:
+  case PPC::CMPLDI: {
+    // Doing this post-RA would require dataflow analysis to reliably find uses
+    // of the CR register set by the compare.
+    if (PostRA)
+      return false;
+    // If a compare-immediate is fed by an immediate and is itself an input of
+    // an ISEL (the most common case) into a COPY of the correct register.
+    bool Changed = false;
+    unsigned DefReg = MI.getOperand(0).getReg();
+    int64_t Comparand = MI.getOperand(2).getImm();
+    int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ?
+      (Comparand | 0xFFFFFFFFFFFF0000) : Comparand;
+
+    for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
+      unsigned UseOpc = CompareUseMI.getOpcode();
+      if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
+        continue;
+      unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
+      unsigned TrueReg = CompareUseMI.getOperand(1).getReg();
+      unsigned FalseReg = CompareUseMI.getOperand(2).getReg();
+      unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg,
+                                     FalseReg, CRSubReg);
+      if (RegToCopy == PPC::NoRegister)
+        continue;
+      // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
+      if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
+        CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
+        CompareUseMI.getOperand(1).ChangeToImmediate(0);
+        CompareUseMI.RemoveOperand(3);
+        CompareUseMI.RemoveOperand(2);
+        continue;
+      }
+      DEBUG(dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
+      DEBUG(DefMI->dump(); MI.dump(); CompareUseMI.dump());
+      DEBUG(dbgs() << "Is converted to:\n");
+      // Convert to copy and remove unneeded operands.
+      CompareUseMI.setDesc(get(PPC::COPY));
+      CompareUseMI.RemoveOperand(3);
+      CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
+      CmpIselsConverted++;
+      Changed = true;
+      DEBUG(CompareUseMI.dump());
+    }
+    if (Changed)
+      return true;
+    // This may end up incremented multiple times since this function is called
+    // during a fixed-point transformation, but it is only meant to indicate the
+    // presence of this opportunity.
+    MissedConvertibleImmediateInstrs++;
+    return false;
+  }
+
+  // Immediate forms - may simply be convertable to an LI.
+  case PPC::ADDI:
+  case PPC::ADDI8: {
+    // Does the sum fit in a 16-bit signed field?
+    int64_t Addend = MI.getOperand(2).getImm();
+    if (isInt<16>(Addend + SExtImm)) {
+      ReplaceWithLI = true;
+      Is64BitLI = Opc == PPC::ADDI8;
+      NewImm = Addend + SExtImm;
+      break;
+    }
+  }
+  case PPC::RLDICL:
+  case PPC::RLDICLo:
+  case PPC::RLDICL_32:
+  case PPC::RLDICL_32_64: {
+    // Use APInt's rotate function.
+    int64_t SH = MI.getOperand(2).getImm();
+    int64_t MB = MI.getOperand(3).getImm();
+    APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true);
+    InVal = InVal.rotl(SH);
+    uint64_t Mask = (1LU << (63 - MB + 1)) - 1;
+    InVal &= Mask;
+    // Can't replace negative values with an LI as that will sign-extend
+    // and not clear the left bits. If we're setting the CR bit, we will use
+    // ANDIo which won't sign extend, so that's safe.
+    if (isUInt<15>(InVal.getSExtValue()) ||
+        (Opc == PPC::RLDICLo && isUInt<16>(InVal.getSExtValue()))) {
+      ReplaceWithLI = true;
+      Is64BitLI = Opc != PPC::RLDICL_32;
+      NewImm = InVal.getSExtValue();
+      SetCR = Opc == PPC::RLDICLo;
+      break;
+    }
+    return false;
+  }
+  case PPC::RLWINM:
+  case PPC::RLWINM8:
+  case PPC::RLWINMo:
+  case PPC::RLWINM8o: {
+    int64_t SH = MI.getOperand(2).getImm();
+    int64_t MB = MI.getOperand(3).getImm();
+    int64_t ME = MI.getOperand(4).getImm();
+    APInt InVal(32, SExtImm, true);
+    InVal = InVal.rotl(SH);
+    // Set the bits (       MB + 32      ) to (       ME + 32      ).
+    uint64_t Mask = ((1 << (32 - MB)) - 1) & ~((1 << (31 - ME)) - 1);
+    InVal &= Mask;
+    // Can't replace negative values with an LI as that will sign-extend
+    // and not clear the left bits. If we're setting the CR bit, we will use
+    // ANDIo which won't sign extend, so that's safe.
+    bool ValueFits = isUInt<15>(InVal.getSExtValue());
+    ValueFits |= ((Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o) &&
+                  isUInt<16>(InVal.getSExtValue()));
+    if (ValueFits) {
+      ReplaceWithLI = true;
+      Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
+      NewImm = InVal.getSExtValue();
+      SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o;
+      break;
+    }
+    return false;
+  }
+  case PPC::ORI:
+  case PPC::ORI8:
+  case PPC::XORI:
+  case PPC::XORI8: {
+    int64_t LogicalImm = MI.getOperand(2).getImm();
+    int64_t Result = 0;
+    if (Opc == PPC::ORI || Opc == PPC::ORI8)
+      Result = LogicalImm | SExtImm;
+    else
+      Result = LogicalImm ^ SExtImm;
+    if (isInt<16>(Result)) {
+      ReplaceWithLI = true;
+      Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
+      NewImm = Result;
+      break;
+    }
+    return false;
+  }
+  }
+
+  if (ReplaceWithLI) {
+    DEBUG(dbgs() << "Replacing instruction:\n");
+    DEBUG(MI.dump());
+    DEBUG(dbgs() << "Fed by:\n");
+    DEBUG(DefMI->dump());
+    LoadImmediateInfo LII;
+    LII.Imm = NewImm;
+    LII.Is64Bit = Is64BitLI;
+    LII.SetCR = SetCR;
+    // If we're setting the CR, the original load-immediate must be kept (as an
+    // operand to ANDIo/ANDI8o).
+    if (KilledDef && SetCR)
+      *KilledDef = nullptr;
+    replaceInstrWithLI(MI, LII);
+    DEBUG(dbgs() << "With:\n");
+    DEBUG(MI.dump());
+    return true;
+  }
+  return false;
+}
+
+bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
+                                   ImmInstrInfo &III) const {
+  unsigned Opc = MI.getOpcode();
+  // The vast majority of the instructions would need their operand 2 replaced
+  // with an immediate when switching to the reg+imm form. A marked exception
+  // are the update form loads/stores for which a constant operand 2 would need
+  // to turn into a displacement and move operand 1 to the operand 2 position.
+  III.ImmOpNo = 2;
+  III.ConstantOpNo = 2;
+  III.ImmWidth = 16;
+  III.ImmMustBeMultipleOf = 1;
+  switch (Opc) {
+  default: return false;
+  case PPC::ADD4:
+  case PPC::ADD8:
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 1;
+    III.IsCommutative = true;
+    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
+    break;
+  case PPC::ADDC:
+  case PPC::ADDC8:
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 0;
+    III.IsCommutative = true;
+    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
+    break;
+  case PPC::ADDCo:
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 0;
+    III.IsCommutative = true;
+    III.ImmOpcode = PPC::ADDICo;
+    break;
+  case PPC::SUBFC:
+  case PPC::SUBFC8:
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 0;
+    III.IsCommutative = false;
+    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
+    break;
+  case PPC::CMPW:
+  case PPC::CMPD:
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 0;
+    III.IsCommutative = false;
+    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
+    break;
+  case PPC::CMPLW:
+  case PPC::CMPLD:
+    III.SignedImm = false;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 0;
+    III.IsCommutative = false;
+    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
+    break;
+  case PPC::ANDo:
+  case PPC::AND8o:
+  case PPC::OR:
+  case PPC::OR8:
+  case PPC::XOR:
+  case PPC::XOR8:
+    III.SignedImm = false;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 0;
+    III.IsCommutative = true;
+    switch(Opc) {
+    default: llvm_unreachable("Unknown opcode");
+    case PPC::ANDo: III.ImmOpcode = PPC::ANDIo; break;
+    case PPC::AND8o: III.ImmOpcode = PPC::ANDIo8; break;
+    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
+    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
+    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
+    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
+    }
+    break;
+  case PPC::RLWNM:
+  case PPC::RLWNM8:
+  case PPC::RLWNMo:
+  case PPC::RLWNM8o:
+  case PPC::RLDCL:
+  case PPC::RLDCLo:
+  case PPC::RLDCR:
+  case PPC::RLDCRo:
+  case PPC::SLW:
+  case PPC::SLW8:
+  case PPC::SLWo:
+  case PPC::SLW8o:
+  case PPC::SRW:
+  case PPC::SRW8:
+  case PPC::SRWo:
+  case PPC::SRW8o:
+  case PPC::SRAW:
+  case PPC::SRAWo:
+  case PPC::SLD:
+  case PPC::SLDo:
+  case PPC::SRD:
+  case PPC::SRDo:
+  case PPC::SRAD:
+  case PPC::SRADo:
+    III.SignedImm = false;
+    III.ZeroIsSpecialOrig = 0;
+    III.ZeroIsSpecialNew = 0;
+    III.IsCommutative = false;
+    // This isn't actually true, but the instructions ignore any of the
+    // upper bits, so any immediate loaded with an LI is acceptable.
+    III.ImmWidth = 16;
+    switch(Opc) {
+    default: llvm_unreachable("Unknown opcode");
+    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
+    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
+    case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
+    case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
+    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
+    case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
+    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
+    case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
+    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
+    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
+    case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
+    case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
+    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
+    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
+    case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
+    case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
+    case PPC::SRAW: III.ImmOpcode = PPC::SRAWI; break;
+    case PPC::SRAWo: III.ImmOpcode = PPC::SRAWIo; break;
+    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
+    case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
+    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
+    case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
+    case PPC::SRAD: III.ImmOpcode = PPC::SRADI; break;
+    case PPC::SRADo: III.ImmOpcode = PPC::SRADIo; break;
+    }
+    break;
+  // Loads and stores:
+  case PPC::LBZX:
+  case PPC::LBZX8:
+  case PPC::LHZX:
+  case PPC::LHZX8:
+  case PPC::LHAX:
+  case PPC::LHAX8:
+  case PPC::LWZX:
+  case PPC::LWZX8:
+  case PPC::LWAX:
+  case PPC::LDX:
+  case PPC::LFSX:
+  case PPC::LFDX:
+  case PPC::STBX:
+  case PPC::STBX8:
+  case PPC::STHX:
+  case PPC::STHX8:
+  case PPC::STWX:
+  case PPC::STWX8:
+  case PPC::STDX:
+  case PPC::STFSX:
+  case PPC::STFDX:
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 1;
+    III.ZeroIsSpecialNew = 2;
+    III.IsCommutative = true;
+    III.ImmOpNo = 1;
+    III.ConstantOpNo = 2;
+    switch(Opc) {
+    default: llvm_unreachable("Unknown opcode");
+    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
+    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
+    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
+    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
+    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
+    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
+    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
+    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
+    case PPC::LWAX:
+      III.ImmOpcode = PPC::LWA;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
+    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
+    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
+    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
+    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
+    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
+    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
+    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
+    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
+    case PPC::STDX:
+      III.ImmOpcode = PPC::STD;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
+    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
+    }
+    break;
+  case PPC::LBZUX:
+  case PPC::LBZUX8:
+  case PPC::LHZUX:
+  case PPC::LHZUX8:
+  case PPC::LHAUX:
+  case PPC::LHAUX8:
+  case PPC::LWZUX:
+  case PPC::LWZUX8:
+  case PPC::LDUX:
+  case PPC::LFSUX:
+  case PPC::LFDUX:
+  case PPC::STBUX:
+  case PPC::STBUX8:
+  case PPC::STHUX:
+  case PPC::STHUX8:
+  case PPC::STWUX:
+  case PPC::STWUX8:
+  case PPC::STDUX:
+  case PPC::STFSUX:
+  case PPC::STFDUX:
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 2;
+    III.ZeroIsSpecialNew = 3;
+    III.IsCommutative = false;
+    III.ImmOpNo = 2;
+    III.ConstantOpNo = 3;
+    switch(Opc) {
+    default: llvm_unreachable("Unknown opcode");
+    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
+    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
+    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
+    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
+    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
+    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
+    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
+    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
+    case PPC::LDUX:
+      III.ImmOpcode = PPC::LDU;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
+    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
+    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
+    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
+    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
+    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
+    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
+    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
+    case PPC::STDUX:
+      III.ImmOpcode = PPC::STDU;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
+    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
+    }
+    break;
+  // Power9 only.
+  case PPC::LXVX:
+  case PPC::LXSSPX:
+  case PPC::LXSDX:
+  case PPC::STXVX:
+  case PPC::STXSSPX:
+  case PPC::STXSDX:
+    if (!Subtarget.hasP9Vector())
+      return false;
+    III.SignedImm = true;
+    III.ZeroIsSpecialOrig = 1;
+    III.ZeroIsSpecialNew = 2;
+    III.IsCommutative = true;
+    III.ImmOpNo = 1;
+    III.ConstantOpNo = 2;
+    switch(Opc) {
+    default: llvm_unreachable("Unknown opcode");
+    case PPC::LXVX:
+      III.ImmOpcode = PPC::LXV;
+      III.ImmMustBeMultipleOf = 16;
+      break;
+    case PPC::LXSSPX:
+      III.ImmOpcode = PPC::LXSSP;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    case PPC::LXSDX:
+      III.ImmOpcode = PPC::LXSD;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    case PPC::STXVX:
+      III.ImmOpcode = PPC::STXV;
+      III.ImmMustBeMultipleOf = 16;
+      break;
+    case PPC::STXSSPX:
+      III.ImmOpcode = PPC::STXSSP;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    case PPC::STXSDX:
+      III.ImmOpcode = PPC::STXSD;
+      III.ImmMustBeMultipleOf = 4;
+      break;
+    }
+    break;
+  }
+  return true;
+}
+
+// Utility function for swaping two arbitrary operands of an instruction.
+static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
+  assert(Op1 != Op2 && "Cannot swap operand with itself.");
+
+  unsigned MaxOp = std::max(Op1, Op2);
+  unsigned MinOp = std::min(Op1, Op2);
+  MachineOperand MOp1 = MI.getOperand(MinOp);
+  MachineOperand MOp2 = MI.getOperand(MaxOp);
+  MI.RemoveOperand(std::max(Op1, Op2));
+  MI.RemoveOperand(std::min(Op1, Op2));
+
+  // If the operands we are swapping are the two at the end (the common case)
+  // we can just remove both and add them in the opposite order.
+  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
+    MI.addOperand(MOp2);
+    MI.addOperand(MOp1);
+  } else {
+    // Store all operands in a temporary vector, remove them and re-add in the
+    // right order.
+    SmallVector<MachineOperand, 2> MOps;
+    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
+    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
+      MOps.push_back(MI.getOperand(i));
+      MI.RemoveOperand(i);
+    }
+    // MOp2 needs to be added next.
+    MI.addOperand(MOp2);
+    // Now add the rest.
+    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
+      if (i == MaxOp)
+        MI.addOperand(MOp1);
+      else {
+        MI.addOperand(MOps.back());
+        MOps.pop_back();
+      }
+    }
+  }
+}
+
+bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
+                                      unsigned ConstantOpNo,
+                                      int64_t Imm) const {
+  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  bool PostRA = !MRI.isSSA();
+  // Exit early if we can't convert this.
+  if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative)
+    return false;
+  if (Imm % III.ImmMustBeMultipleOf)
+    return false;
+  if (III.SignedImm) {
+    APInt ActualValue(64, Imm, true);
+    if (!ActualValue.isSignedIntN(III.ImmWidth))
+      return false;
+  } else {
+    uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
+    if ((uint64_t)Imm > UnsignedMax)
+      return false;
+  }
+
+  // If we're post-RA, the instructions don't agree on whether register zero is
+  // special, we can transform this as long as the register operand that will
+  // end up in the location where zero is special isn't R0.
+  if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
+    unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
+      III.ZeroIsSpecialNew + 1;
+    unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
+    unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
+    // If R0 is in the operand where zero is special for the new instruction,
+    // it is unsafe to transform if the constant operand isn't that operand.
+    if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
+        ConstantOpNo != III.ZeroIsSpecialNew)
+      return false;
+    if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
+        ConstantOpNo != PosForOrigZero)
+      return false;
+  }
+
+  unsigned Opc = MI.getOpcode();
+  bool SpecialShift32 =
+    Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
+  bool SpecialShift64 =
+    Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo;
+  bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo ||
+    Opc == PPC::SLDo || Opc == PPC::SRDo;
+  bool RightShift =
+    Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
+
+  MI.setDesc(get(III.ImmOpcode));
+  if (ConstantOpNo == III.ConstantOpNo) {
+    // Converting shifts to immediate form is a bit tricky since they may do
+    // one of three things:
+    // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
+    // 2. If the shift amount is zero, the result is unchanged (save for maybe
+    //    setting CR0)
+    // 3. If the shift amount is in [1, OpSize), it's just a shift
+    if (SpecialShift32 || SpecialShift64) {
+      LoadImmediateInfo LII;
+      LII.Imm = 0;
+      LII.SetCR = SetCR;
+      LII.Is64Bit = SpecialShift64;
+      uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
+      if (Imm & (SpecialShift32 ? 0x20 : 0x40))
+        replaceInstrWithLI(MI, LII);
+      // Shifts by zero don't change the value. If we don't need to set CR0,
+      // just convert this to a COPY. Can't do this post-RA since we've already
+      // cleaned up the copies.
+      else if (!SetCR && ShAmt == 0 && !PostRA) {
+        MI.RemoveOperand(2);
+        MI.setDesc(get(PPC::COPY));
+      } else {
+        // The 32 bit and 64 bit instructions are quite different.
+        if (SpecialShift32) {
+          // Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31).
+          uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
+          uint64_t MB = RightShift ? ShAmt : 0;
+          uint64_t ME = RightShift ? 31 : 31 - ShAmt;
+          MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
+            .addImm(ME);
+        } else {
+          // Left shifts use (N, 63-N), right shifts use (64-N, N).
+          uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
+          uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
+          MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
+        }
+      }
+    } else
+      MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
+  }
+  // Convert commutative instructions (switch the operands and convert the
+  // desired one to an immediate.
+  else if (III.IsCommutative) {
+    MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
+    swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo);
+  } else
+    llvm_unreachable("Should have exited early!");
+
+  // For instructions for which the constant register replaces a different
+  // operand than where the immediate goes, we need to swap them.
+  if (III.ConstantOpNo != III.ImmOpNo)
+    swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo);
+
+  // If the R0/X0 register is special for the original instruction and not for
+  // the new instruction (or vice versa), we need to fix up the register class.
+  if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
+    if (!III.ZeroIsSpecialOrig) {
+      unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
+      const TargetRegisterClass *NewRC =
+        MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
+        &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
+      MRI.setRegClass(RegToModify, NewRC);
+    }
+  }
+  return true;
+}
+
 const TargetRegisterClass *
 PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
   if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 097faf7..4271c50 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -72,6 +72,41 @@
 };
 } // end namespace PPCII
 
+// Instructions that have an immediate form might be convertible to that
+// form if the correct input is a result of a load immediate. In order to
+// know whether the transformation is special, we might need to know some
+// of the details of the two forms.
+struct ImmInstrInfo {
+  // Is the immediate field in the immediate form signed or unsigned?
+  uint64_t SignedImm : 1;
+  // Does the immediate need to be a multiple of some value?
+  uint64_t ImmMustBeMultipleOf : 5;
+  // Is R0/X0 treated specially by the original r+r instruction?
+  // If so, in which operand?
+  uint64_t ZeroIsSpecialOrig : 3;
+  // Is R0/X0 treated specially by the new r+i instruction?
+  // If so, in which operand?
+  uint64_t ZeroIsSpecialNew : 3;
+  // Is the operation commutative?
+  uint64_t IsCommutative : 1;
+  // The operand number to check for load immediate.
+  uint64_t ConstantOpNo : 3;
+  // The operand number for the immediate.
+  uint64_t ImmOpNo : 3;
+  // The opcode of the new instruction.
+  uint64_t ImmOpcode : 16;
+  // The size of the immediate.
+  uint64_t ImmWidth : 5;
+};
+
+// Information required to convert an instruction to just a materialized
+// immediate.
+struct LoadImmediateInfo {
+  unsigned Imm : 16;
+  unsigned Is64Bit : 1;
+  unsigned SetCR : 1;
+};
+
 class PPCSubtarget;
 class PPCInstrInfo : public PPCGenInstrInfo {
   PPCSubtarget &Subtarget;
@@ -87,6 +122,10 @@
                             const TargetRegisterClass *RC,
                             SmallVectorImpl<MachineInstr *> &NewMIs,
                             bool &NonRI, bool &SpillsVRS) const;
+  bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
+                          unsigned ConstantOpNo, int64_t Imm) const;
+  MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp,
+                                 bool &SeenIntermediateUse) const;
   virtual void anchor();
 
 protected:
@@ -313,6 +352,19 @@
   bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const {
    return isSignOrZeroExtended(MI, false, depth);
   }
+
+  bool convertToImmediateForm(MachineInstr &MI,
+                              MachineInstr **KilledDef = nullptr) const;
+  void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
+
+  // This is used to find the "true" source register for n
+  // Machine instruction. Returns the original SrcReg unless it is the target
+  // of a copy-like operation, in which case we chain backwards through all
+  // such operations to the ultimate source register.  If a
+  // physical register is encountered, we stop the search.
+  static unsigned lookThruCopyLike(unsigned SrcReg,
+                                   const MachineRegisterInfo *MRI);
+  bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
 };
 
 }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 181dbb9..a932d05 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1590,6 +1590,11 @@
           (ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
 
 // Atomic operations
+// FIXME: some of these might be used with constant operands. This will result
+// in constant materialization instructions that may be redundant. We currently
+// clean this up in PPCMIPeephole with calls to
+// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
+// in the first place.
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I8 : Pseudo<
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 2f44b8c..45647a2 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -41,6 +41,22 @@
 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
+STATISTIC(NumConvertedToImmediateForm,
+          "Number of instructions converted to their immediate form");
+STATISTIC(NumFunctionsEnteredInMIPeephole,
+          "Number of functions entered in PPC MI Peepholes");
+STATISTIC(NumFixedPointIterations,
+          "Number of fixed-point iterations converting reg-reg instructions "
+          "to reg-imm ones");
+
+static cl::opt<bool>
+FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
+                   cl::desc("Iterate to a fixed point when attempting to "
+                            "convert reg-reg instructions to reg-imm"));
+
+static cl::opt<bool>
+ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
+              cl::desc("Convert eligible reg+reg instructions to reg+imm"));
 
 static cl::opt<bool>
     EnableSExtElimination("ppc-eliminate-signext",
@@ -52,10 +68,6 @@
                           cl::desc("enable elimination of zero-extensions"),
                           cl::init(false), cl::Hidden);
 
-namespace llvm {
-  void initializePPCMIPeepholePass(PassRegistry&);
-}
-
 namespace {
 
 struct PPCMIPeephole : public MachineFunctionPass {
@@ -83,9 +95,6 @@
   bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
   void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                       MachineInstr *MI);
-  // Find the "true" register represented by SrcReg (following chains
-  // of copies and subreg_to_reg operations).
-  unsigned lookThruCopyLike(unsigned SrcReg);
 
 public:
 
@@ -212,6 +221,35 @@
   MachineInstr* ToErase = nullptr;
   std::map<MachineInstr *, bool> TOCSaves;
 
+  NumFunctionsEnteredInMIPeephole++;
+  if (ConvertRegReg) {
+    // Fixed-point conversion of reg/reg instructions fed by load-immediate
+    // into reg/imm instructions. FIXME: This is expensive, control it with
+    // an option.
+    bool SomethingChanged = false;
+    do {
+      NumFixedPointIterations++;
+      SomethingChanged = false;
+      for (MachineBasicBlock &MBB : *MF) {
+        for (MachineInstr &MI : MBB) {
+          if (MI.isDebugValue())
+            continue;
+
+          if (TII->convertToImmediateForm(MI)) {
+            // We don't erase anything in case the def has other uses. Let DCE
+            // remove it if it can be removed.
+            DEBUG(dbgs() << "Converted instruction to imm form: ");
+            DEBUG(MI.dump());
+            NumConvertedToImmediateForm++;
+            SomethingChanged = true;
+            Simplified = true;
+            continue;
+          }
+        }
+      }
+    } while (SomethingChanged && FixedPointRegToImm);
+  }
+
   for (MachineBasicBlock &MBB : *MF) {
     for (MachineInstr &MI : MBB) {
 
@@ -258,8 +296,10 @@
           //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
           // We have to look through chains of COPY and SUBREG_TO_REG
           // to find the real source values for comparison.
-          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
-          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
+          unsigned TrueReg1 =
+            TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
+          unsigned TrueReg2 =
+            TII->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
 
           if (TrueReg1 == TrueReg2
               && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
@@ -273,7 +313,8 @@
             auto isConversionOfLoadAndSplat = [=]() -> bool {
               if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
                 return false;
-              unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
+              unsigned DefReg =
+                TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
               if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
                 MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
                 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
@@ -299,10 +340,10 @@
             // can replace it with a copy.
             if (DefOpc == PPC::XXPERMDI) {
               unsigned FeedImmed = DefMI->getOperand(3).getImm();
-              unsigned FeedReg1
-                = lookThruCopyLike(DefMI->getOperand(1).getReg());
-              unsigned FeedReg2
-                = lookThruCopyLike(DefMI->getOperand(2).getReg());
+              unsigned FeedReg1 =
+                TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+              unsigned FeedReg2 =
+                TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
 
               if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
                 DEBUG(dbgs()
@@ -360,7 +401,8 @@
       case PPC::XXSPLTW: {
         unsigned MyOpcode = MI.getOpcode();
         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
-        unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
+        unsigned TrueReg =
+          TII->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
           break;
         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@@ -422,7 +464,8 @@
       }
       case PPC::XVCVDPSP: {
         // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
-        unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
+        unsigned TrueReg =
+          TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
           break;
         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@@ -430,8 +473,10 @@
         // This can occur when building a vector of single precision or integer
         // values.
         if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
-          unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
-          unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
+          unsigned DefsReg1 =
+            TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+          unsigned DefsReg2 =
+            TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
           if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
               !TargetRegisterInfo::isVirtualRegister(DefsReg2))
             break;
@@ -1221,36 +1266,6 @@
   return Simplified;
 }
 
-// This is used to find the "true" source register for an
-// XXPERMDI instruction, since MachineCSE does not handle the
-// "copy-like" operations (Copy and SubregToReg).  Returns
-// the original SrcReg unless it is the target of a copy-like
-// operation, in which case we chain backwards through all
-// such operations to the ultimate source register.  If a
-// physical register is encountered, we stop the search.
-unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
-
-  while (true) {
-
-    MachineInstr *MI = MRI->getVRegDef(SrcReg);
-    if (!MI->isCopyLike())
-      return SrcReg;
-
-    unsigned CopySrcReg;
-    if (MI->isCopy())
-      CopySrcReg = MI->getOperand(1).getReg();
-    else {
-      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
-      CopySrcReg = MI->getOperand(2).getReg();
-    }
-
-    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
-      return CopySrcReg;
-
-    SrcReg = CopySrcReg;
-  }
-}
-
 } // end default namespace
 
 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
new file mode 100644
index 0000000..df0e9f3
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -0,0 +1,95 @@
+//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pre-emit peephole for catching opportunities introduced by late passes such
+// as MachineBlockPlacement.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-pre-emit-peephole"
+
+STATISTIC(NumRRConvertedInPreEmit,
+          "Number of r+r instructions converted to r+i in pre-emit peephole");
+STATISTIC(NumRemovedInPreEmit,
+          "Number of instructions deleted in pre-emit peephole");
+
+static cl::opt<bool>
+RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
+                   cl::desc("Run pre-emit peephole optimizations."));
+
+namespace {
+  class PPCPreEmitPeephole : public MachineFunctionPass {
+  public:
+    static char ID;
+    PPCPreEmitPeephole() : MachineFunctionPass(ID) {
+      initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) override {
+      if (skipFunction(*MF.getFunction()) || !RunPreEmitPeephole)
+        return false;
+      bool Changed = false;
+      const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+      SmallVector<MachineInstr *, 4> InstrsToErase;
+      for (MachineBasicBlock &MBB : MF) {
+        for (MachineInstr &MI : MBB) {
+          MachineInstr *DefMIToErase = nullptr;
+          if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
+            Changed = true;
+            NumRRConvertedInPreEmit++;
+            DEBUG(dbgs() << "Converted instruction to imm form: ");
+            DEBUG(MI.dump());
+            if (DefMIToErase) {
+              InstrsToErase.push_back(DefMIToErase);
+            }
+          }
+        }
+      }
+      for (MachineInstr *MI : InstrsToErase) {
+        DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
+        DEBUG(MI->dump());
+        MI->eraseFromParent();
+        NumRemovedInPreEmit++;
+      }
+      return Changed;
+    }
+  };
+}
+
+INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
+                false, false)
+char PPCPreEmitPeephole::ID = 0;
+
+FunctionPass *llvm::createPPCPreEmitPeepholePass() {
+  return new PPCPreEmitPeephole();
+}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index c934668..491f25c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -101,7 +101,9 @@
   PassRegistry &PR = *PassRegistry::getPassRegistry();
   initializePPCBoolRetToIntPass(PR);
   initializePPCExpandISELPass(PR);
+  initializePPCPreEmitPeepholePass(PR);
   initializePPCTLSDynamicCallPass(PR);
+  initializePPCMIPeepholePass(PR);
 }
 
 /// Return the datalayout string of a subtarget.
@@ -440,6 +442,7 @@
 }
 
 void PPCPassConfig::addPreEmitPass() {
+  addPass(createPPCPreEmitPeepholePass());
   addPass(createPPCExpandISELPass());
 
   if (getOptLevel() != CodeGenOpt::None)