blob: 80b7ac24345da5bce9e5525649b90202195e3031 [file] [log] [blame]
Bill Schmidt9e24ab72015-11-10 21:38:26 +00001//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9//
10// This pass performs peephole optimizations to clean up ugly code
11// sequences at the MachineInstruction layer. It runs at the end of
12// the SSA phases, following VSX swap removal. A pass of dead code
13// elimination follows this one for quick clean-up of any dead
14// instructions introduced here. Although we could do this as callbacks
15// from the generic peephole pass, this would have a couple of bad
16// effects: it might remove optimization opportunities for VSX swap
17// removal, and it would miss cleanups made possible following VSX
18// swap removal.
19//
20//===---------------------------------------------------------------------===//
21
Bill Schmidt9e24ab72015-11-10 21:38:26 +000022#include "PPC.h"
23#include "PPCInstrBuilder.h"
Chandler Carruthe3e43d92017-06-06 11:49:48 +000024#include "PPCInstrInfo.h"
Bill Schmidt9e24ab72015-11-10 21:38:26 +000025#include "PPCTargetMachine.h"
Tony Jianga2daaca2017-09-19 16:14:37 +000026#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/MachineDominators.h"
Bill Schmidt9e24ab72015-11-10 21:38:26 +000028#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Support/Debug.h"
Hiroshi Inouea7d48282017-10-16 04:12:57 +000032#include "llvm/ADT/Statistic.h"
Hiroshi Inoue7166ffb2017-09-05 04:15:17 +000033#include "MCTargetDesc/PPCPredicates.h"
Bill Schmidt9e24ab72015-11-10 21:38:26 +000034
using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

// Pass statistics, reported with -stats.
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");

// Both extension-elimination peepholes are off by default and must be
// enabled explicitly on the command line (cl::init(false), cl::Hidden).
static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);
Hiroshi Inouea7d48282017-10-16 04:12:57 +000052
namespace llvm {
  // Forward declaration of the pass-registry initializer (defined by the
  // INITIALIZE_PASS machinery); called from the pass constructor below.
  void initializePPCMIPeepholePass(PassRegistry&);
}
56
57namespace {
58
// Machine-function pass that performs PPC-specific MI-level peephole
// cleanups (splat/swap simplification, extension elimination, compare
// elimination). Runs late in the SSA pipeline, after VSX swap removal.
struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;   // Target instruction info for the subtarget.
  MachineFunction *MF;       // Function currently being optimized.
  MachineRegisterInfo *MRI;  // Virtual-register def/use information.

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Dominator tree, needed by the ADD-fed-by-PHI-of-LIs peephole.
  MachineDominatorTree *MDT;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Perform peepholes on compare instructions (called from simplifyCode).
  bool eliminateRedundantCompare(void);

  // Find the "true" register represented by SrcReg (following chains
  // of copies and subreg_to_reg operations).
  unsigned lookThruCopyLike(unsigned SrcReg);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The dominator tree is required by the ADD4/ADD8 peephole and is
    // not invalidated by this pass.
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(*MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};
102
// Initialize class variables.
// Caches per-function state (MF, MRI, MDT, TII) so the peephole helpers
// don't have to re-fetch them; also dumps the incoming function under -debug.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  DEBUG(MF->dump());
}
112
Tony Jianga2daaca2017-09-19 16:14:37 +0000113static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
114 MachineRegisterInfo *MRI) {
115 assert(Op && "Invalid Operand!");
116 if (!Op->isReg())
117 return nullptr;
118
119 unsigned Reg = Op->getReg();
120 if (!TargetRegisterInfo::isVirtualRegister(Reg))
121 return nullptr;
122
123 return MRI->getVRegDef(Reg);
124}
125
// This function returns number of known zero bits in output of MI
// starting from the most significant bit.
// The count is conservative: 0 means "nothing known". Each case below is
// derived from the semantics of the producing opcode (rotate-and-mask
// immediates, AND with a 16-bit immediate, count-leading/trailing-zero
// result ranges, or zero-extending loads).
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
  unsigned Opcode = MI->getOpcode();
  // RLDICL/RLDCL clear the high MB bits (operand 3 is the mask begin).
  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
      Opcode == PPC::RLDCL  || Opcode == PPC::RLDCLo)
    return MI->getOperand(3).getImm();

  // RLDIC clears MB leading bits, provided the rotate does not move set
  // bits above the mask (SH <= 63 - MB would violate that).
  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return MI->getOperand(3).getImm();

  // 32-bit rotate-and-mask with a non-wrapping mask (MB <= ME): the upper
  // 32 bits plus MB bits of the low word are zero.
  if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINMo ||
       Opcode == PPC::RLWNM   || Opcode == PPC::RLWNMo ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return 32 + MI->getOperand(3).getImm();

  // ANDI. with a 16-bit immediate: at least 48 leading zeros, plus the
  // leading zeros of the immediate itself.
  if (Opcode == PPC::ANDIo) {
    uint16_t Imm = MI->getOperand(2).getImm();
    return 48 + countLeadingZeros(Imm);
  }

  if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZWo ||
      Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZWo ||
      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
    // The result ranges from 0 to 32.
    return 58;

  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
    // The result ranges from 0 to 64.
    return 57;

  // Zero-extending halfword loads: upper 48 bits are zero.
  if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
      Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
      Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
    return 48;

  // Zero-extending byte loads: upper 56 bits are zero.
  if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
      Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
      Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
    return 56;

  // Fall back to the generic target query for other zero-extending ops.
  if (TII->isZeroExtended(*MI))
    return 32;

  return 0;
}
178
// Perform peephole optimizations.
//
// Walks every instruction in the function once and applies per-opcode
// rewrites: XXPERMDI splat/swap cleanups, redundant-splat removal,
// FRSP elimination ahead of XVCVDPSP, sign-/zero-extension elimination,
// and folding of an ADD fed by a PHI of load-immediates. An instruction
// to be deleted is parked in ToErase and erased on the next loop
// iteration (or after the block) so the range-for iterator stays valid.
// Returns true if any change was made.
bool PPCMIPeephole::simplifyCode(void) {
  bool Simplified = false;
  MachineInstr* ToErase = nullptr;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Ignore debug instructions.
      if (MI.isDebugValue())
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;

      case PPC::XXPERMDI: {
        // Perform simplifications of 2x64 vector swaps and splats.
        // A swap is identified by an immediate value of 2, and a splat
        // is identified by an immediate value of 0 or 3.
        int Immed = MI.getOperand(3).getImm();

        if (Immed != 1) {

          // For each of these simplifications, we need the two source
          // regs to match. Unfortunately, MachineCSE ignores COPY and
          // SUBREG_TO_REG, so for example we can see
          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
          // We have to look through chains of COPY and SUBREG_TO_REG
          // to find the real source values for comparison.
          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());

          if (TrueReg1 == TrueReg2
              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
            unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;

            // If this is a splat fed by a splatting load, the splat is
            // redundant. Replace with a copy. This doesn't happen directly due
            // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
            // a load of a double to a vector of 64-bit integers.
            auto isConversionOfLoadAndSplat = [=]() -> bool {
              if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
                return false;
              unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
              if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
                MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
                if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
                  return true;
              }
              return false;
            };
            if (DefMI && (Immed == 0 || Immed == 3)) {
              if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
                DEBUG(dbgs()
                      << "Optimizing load-and-splat/splat "
                         "to load-and-splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            }

            // If this is a splat or a swap fed by another splat, we
            // can replace it with a copy.
            if (DefOpc == PPC::XXPERMDI) {
              unsigned FeedImmed = DefMI->getOperand(3).getImm();
              unsigned FeedReg1
                = lookThruCopyLike(DefMI->getOperand(1).getReg());
              unsigned FeedReg2
                = lookThruCopyLike(DefMI->getOperand(2).getReg());

              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
                DEBUG(dbgs()
                      << "Optimizing splat/swap or splat/splat "
                         "to splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }

              // If this is a splat fed by a swap, we can simplify modify
              // the splat to splat the other value from the swap's input
              // parameter.
              else if ((Immed == 0 || Immed == 3)
                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
                DEBUG(MI.dump());
                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
                MI.getOperand(3).setImm(3 - Immed);
                Simplified = true;
              }

              // If this is a swap fed by a swap, we can replace it
              // with a copy from the first swap's input.
              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(DefMI->getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
                       (DefMI->getOperand(2).getImm() == 0 ||
                        DefMI->getOperand(2).getImm() == 3)) {
              // Splat fed by another splat - switch the output of the first
              // and remove the second.
              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
              ToErase = &MI;
              Simplified = true;
              DEBUG(dbgs() << "Removing redundant splat: ");
              DEBUG(MI.dump());
            }
          }
        }
        break;
      }
      case PPC::VSPLTB:
      case PPC::VSPLTH:
      case PPC::XXSPLTW: {
        // XXSPLTW takes its source in operand 1; VSPLTB/VSPLTH in operand 2.
        unsigned MyOpcode = MI.getOpcode();
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
        if (!DefMI)
          break;
        unsigned DefOpcode = DefMI->getOpcode();
        // A float->int conversion of a value that was itself splat (via
        // LXVWSX or XXSPLTW) still has all lanes equal.
        auto isConvertOfSplat = [=]() -> bool {
          if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
            return false;
          unsigned ConvReg = DefMI->getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
            return false;
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
                          Splt->getOpcode() == PPC::XXSPLTW);
        };
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
          (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
        // If the instruction[s] that feed this splat have already splat
        // the value, this splat is redundant.
        if (AlreadySplat) {
          DEBUG(dbgs() << "Changing redundant splat to a copy: ");
          DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(OpNo));
          ToErase = &MI;
          Simplified = true;
        }
        // Splat fed by a shift. Usually when we align value to splat into
        // vector element zero.
        if (DefOpcode == PPC::XXSLDWI) {
          unsigned ShiftRes = DefMI->getOperand(0).getReg();
          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
          unsigned SplatImm = MI.getOperand(2).getImm();
          if (ShiftOp1 == ShiftOp2) {
            // Fold the shift amount into the splat's element index and
            // splat directly from the shift's (un-shifted) input.
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
              DEBUG(dbgs() << "Removing redundant shift: ");
              DEBUG(DefMI->dump());
              ToErase = DefMI;
            }
            Simplified = true;
            DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
                  " to " << NewElem << " in instruction: ");
            DEBUG(MI.dump());
            MI.getOperand(1).setReg(ShiftOp1);
            MI.getOperand(2).setImm(NewElem);
          }
        }
        break;
      }
      case PPC::XVCVDPSP: {
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);

        // This can occur when building a vector of single precision or integer
        // values.
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
          unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
          unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
          if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
              !TargetRegisterInfo::isVirtualRegister(DefsReg2))
            break;
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);

          if (!P1 || !P2)
            break;

          // Remove the passed FRSP instruction if it only feeds this MI and
          // set any uses of that FRSP (in this MI) to the source of the FRSP.
          auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
            if (RoundInstr->getOpcode() == PPC::FRSP &&
                MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
              Simplified = true;
              unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
              unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
              for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
                if (Use.getOperand(i).isReg() &&
                    Use.getOperand(i).getReg() == FRSPDefines)
                  Use.getOperand(i).setReg(ConvReg1);
              DEBUG(dbgs() << "Removing redundant FRSP:\n");
              DEBUG(RoundInstr->dump());
              DEBUG(dbgs() << "As it feeds instruction:\n");
              DEBUG(MI.dump());
              DEBUG(dbgs() << "Through instruction:\n");
              DEBUG(DefMI->dump());
              RoundInstr->eraseFromParent();
            }
          };

          // If the input to XVCVDPSP is a vector that was built (even
          // partially) out of FRSP's, the FRSP(s) can safely be removed
          // since this instruction performs the same operation.
          if (P1 != P2) {
            removeFRSPIfPossible(P1);
            removeFRSPIfPossible(P2);
            break;
          }
          removeFRSPIfPossible(P1);
        }
        break;
      }
      case PPC::EXTSH:
      case PPC::EXTSH8:
      case PPC::EXTSH8_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
        if (SrcMI->getOpcode() == PPC::LHZ ||
            SrcMI->getOpcode() == PPC::LHZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSH8;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LHZX;
          };
          // Select LHA/LHAX (32-bit) or LHA8/LHAX8 (64-bit) to replace
          // the zero-extending load.
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LHAX8;
              else         return PPC::LHA8;
            else
              if (isXForm) return PPC::LHAX;
              else         return PPC::LHA;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          DEBUG(dbgs() << "Zero-extending load\n");
          DEBUG(SrcMI->dump());
          DEBUG(dbgs() << "and sign-extension\n");
          DEBUG(MI.dump());
          DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::EXTSW:
      case PPC::EXTSW_32:
      case PPC::EXTSW_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
        if (SrcMI->getOpcode() == PPC::LWZ ||
            SrcMI->getOpcode() == PPC::LWZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LWZX;
          };
          // Select LWA_32/LWAX_32 (32-bit) or LWA/LWAX (64-bit) to replace
          // the zero-extending load.
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LWAX;
              else         return PPC::LWA;
            else
              if (isXForm) return PPC::LWAX_32;
              else         return PPC::LWA_32;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          DEBUG(dbgs() << "Zero-extending load\n");
          DEBUG(SrcMI->dump());
          DEBUG(dbgs() << "and sign-extension\n");
          DEBUG(MI.dump());
          DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
                   TII->isSignExtended(*SrcMI)) {
          // We can eliminate EXTSW if the input is known to be already
          // sign-extended. Replace it with IMPLICIT_DEF + INSERT_SUBREG
          // to widen the 32-bit value to 64 bits without an instruction.
          DEBUG(dbgs() << "Removing redundant sign-extension\n");
          unsigned TmpReg =
            MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
                  TmpReg);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
                  MI.getOperand(0).getReg())
              .addReg(TmpReg)
              .addReg(NarrowReg)
              .addImm(PPC::sub_32);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::RLDICL: {
        // We can eliminate RLDICL (e.g. for zero-extension)
        // if all bits to clear are already zero in the input.
        // This code assume following code sequence for zero-extension.
        //   %vreg6<def> = COPY %vreg5:sub_32; (optional)
        //   %vreg8<def> = IMPLICIT_DEF;
        //   %vreg7<def,tied1> = INSERT_SUBREG %vreg8<tied0>, %vreg6, sub_32;
        if (!EnableZExtElimination) break;

        // Only handle pure masking (no rotation).
        if (MI.getOperand(2).getImm() != 0)
          break;

        unsigned SrcReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
        if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
              SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
          break;

        MachineInstr *ImpDefMI, *SubRegMI;
        ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
        SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
        if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;

        // Look through an optional COPY to the real producer of the value.
        SrcMI = SubRegMI;
        if (SubRegMI->getOpcode() == PPC::COPY) {
          unsigned CopyReg = SubRegMI->getOperand(1).getReg();
          if (TargetRegisterInfo::isVirtualRegister(CopyReg))
            SrcMI = MRI->getVRegDef(CopyReg);
        }

        // If the producer already guarantees at least MB leading zeros,
        // the RLDICL clears nothing and can become a plain COPY.
        unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
        if (MI.getOperand(3).getImm() <= KnownZeroCount) {
          DEBUG(dbgs() << "Removing redundant zero-extension\n");
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .addReg(SrcReg);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedZExt++;
        }
        break;
      }

      // TODO: Any instruction that has an immediate form fed only by a PHI
      // whose operands are all load immediate can be folded away. We currently
      // do this for ADD instructions, but should expand it to arithmetic and
      // binary instructions with immediate forms in the future.
      case PPC::ADD4:
      case PPC::ADD8: {
        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);

          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
        };

        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
                                            MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          assert(DominatorOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);

          // Note: the vregs only show up at odd indices position of PHI Node,
          // the even indices position save the BB info.
          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
            MachineInstr *LiMI =
              getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
            if (!LiMI ||
                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
                || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
                !MDT->dominates(DefDomMI, LiMI))
              return false;
          }

          return true;
        };

        MachineOperand Op1 = MI.getOperand(1);
        MachineOperand Op2 = MI.getOperand(2);
        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
          std::swap(Op1, Op2);
        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
          break; // We don't have an ADD fed by LI's that can be transformed

        // Now we know that Op1 is the PHI node and Op2 is the dominator
        unsigned DominatorReg = Op2.getReg();

        // ADDI cannot encode R0/X0 as its base, so constrain the dominator
        // register to the NOR0/NOX0 class before rewriting LIs to ADDIs.
        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                             : &PPC::GPRC_and_GPRC_NOR0RegClass;
        MRI->setRegClass(DominatorReg, TRC);

        // replace LIs with ADDIs
        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
          DEBUG(dbgs() << "Optimizing LI to ADDI: ");
          DEBUG(LiMI->dump());

          // There could be repeated registers in the PHI, e.g: %vreg1<def> =
          // PHI %vreg6, <BB#2>, %vreg8, <BB#3>, %vreg8, <BB#6>; So if we've
          // already replaced the def instruction, skip.
          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
            continue;

          assert((LiMI->getOpcode() == PPC::LI ||
                  LiMI->getOpcode() == PPC::LI8) &&
                 "Invalid Opcode!");
          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
          LiMI->RemoveOperand(1);                    // remove the imm of LI
          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
                                                              : PPC::ADDI8));
          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
              .addReg(DominatorReg)
              .addImm(LiImm); // restore the imm of LI
          DEBUG(LiMI->dump());
        }

        // Replace ADD with COPY
        DEBUG(dbgs() << "Optimizing ADD to COPY: ");
        DEBUG(MI.dump());
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                MI.getOperand(0).getReg())
            .add(Op1);
        ToErase = &MI;
        Simplified = true;
        NumOptADDLIs++;
        break;
      }
      }
    }

    // If the last instruction was marked for elimination,
    // remove it now.
    if (ToErase) {
      ToErase->eraseFromParent();
      ToErase = nullptr;
    }
  }

  // We try to eliminate redundant compare instruction.
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}
691
692// helper functions for eliminateRedundantCompare
693static bool isEqOrNe(MachineInstr *BI) {
694 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
695 unsigned PredCond = PPC::getPredicateCondition(Pred);
696 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
697}
698
699static bool isSupportedCmpOp(unsigned opCode) {
700 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
701 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
702 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
703 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
704}
705
706static bool is64bitCmpOp(unsigned opCode) {
707 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
708 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
709}
710
711static bool isSignedCmpOp(unsigned opCode) {
712 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
713 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
714}
715
716static unsigned getSignedCmpOpCode(unsigned opCode) {
717 if (opCode == PPC::CMPLD) return PPC::CMPD;
718 if (opCode == PPC::CMPLW) return PPC::CMPW;
719 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
720 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
721 return opCode;
722}
723
724// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
725// (LT x) to (LE x-1)
726static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
727 uint64_t Imm = CMPI->getOperand(2).getImm();
728 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
729 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
730 return 0;
731
732 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
733 unsigned PredCond = PPC::getPredicateCondition(Pred);
734 unsigned PredHint = PPC::getPredicateHint(Pred);
735 if (PredCond == PPC::PRED_GE)
736 return PPC::getPredicate(PPC::PRED_GT, PredHint);
737 if (PredCond == PPC::PRED_LT)
738 return PPC::getPredicate(PPC::PRED_LE, PredHint);
739
740 return 0;
741}
742
743// We can increment immediate x in (GT x) by changing it to (GE x+1) or
744// (LE x) to (LT x+1)
745static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
746 uint64_t Imm = CMPI->getOperand(2).getImm();
747 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
748 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
749 return 0;
750
751 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
752 unsigned PredCond = PPC::getPredicateCondition(Pred);
753 unsigned PredHint = PPC::getPredicateHint(Pred);
754 if (PredCond == PPC::PRED_GT)
755 return PPC::getPredicate(PPC::PRED_GE, PredHint);
756 if (PredCond == PPC::PRED_LE)
757 return PPC::getPredicate(PPC::PRED_LT, PredHint);
758
759 return 0;
760}
761
Hiroshi Inoue84b05de2017-09-28 08:38:19 +0000762// This takes a Phi node and returns a register value for the spefied BB.
763static unsigned getIncomingRegForBlock(MachineInstr *Phi,
764 MachineBasicBlock *MBB) {
765 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
766 MachineOperand &MO = Phi->getOperand(I);
767 if (MO.getMBB() == MBB)
768 return Phi->getOperand(I-1).getReg();
769 }
770 llvm_unreachable("invalid src basic block for this Phi node\n");
771 return 0;
772}
773
// This function tracks the source of the register through register copy.
// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
// assuming that the control comes from BB1 into BB2.
// Returns the earliest virtual register found on that chain.
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
                           MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
  unsigned SrcReg = Reg;
  while (1) {
    unsigned NextReg = SrcReg;
    MachineInstr *Inst = MRI->getVRegDef(SrcReg);
    if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
      NextReg = getIncomingRegForBlock(Inst, BB1);
      // We track through PHI only once to avoid infinite loop.
      BB1 = nullptr;
    }
    else if (Inst->isFullCopy())
      NextReg = Inst->getOperand(1).getReg();
    // Stop when the chain ends (no progress) or leaves virtual registers.
    if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
      break;
    SrcReg = NextReg;
  }
  return SrcReg;
}
796
Hiroshi Inoue7166ffb2017-09-05 04:15:17 +0000797static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
Hiroshi Inoue84b05de2017-09-28 08:38:19 +0000798 MachineBasicBlock *&PredMBB,
799 MachineBasicBlock *&MBBtoMoveCmp,
Hiroshi Inoue7166ffb2017-09-05 04:15:17 +0000800 MachineRegisterInfo *MRI) {
801
802 auto isEligibleBB = [&](MachineBasicBlock &BB) {
803 auto BII = BB.getFirstInstrTerminator();
804 // We optimize BBs ending with a conditional branch.
805 // We check only for BCC here, not BCCLR, because BCCLR
806 // will be formed only later in the pipeline.
807 if (BB.succ_size() == 2 &&
808 BII != BB.instr_end() &&
809 (*BII).getOpcode() == PPC::BCC &&
810 (*BII).getOperand(1).isReg()) {
811 // We optimize only if the condition code is used only by one BCC.
812 unsigned CndReg = (*BII).getOperand(1).getReg();
813 if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
814 !MRI->hasOneNonDBGUse(CndReg))
815 return false;
816
817 // We skip this BB if a physical register is used in comparison.
818 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
819 for (MachineOperand &MO : CMPI->operands())
820 if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
821 return false;
822
823 return true;
824 }
825 return false;
826 };
827
Hiroshi Inoue84b05de2017-09-28 08:38:19 +0000828 // If this BB has more than one successor, we can create a new BB and
829 // move the compare instruction in the new BB.
830 // So far, we do not move compare instruction to a BB having multiple
831 // successors to avoid potentially increasing code size.
832 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
833 return BB.succ_size() == 1;
834 };
835
836 if (!isEligibleBB(MBB))
Hiroshi Inoue7166ffb2017-09-05 04:15:17 +0000837 return false;
838
Hiroshi Inoue84b05de2017-09-28 08:38:19 +0000839 unsigned NumPredBBs = MBB.pred_size();
840 if (NumPredBBs == 1) {
841 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
842 if (isEligibleBB(*TmpMBB)) {
843 PredMBB = TmpMBB;
844 MBBtoMoveCmp = nullptr;
845 return true;
846 }
847 }
848 else if (NumPredBBs == 2) {
849 // We check for partially redundant case.
850 // So far, we support cases with only two predecessors
851 // to avoid increasing the number of instructions.
852 MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
853 MachineBasicBlock *Pred1MBB = *PI;
854 MachineBasicBlock *Pred2MBB = *(PI+1);
855
856 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
857 // We assume Pred1MBB is the BB containing the compare to be merged and
858 // Pred2MBB is the BB to which we will append a compare instruction.
859 // Hence we can proceed as is.
860 }
861 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
862 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
863 std::swap(Pred1MBB, Pred2MBB);
864 }
865 else return false;
866
867 // Here, Pred2MBB is the BB to which we need to append a compare inst.
868 // We cannot move the compare instruction if operands are not available
869 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
870 MachineInstr *BI = &*MBB.getFirstInstrTerminator();
871 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
872 for (int I = 1; I <= 2; I++)
873 if (CMPI->getOperand(I).isReg()) {
874 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
875 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
876 return false;
877 }
878
879 PredMBB = Pred1MBB;
880 MBBtoMoveCmp = Pred2MBB;
Hiroshi Inoue7166ffb2017-09-05 04:15:17 +0000881 return true;
Hiroshi Inoue84b05de2017-09-28 08:38:19 +0000882 }
Hiroshi Inoue7166ffb2017-09-05 04:15:17 +0000883
884 return false;
885}
886
// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge compare instructions into one and make multiple
// conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
// cmplwi r3, 0
// beq    0, .LBB0_3
// cmpwi  r3, -1
// bgt    0, .LBB0_4
// So, before merging two compares, we need to modify these instructions as
// cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
// beq    0, .LBB0_3
// cmpwi  r3, 0       ; greater than -1 means greater or equal to 0
// bge    0, .LBB0_4
//
// Returns true if any compare/branch pair was rewritten (the erased
// compare, if any, is removed immediately; no DCE is required for it).
bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as a operand in both MBBs.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As partially redundant case, we additionally handle if MBB2 has one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //    |    \                     (w/ 1 succ) \     |  \
    //    |     \                                 \    |   \
    //    |                                        \   |
    //   --------                                 --------
    //   | MBB2 | (w/ 1 pred                      | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                     -------- and 2 succ)
    //    |    \                                   |  \
    //    |     \                                  |   \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    // BI1/CMPI1: branch and compare to keep; BI2/CMPI2: the redundant pair.
    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons.
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.
      if (isEqOrNe(BI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      // CMPI2's operands are traced through a PHI on the MBB1->MBB2 edge so
      // that registers renamed across the edge still compare equal.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in another compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If immediate are not same, we try to adjust by changing predicate;
      // e.g. GT imm means GE (imm+1).
      // Only non-equality predicates can absorb a +/-1 immediate shift, so
      // a difference of up to 2 may still be reconcilable.
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          // Move both immediates one step toward each other.
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(CMPI2->dump());
    DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle partially redundant case.
      if (SwapOperands) {
        unsigned Op1 = CMPI2->getOperand(1).getReg();
        unsigned Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      // Rewrite operands defined by PHIs in MBB2 to the values incoming
      // from MBBtoMoveCmp, so the compare is valid in its new block.
      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      // Move the compare just before the terminator of MBBtoMoveCmp.
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      // A PHI in MBB2 merges the two CR results (one per predecessor) and
      // feeds the branch BI2.
      DebugLoc DL = CMPI2->getDebugLoc();
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    // The (shared) condition register now dies at BI2, not at BI1.
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    DEBUG(dbgs() << "into a compare and two branches:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      DEBUG(dbgs() << "The following compare is moved into BB#" <<
            MBBtoMoveCmp->getNumber() << " to handle partial redundancy.\n");
      DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}
1149
1150// This is used to find the "true" source register for an
1151// XXPERMDI instruction, since MachineCSE does not handle the
1152// "copy-like" operations (Copy and SubregToReg). Returns
1153// the original SrcReg unless it is the target of a copy-like
1154// operation, in which case we chain backwards through all
1155// such operations to the ultimate source register. If a
1156// physical register is encountered, we stop the search.
1157unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
1158
1159 while (true) {
1160
1161 MachineInstr *MI = MRI->getVRegDef(SrcReg);
1162 if (!MI->isCopyLike())
1163 return SrcReg;
1164
1165 unsigned CopySrcReg;
1166 if (MI->isCopy())
1167 CopySrcReg = MI->getOperand(1).getReg();
1168 else {
1169 assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
1170 CopySrcReg = MI->getOperand(2).getReg();
1171 }
1172
1173 if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
1174 return CopySrcReg;
1175
1176 SrcReg = CopySrcReg;
1177 }
1178}
1179
1180} // end default namespace
1181
// Register the pass with the PassRegistry so it can be referenced by
// DEBUG_TYPE ("ppc-mi-peepholes") on the command line and in pass lists.
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

// Unique address used as the pass identifier; the value is irrelevant.
char PPCMIPeephole::ID = 0;
// Factory used by the PowerPC target machine to add this pass.
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
1190