LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1 //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the Hexagon hardware
11 // loop instruction. The hardware loop can perform loop branches with a
12 // zero-cycle overhead.
13 //
14 // The pattern that defines the induction variable can changed depending on
15 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
16 // normalizes induction variables, and the Loop Strength Reduction pass
17 // run by 'llc' may also make changes to the induction variable.
18 // The pattern detected by this phase is due to running Strength Reduction.
19 //
20 // Criteria for hardware loops:
21 // - Countable loops (w/ ind. var for a trip count)
22 // - Assumes loops are normalized by IndVarSimplify
23 // - Try inner-most loops first
24 // - No nested hardware loops.
25 // - No function calls in loops.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #define DEBUG_TYPE "hwloops"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/Statistic.h"
38 #include "llvm/PassSupport.h"
40 #include "llvm/Support/Debug.h"
43 #include "Hexagon.h"
44 #include "HexagonTargetMachine.h"
45 
46 #include <algorithm>
47 #include <vector>
48 
49 using namespace llvm;
50 
51 #ifndef NDEBUG
52 static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
53 #endif
54 
55 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
56 
57 namespace llvm {
59 }
60 
61 namespace {
62  class CountValue;
63  struct HexagonHardwareLoops : public MachineFunctionPass {
64  MachineLoopInfo *MLI;
67  const HexagonTargetMachine *TM;
68  const HexagonInstrInfo *TII;
69  const HexagonRegisterInfo *TRI;
70 #ifndef NDEBUG
71  static int Counter;
72 #endif
73 
74  public:
75  static char ID;
76 
77  HexagonHardwareLoops() : MachineFunctionPass(ID) {
79  }
80 
81  virtual bool runOnMachineFunction(MachineFunction &MF);
82 
83  const char *getPassName() const { return "Hexagon Hardware Loops"; }
84 
85  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
89  }
90 
91  private:
92  /// Kinds of comparisons in the compare instructions.
93  struct Comparison {
94  enum Kind {
95  EQ = 0x01,
96  NE = 0x02,
97  L = 0x04, // Less-than property.
98  G = 0x08, // Greater-than property.
99  U = 0x40, // Unsigned property.
100  LTs = L,
101  LEs = L | EQ,
102  GTs = G,
103  GEs = G | EQ,
104  LTu = L | U,
105  LEu = L | EQ | U,
106  GTu = G | U,
107  GEu = G | EQ | U
108  };
109 
110  static Kind getSwappedComparison(Kind Cmp) {
111  assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
112  if ((Cmp & L) || (Cmp & G))
113  return (Kind)(Cmp ^ (L|G));
114  return Cmp;
115  }
116  };
117 
118  /// \brief Find the register that contains the loop controlling
119  /// induction variable.
120  /// If successful, it will return true and set the \p Reg, \p IVBump
121  /// and \p IVOp arguments. Otherwise it will return false.
122  /// The returned induction register is the register R that follows the
123  /// following induction pattern:
124  /// loop:
125  /// R = phi ..., [ R.next, LatchBlock ]
126  /// R.next = R + #bump
127  /// if (R.next < #N) goto loop
128  /// IVBump is the immediate value added to R, and IVOp is the instruction
129  /// "R.next = R + #bump".
130  bool findInductionRegister(MachineLoop *L, unsigned &Reg,
131  int64_t &IVBump, MachineInstr *&IVOp) const;
132 
133  /// \brief Analyze the statements in a loop to determine if the loop
134  /// has a computable trip count and, if so, return a value that represents
135  /// the trip count expression.
136  CountValue *getLoopTripCount(MachineLoop *L,
138 
139  /// \brief Return the expression that represents the number of times
140  /// a loop iterates. The function takes the operands that represent the
141  /// loop start value, loop end value, and induction value. Based upon
142  /// these operands, the function attempts to compute the trip count.
143  /// If the trip count is not directly available (as an immediate value,
144  /// or a register), the function will attempt to insert computation of it
145  /// to the loop's preheader.
146  CountValue *computeCount(MachineLoop *Loop,
147  const MachineOperand *Start,
148  const MachineOperand *End,
149  unsigned IVReg,
150  int64_t IVBump,
151  Comparison::Kind Cmp) const;
152 
153  /// \brief Return true if the instruction is not valid within a hardware
154  /// loop.
155  bool isInvalidLoopOperation(const MachineInstr *MI) const;
156 
157  /// \brief Return true if the loop contains an instruction that inhibits
158  /// using the hardware loop.
159  bool containsInvalidInstruction(MachineLoop *L) const;
160 
161  /// \brief Given a loop, check if we can convert it to a hardware loop.
162  /// If so, then perform the conversion and return true.
163  bool convertToHardwareLoop(MachineLoop *L);
164 
165  /// \brief Return true if the instruction is now dead.
166  bool isDead(const MachineInstr *MI,
167  SmallVectorImpl<MachineInstr *> &DeadPhis) const;
168 
169  /// \brief Remove the instruction if it is now dead.
170  void removeIfDead(MachineInstr *MI);
171 
172  /// \brief Make sure that the "bump" instruction executes before the
173  /// compare. We need that for the IV fixup, so that the compare
174  /// instruction would not use a bumped value that has not yet been
175  /// defined. If the instructions are out of order, try to reorder them.
176  bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
177 
178  /// \brief Get the instruction that loads an immediate value into \p R,
179  /// or 0 if such an instruction does not exist.
180  MachineInstr *defWithImmediate(unsigned R);
181 
182  /// \brief Get the immediate value referenced to by \p MO, either for
183  /// immediate operands, or for register operands, where the register
184  /// was defined with an immediate value.
185  int64_t getImmediate(MachineOperand &MO);
186 
187  /// \brief Reset the given machine operand to now refer to a new immediate
188  /// value. Assumes that the operand was already referencing an immediate
189  /// value, either directly, or via a register.
190  void setImmediate(MachineOperand &MO, int64_t Val);
191 
192  /// \brief Fix the data flow of the induction varible.
193  /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
194  /// |
195  /// +-> back to phi
196  /// where "bump" is the increment of the induction variable:
197  /// iv = iv + #const.
198  /// Due to some prior code transformations, the actual flow may look
199  /// like this:
200  /// phi -+-> bump ---> back to phi
201  /// |
202  /// +-> comparison-in-latch (against upper_bound-bump),
203  /// i.e. the comparison that controls the loop execution may be using
204  /// the value of the induction variable from before the increment.
205  ///
206  /// Return true if the loop's flow is the desired one (i.e. it's
207  /// either been fixed, or no fixing was necessary).
208  /// Otherwise, return false. This can happen if the induction variable
209  /// couldn't be identified, or if the value in the latch's comparison
210  /// cannot be adjusted to reflect the post-bump value.
211  bool fixupInductionVariable(MachineLoop *L);
212 
213  /// \brief Given a loop, if it does not have a preheader, create one.
214  /// Return the block that is the preheader.
215  MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
216  };
217 
218  char HexagonHardwareLoops::ID = 0;
219 #ifndef NDEBUG
220  int HexagonHardwareLoops::Counter = 0;
221 #endif
222 
223  /// \brief Abstraction for a trip count of a loop. A smaller vesrsion
224  /// of the MachineOperand class without the concerns of changing the
225  /// operand representation.
226  class CountValue {
227  public:
228  enum CountValueType {
229  CV_Register,
230  CV_Immediate
231  };
232  private:
233  CountValueType Kind;
234  union Values {
235  struct {
236  unsigned Reg;
237  unsigned Sub;
238  } R;
239  unsigned ImmVal;
240  } Contents;
241 
242  public:
243  explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
244  Kind = t;
245  if (Kind == CV_Register) {
246  Contents.R.Reg = v;
247  Contents.R.Sub = u;
248  } else {
249  Contents.ImmVal = v;
250  }
251  }
252  bool isReg() const { return Kind == CV_Register; }
253  bool isImm() const { return Kind == CV_Immediate; }
254 
255  unsigned getReg() const {
256  assert(isReg() && "Wrong CountValue accessor");
257  return Contents.R.Reg;
258  }
259  unsigned getSubReg() const {
260  assert(isReg() && "Wrong CountValue accessor");
261  return Contents.R.Sub;
262  }
263  unsigned getImm() const {
264  assert(isImm() && "Wrong CountValue accessor");
265  return Contents.ImmVal;
266  }
267 
268  void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
269  const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
270  if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
271  if (isImm()) { OS << Contents.ImmVal; }
272  }
273  };
274 } // end anonymous namespace
275 
276 
277 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
278  "Hexagon Hardware Loops", false, false)
281 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
282  "Hexagon Hardware Loops", false, false)
283 
284 
285 /// \brief Returns true if the instruction is a hardware loop instruction.
286 static bool isHardwareLoop(const MachineInstr *MI) {
287  return MI->getOpcode() == Hexagon::LOOP0_r ||
288  MI->getOpcode() == Hexagon::LOOP0_i;
289 }
290 
292  return new HexagonHardwareLoops();
293 }
294 
295 
296 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
297  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
298 
299  bool Changed = false;
300 
301  MLI = &getAnalysis<MachineLoopInfo>();
302  MRI = &MF.getRegInfo();
303  MDT = &getAnalysis<MachineDominatorTree>();
304  TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
305  TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
306  TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
307 
308  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
309  I != E; ++I) {
310  MachineLoop *L = *I;
311  if (!L->getParentLoop())
312  Changed |= convertToHardwareLoop(L);
313  }
314 
315  return Changed;
316 }
317 
318 
319 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
320  unsigned &Reg,
321  int64_t &IVBump,
322  MachineInstr *&IVOp
323  ) const {
324  MachineBasicBlock *Header = L->getHeader();
325  MachineBasicBlock *Preheader = L->getLoopPreheader();
326  MachineBasicBlock *Latch = L->getLoopLatch();
327  if (!Header || !Preheader || !Latch)
328  return false;
329 
330  // This pair represents an induction register together with an immediate
331  // value that will be added to it in each loop iteration.
332  typedef std::pair<unsigned,int64_t> RegisterBump;
333 
334  // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
335  // from an induction operation
336  // R.next = R + bump
337  // where bump is an immediate value.
338  typedef std::map<unsigned,RegisterBump> InductionMap;
339 
340  InductionMap IndMap;
341 
342  typedef MachineBasicBlock::instr_iterator instr_iterator;
343  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
344  I != E && I->isPHI(); ++I) {
345  MachineInstr *Phi = &*I;
346 
347  // Have a PHI instruction. Get the operand that corresponds to the
348  // latch block, and see if is a result of an addition of form "reg+imm",
349  // where the "reg" is defined by the PHI node we are looking at.
350  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
351  if (Phi->getOperand(i+1).getMBB() != Latch)
352  continue;
353 
354  unsigned PhiOpReg = Phi->getOperand(i).getReg();
355  MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
356  unsigned UpdOpc = DI->getOpcode();
357  bool isAdd = (UpdOpc == Hexagon::ADD_ri);
358 
359  if (isAdd) {
360  // If the register operand to the add is the PHI we're
361  // looking at, this meets the induction pattern.
362  unsigned IndReg = DI->getOperand(1).getReg();
363  if (MRI->getVRegDef(IndReg) == Phi) {
364  unsigned UpdReg = DI->getOperand(0).getReg();
365  int64_t V = DI->getOperand(2).getImm();
366  IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
367  }
368  }
369  } // for (i)
370  } // for (instr)
371 
373  MachineBasicBlock *TB = 0, *FB = 0;
374  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
375  if (NotAnalyzed)
376  return false;
377 
378  unsigned CSz = Cond.size();
379  assert (CSz == 1 || CSz == 2);
380  unsigned PredR = Cond[CSz-1].getReg();
381 
382  MachineInstr *PredI = MRI->getVRegDef(PredR);
383  if (!PredI->isCompare())
384  return false;
385 
386  unsigned CmpReg1 = 0, CmpReg2 = 0;
387  int CmpImm = 0, CmpMask = 0;
388  bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
389  CmpMask, CmpImm);
390  // Fail if the compare was not analyzed, or it's not comparing a register
391  // with an immediate value. Not checking the mask here, since we handle
392  // the individual compare opcodes (including CMPb) later on.
393  if (!CmpAnalyzed)
394  return false;
395 
396  // Exactly one of the input registers to the comparison should be among
397  // the induction registers.
398  InductionMap::iterator IndMapEnd = IndMap.end();
399  InductionMap::iterator F = IndMapEnd;
400  if (CmpReg1 != 0) {
401  InductionMap::iterator F1 = IndMap.find(CmpReg1);
402  if (F1 != IndMapEnd)
403  F = F1;
404  }
405  if (CmpReg2 != 0) {
406  InductionMap::iterator F2 = IndMap.find(CmpReg2);
407  if (F2 != IndMapEnd) {
408  if (F != IndMapEnd)
409  return false;
410  F = F2;
411  }
412  }
413  if (F == IndMapEnd)
414  return false;
415 
416  Reg = F->second.first;
417  IVBump = F->second.second;
418  IVOp = MRI->getVRegDef(F->first);
419  return true;
420 }
421 
422 
423 /// \brief Analyze the statements in a loop to determine if the loop has
424 /// a computable trip count and, if so, return a value that represents
425 /// the trip count expression.
426 ///
427 /// This function iterates over the phi nodes in the loop to check for
428 /// induction variable patterns that are used in the calculation for
429 /// the number of time the loop is executed.
430 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
432  MachineBasicBlock *TopMBB = L->getTopBlock();
434  assert(PI != TopMBB->pred_end() &&
435  "Loop must have more than one incoming edge!");
436  MachineBasicBlock *Backedge = *PI++;
437  if (PI == TopMBB->pred_end()) // dead loop?
438  return 0;
439  MachineBasicBlock *Incoming = *PI++;
440  if (PI != TopMBB->pred_end()) // multiple backedges?
441  return 0;
442 
443  // Make sure there is one incoming and one backedge and determine which
444  // is which.
445  if (L->contains(Incoming)) {
446  if (L->contains(Backedge))
447  return 0;
448  std::swap(Incoming, Backedge);
449  } else if (!L->contains(Backedge))
450  return 0;
451 
452  // Look for the cmp instruction to determine if we can get a useful trip
453  // count. The trip count can be either a register or an immediate. The
454  // location of the value depends upon the type (reg or imm).
455  MachineBasicBlock *Latch = L->getLoopLatch();
456  if (!Latch)
457  return 0;
458 
459  unsigned IVReg = 0;
460  int64_t IVBump = 0;
461  MachineInstr *IVOp;
462  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
463  if (!FoundIV)
464  return 0;
465 
466  MachineBasicBlock *Preheader = L->getLoopPreheader();
467 
468  MachineOperand *InitialValue = 0;
469  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
470  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
471  MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
472  if (MBB == Preheader)
473  InitialValue = &IV_Phi->getOperand(i);
474  else if (MBB == Latch)
475  IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
476  }
477  if (!InitialValue)
478  return 0;
479 
481  MachineBasicBlock *TB = 0, *FB = 0;
482  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
483  if (NotAnalyzed)
484  return 0;
485 
486  MachineBasicBlock *Header = L->getHeader();
487  // TB must be non-null. If FB is also non-null, one of them must be
488  // the header. Otherwise, branch to TB could be exiting the loop, and
489  // the fall through can go to the header.
490  assert (TB && "Latch block without a branch?");
491  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
492  if (!TB || (FB && TB != Header && FB != Header))
493  return 0;
494 
495  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
496  // to put imm(0), followed by P in the vector Cond.
497  // If TB is not the header, it means that the "not-taken" path must lead
498  // to the header.
499  bool Negated = (Cond.size() > 1) ^ (TB != Header);
500  unsigned PredReg = Cond[Cond.size()-1].getReg();
501  MachineInstr *CondI = MRI->getVRegDef(PredReg);
502  unsigned CondOpc = CondI->getOpcode();
503 
504  unsigned CmpReg1 = 0, CmpReg2 = 0;
505  int Mask = 0, ImmValue = 0;
506  bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
507  Mask, ImmValue);
508  if (!AnalyzedCmp)
509  return 0;
510 
511  // The comparison operator type determines how we compute the loop
512  // trip count.
513  OldInsts.push_back(CondI);
514  OldInsts.push_back(IVOp);
515 
516  // Sadly, the following code gets information based on the position
517  // of the operands in the compare instruction. This has to be done
518  // this way, because the comparisons check for a specific relationship
519  // between the operands (e.g. is-less-than), rather than to find out
520  // what relationship the operands are in (as on PPC).
521  Comparison::Kind Cmp;
522  bool isSwapped = false;
523  const MachineOperand &Op1 = CondI->getOperand(1);
524  const MachineOperand &Op2 = CondI->getOperand(2);
525  const MachineOperand *EndValue = 0;
526 
527  if (Op1.isReg()) {
528  if (Op2.isImm() || Op1.getReg() == IVReg)
529  EndValue = &Op2;
530  else {
531  EndValue = &Op1;
532  isSwapped = true;
533  }
534  }
535 
536  if (!EndValue)
537  return 0;
538 
539  switch (CondOpc) {
540  case Hexagon::CMPEQri:
541  case Hexagon::CMPEQrr:
542  Cmp = !Negated ? Comparison::EQ : Comparison::NE;
543  break;
544  case Hexagon::CMPGTUri:
545  case Hexagon::CMPGTUrr:
546  Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
547  break;
548  case Hexagon::CMPGTri:
549  case Hexagon::CMPGTrr:
550  Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
551  break;
552  // Very limited support for byte/halfword compares.
553  case Hexagon::CMPbEQri_V4:
554  case Hexagon::CMPhEQri_V4: {
555  if (IVBump != 1)
556  return 0;
557 
558  int64_t InitV, EndV;
559  // Since the comparisons are "ri", the EndValue should be an
560  // immediate. Check it just in case.
561  assert(EndValue->isImm() && "Unrecognized latch comparison");
562  EndV = EndValue->getImm();
563  // Allow InitialValue to be a register defined with an immediate.
564  if (InitialValue->isReg()) {
565  if (!defWithImmediate(InitialValue->getReg()))
566  return 0;
567  InitV = getImmediate(*InitialValue);
568  } else {
569  assert(InitialValue->isImm());
570  InitV = InitialValue->getImm();
571  }
572  if (InitV >= EndV)
573  return 0;
574  if (CondOpc == Hexagon::CMPbEQri_V4) {
575  if (!isInt<8>(InitV) || !isInt<8>(EndV))
576  return 0;
577  } else { // Hexagon::CMPhEQri_V4
578  if (!isInt<16>(InitV) || !isInt<16>(EndV))
579  return 0;
580  }
581  Cmp = !Negated ? Comparison::EQ : Comparison::NE;
582  break;
583  }
584  default:
585  return 0;
586  }
587 
588  if (isSwapped)
589  Cmp = Comparison::getSwappedComparison(Cmp);
590 
591  if (InitialValue->isReg()) {
592  unsigned R = InitialValue->getReg();
593  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
594  if (!MDT->properlyDominates(DefBB, Header))
595  return 0;
596  OldInsts.push_back(MRI->getVRegDef(R));
597  }
598  if (EndValue->isReg()) {
599  unsigned R = EndValue->getReg();
600  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
601  if (!MDT->properlyDominates(DefBB, Header))
602  return 0;
603  }
604 
605  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
606 }
607 
608 /// \brief Helper function that returns the expression that represents the
609 /// number of times a loop iterates. The function takes the operands that
610 /// represent the loop start value, loop end value, and induction value.
611 /// Based upon these operands, the function attempts to compute the trip count.
612 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
613  const MachineOperand *Start,
614  const MachineOperand *End,
615  unsigned IVReg,
616  int64_t IVBump,
617  Comparison::Kind Cmp) const {
618  // Cannot handle comparison EQ, i.e. while (A == B).
619  if (Cmp == Comparison::EQ)
620  return 0;
621 
622  // Check if either the start or end values are an assignment of an immediate.
623  // If so, use the immediate value rather than the register.
624  if (Start->isReg()) {
625  const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
626  if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
627  Start = &StartValInstr->getOperand(1);
628  }
629  if (End->isReg()) {
630  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
631  if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
632  End = &EndValInstr->getOperand(1);
633  }
634 
635  assert (Start->isReg() || Start->isImm());
636  assert (End->isReg() || End->isImm());
637 
638  bool CmpLess = Cmp & Comparison::L;
639  bool CmpGreater = Cmp & Comparison::G;
640  bool CmpHasEqual = Cmp & Comparison::EQ;
641 
642  // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
643  // If loop executes while iv is "less" with the iv value going down, then
644  // the iv must wrap.
645  if (CmpLess && IVBump < 0)
646  return 0;
647  // If loop executes while iv is "greater" with the iv value going up, then
648  // the iv must wrap.
649  if (CmpGreater && IVBump > 0)
650  return 0;
651 
652  if (Start->isImm() && End->isImm()) {
653  // Both, start and end are immediates.
654  int64_t StartV = Start->getImm();
655  int64_t EndV = End->getImm();
656  int64_t Dist = EndV - StartV;
657  if (Dist == 0)
658  return 0;
659 
660  bool Exact = (Dist % IVBump) == 0;
661 
662  if (Cmp == Comparison::NE) {
663  if (!Exact)
664  return 0;
665  if ((Dist < 0) ^ (IVBump < 0))
666  return 0;
667  }
668 
669  // For comparisons that include the final value (i.e. include equality
670  // with the final value), we need to increase the distance by 1.
671  if (CmpHasEqual)
672  Dist = Dist > 0 ? Dist+1 : Dist-1;
673 
674  // assert (CmpLess => Dist > 0);
675  assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
676  // assert (CmpGreater => Dist < 0);
677  assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
678 
679  // "Normalized" distance, i.e. with the bump set to +-1.
680  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
681  : (-Dist + (-IVBump-1)) / (-IVBump);
682  assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
683 
684  uint64_t Count = Dist1;
685 
686  if (Count > 0xFFFFFFFFULL)
687  return 0;
688 
689  return new CountValue(CountValue::CV_Immediate, Count);
690  }
691 
692  // A general case: Start and End are some values, but the actual
693  // iteration count may not be available. If it is not, insert
694  // a computation of it into the preheader.
695 
696  // If the induction variable bump is not a power of 2, quit.
697  // Othwerise we'd need a general integer division.
698  if (!isPowerOf2_64(abs64(IVBump)))
699  return 0;
700 
701  MachineBasicBlock *PH = Loop->getLoopPreheader();
702  assert (PH && "Should have a preheader by now");
704  DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
705  : DebugLoc();
706 
707  // If Start is an immediate and End is a register, the trip count
708  // will be "reg - imm". Hexagon's "subtract immediate" instruction
709  // is actually "reg + -imm".
710 
711  // If the loop IV is going downwards, i.e. if the bump is negative,
712  // then the iteration count (computed as End-Start) will need to be
713  // negated. To avoid the negation, just swap Start and End.
714  if (IVBump < 0) {
715  std::swap(Start, End);
716  IVBump = -IVBump;
717  }
718  // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
719  // Signedness, and "including equality" are preserved.
720 
721  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
722  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
723 
724  int64_t StartV = 0, EndV = 0;
725  if (Start->isImm())
726  StartV = Start->getImm();
727  if (End->isImm())
728  EndV = End->getImm();
729 
730  int64_t AdjV = 0;
731  // To compute the iteration count, we would need this computation:
732  // Count = (End - Start + (IVBump-1)) / IVBump
733  // or, when CmpHasEqual:
734  // Count = (End - Start + (IVBump-1)+1) / IVBump
735  // The "IVBump-1" part is the adjustment (AdjV). We can avoid
736  // generating an instruction specifically to add it if we can adjust
737  // the immediate values for Start or End.
738 
739  if (CmpHasEqual) {
740  // Need to add 1 to the total iteration count.
741  if (Start->isImm())
742  StartV--;
743  else if (End->isImm())
744  EndV++;
745  else
746  AdjV += 1;
747  }
748 
749  if (Cmp != Comparison::NE) {
750  if (Start->isImm())
751  StartV -= (IVBump-1);
752  else if (End->isImm())
753  EndV += (IVBump-1);
754  else
755  AdjV += (IVBump-1);
756  }
757 
758  unsigned R = 0, SR = 0;
759  if (Start->isReg()) {
760  R = Start->getReg();
761  SR = Start->getSubReg();
762  } else {
763  R = End->getReg();
764  SR = End->getSubReg();
765  }
766  const TargetRegisterClass *RC = MRI->getRegClass(R);
767  // Hardware loops cannot handle 64-bit registers. If it's a double
768  // register, it has to have a subregister.
769  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
770  return 0;
771  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
772 
773  // Compute DistR (register with the distance between Start and End).
774  unsigned DistR, DistSR;
775 
776  // Avoid special case, where the start value is an imm(0).
777  if (Start->isImm() && StartV == 0) {
778  DistR = End->getReg();
779  DistSR = End->getSubReg();
780  } else {
781  const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
782  (RegToImm ? TII->get(Hexagon::SUB_ri) :
783  TII->get(Hexagon::ADD_ri));
784  unsigned SubR = MRI->createVirtualRegister(IntRC);
785  MachineInstrBuilder SubIB =
786  BuildMI(*PH, InsertPos, DL, SubD, SubR);
787 
788  if (RegToReg) {
789  SubIB.addReg(End->getReg(), 0, End->getSubReg())
790  .addReg(Start->getReg(), 0, Start->getSubReg());
791  } else if (RegToImm) {
792  SubIB.addImm(EndV)
793  .addReg(Start->getReg(), 0, Start->getSubReg());
794  } else { // ImmToReg
795  SubIB.addReg(End->getReg(), 0, End->getSubReg())
796  .addImm(-StartV);
797  }
798  DistR = SubR;
799  DistSR = 0;
800  }
801 
802  // From DistR, compute AdjR (register with the adjusted distance).
803  unsigned AdjR, AdjSR;
804 
805  if (AdjV == 0) {
806  AdjR = DistR;
807  AdjSR = DistSR;
808  } else {
809  // Generate CountR = ADD DistR, AdjVal
810  unsigned AddR = MRI->createVirtualRegister(IntRC);
811  const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
812  BuildMI(*PH, InsertPos, DL, AddD, AddR)
813  .addReg(DistR, 0, DistSR)
814  .addImm(AdjV);
815 
816  AdjR = AddR;
817  AdjSR = 0;
818  }
819 
820  // From AdjR, compute CountR (register with the final count).
821  unsigned CountR, CountSR;
822 
823  if (IVBump == 1) {
824  CountR = AdjR;
825  CountSR = AdjSR;
826  } else {
827  // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
828  unsigned Shift = Log2_32(IVBump);
829 
830  // Generate NormR = LSR DistR, Shift.
831  unsigned LsrR = MRI->createVirtualRegister(IntRC);
832  const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
833  BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
834  .addReg(AdjR, 0, AdjSR)
835  .addImm(Shift);
836 
837  CountR = LsrR;
838  CountSR = 0;
839  }
840 
841  return new CountValue(CountValue::CV_Register, CountR, CountSR);
842 }
843 
844 
845 /// \brief Return true if the operation is invalid within hardware loop.
846 bool HexagonHardwareLoops::isInvalidLoopOperation(
847  const MachineInstr *MI) const {
848 
849  // call is not allowed because the callee may use a hardware loop
850  if (MI->getDesc().isCall())
851  return true;
852 
853  // do not allow nested hardware loops
854  if (isHardwareLoop(MI))
855  return true;
856 
857  // check if the instruction defines a hardware loop register
858  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
859  const MachineOperand &MO = MI->getOperand(i);
860  if (!MO.isReg() || !MO.isDef())
861  continue;
862  unsigned R = MO.getReg();
863  if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
864  R == Hexagon::SA0 || R == Hexagon::SA1)
865  return true;
866  }
867  return false;
868 }
869 
870 
871 /// \brief - Return true if the loop contains an instruction that inhibits
872 /// the use of the hardware loop function.
873 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
874  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
875  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
876  MachineBasicBlock *MBB = Blocks[i];
878  MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
879  const MachineInstr *MI = &*MII;
880  if (isInvalidLoopOperation(MI))
881  return true;
882  }
883  }
884  return false;
885 }
886 
887 
888 /// \brief Returns true if the instruction is dead. This was essentially
889 /// copied from DeadMachineInstructionElim::isDead, but with special cases
890 /// for inline asm, physical registers and instructions with side effects
891 /// removed.
892 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
893  SmallVectorImpl<MachineInstr *> &DeadPhis) const {
894  // Examine each operand.
895  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
896  const MachineOperand &MO = MI->getOperand(i);
897  if (!MO.isReg() || !MO.isDef())
898  continue;
899 
900  unsigned Reg = MO.getReg();
901  if (MRI->use_nodbg_empty(Reg))
902  continue;
903 
904  typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
905 
906  // This instruction has users, but if the only user is the phi node for the
907  // parent block, and the only use of that phi node is this instruction, then
908  // this instruction is dead: both it (and the phi node) can be removed.
909  use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
910  use_nodbg_iterator End = MRI->use_nodbg_end();
911  if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI())
912  return false;
913 
914  MachineInstr *OnePhi = I.getOperand().getParent();
915  for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
916  const MachineOperand &OPO = OnePhi->getOperand(j);
917  if (!OPO.isReg() || !OPO.isDef())
918  continue;
919 
920  unsigned OPReg = OPO.getReg();
921  use_nodbg_iterator nextJ;
922  for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
923  J != End; J = nextJ) {
924  nextJ = llvm::next(J);
925  MachineOperand &Use = J.getOperand();
926  MachineInstr *UseMI = Use.getParent();
927 
928  // If the phi node has a user that is not MI, bail...
929  if (MI != UseMI)
930  return false;
931  }
932  }
933  DeadPhis.push_back(OnePhi);
934  }
935 
936  // If there are no defs with uses, the instruction is dead.
937  return true;
938 }
939 
940 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
941  // This procedure was essentially copied from DeadMachineInstructionElim.
942 
944  if (isDead(MI, DeadPhis)) {
945  DEBUG(dbgs() << "HW looping will remove: " << *MI);
946 
947  // It is possible that some DBG_VALUE instructions refer to this
948  // instruction. Examine each def operand for such references;
949  // if found, mark the DBG_VALUE as undef (but don't delete it).
950  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
951  const MachineOperand &MO = MI->getOperand(i);
952  if (!MO.isReg() || !MO.isDef())
953  continue;
954  unsigned Reg = MO.getReg();
956  for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
957  E = MRI->use_end(); I != E; I = nextI) {
958  nextI = llvm::next(I); // I is invalidated by the setReg
959  MachineOperand &Use = I.getOperand();
960  MachineInstr *UseMI = Use.getParent();
961  if (UseMI == MI)
962  continue;
963  if (Use.isDebug())
964  UseMI->getOperand(0).setReg(0U);
965  // This may also be a "instr -> phi -> instr" case which can
966  // be removed too.
967  }
968  }
969 
970  MI->eraseFromParent();
971  for (unsigned i = 0; i < DeadPhis.size(); ++i)
972  DeadPhis[i]->eraseFromParent();
973  }
974 }
975 
976 /// \brief Check if the loop is a candidate for converting to a hardware
977 /// loop. If so, then perform the transformation.
978 ///
979 /// This function works on innermost loops first. A loop can be converted
980 /// if it is a counting loop; either a register value or an immediate.
981 ///
982 /// The code makes several assumptions about the representation of the loop
983 /// in llvm.
984 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
985  // This is just for sanity.
986  assert(L->getHeader() && "Loop without a header?");
987 
988  bool Changed = false;
989  // Process nested loops first.
990  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
991  Changed |= convertToHardwareLoop(*I);
992 
993  // If a nested loop has been converted, then we can't convert this loop.
994  if (Changed)
995  return Changed;
996 
997 #ifndef NDEBUG
998  // Stop trying after reaching the limit (if any).
999  int Limit = HWLoopLimit;
1000  if (Limit >= 0) {
1001  if (Counter >= HWLoopLimit)
1002  return false;
1003  Counter++;
1004  }
1005 #endif
1006 
1007  // Does the loop contain any invalid instructions?
1008  if (containsInvalidInstruction(L))
1009  return false;
1010 
1011  // Is the induction variable bump feeding the latch condition?
1012  if (!fixupInductionVariable(L))
1013  return false;
1014 
1015  MachineBasicBlock *LastMBB = L->getExitingBlock();
1016  // Don't generate hw loop if the loop has more than one exit.
1017  if (LastMBB == 0)
1018  return false;
1019 
1021  if (LastI == LastMBB->end())
1022  return false;
1023 
1024  // Ensure the loop has a preheader: the loop instruction will be
1025  // placed there.
1026  bool NewPreheader = false;
1027  MachineBasicBlock *Preheader = L->getLoopPreheader();
1028  if (!Preheader) {
1029  Preheader = createPreheaderForLoop(L);
1030  if (!Preheader)
1031  return false;
1032  NewPreheader = true;
1033  }
1034  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1035 
1037  // Are we able to determine the trip count for the loop?
1038  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1039  if (TripCount == 0)
1040  return false;
1041 
1042  // Is the trip count available in the preheader?
1043  if (TripCount->isReg()) {
1044  // There will be a use of the register inserted into the preheader,
1045  // so make sure that the register is actually defined at that point.
1046  MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1047  MachineBasicBlock *BBDef = TCDef->getParent();
1048  if (!NewPreheader) {
1049  if (!MDT->dominates(BBDef, Preheader))
1050  return false;
1051  } else {
1052  // If we have just created a preheader, the dominator tree won't be
1053  // aware of it. Check if the definition of the register dominates
1054  // the header, but is not the header itself.
1055  if (!MDT->properlyDominates(BBDef, L->getHeader()))
1056  return false;
1057  }
1058  }
1059 
1060  // Determine the loop start.
1061  MachineBasicBlock *LoopStart = L->getTopBlock();
1062  if (L->getLoopLatch() != LastMBB) {
1063  // When the exit and latch are not the same, use the latch block as the
1064  // start.
1065  // The loop start address is used only after the 1st iteration, and the
1066  // loop latch may contains instrs. that need to be executed after the
1067  // first iteration.
1068  LoopStart = L->getLoopLatch();
1069  // Make sure the latch is a successor of the exit, otherwise it won't work.
1070  if (!LastMBB->isSuccessor(LoopStart))
1071  return false;
1072  }
1073 
1074  // Convert the loop to a hardware loop.
1075  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1076  DebugLoc DL;
1077  if (InsertPos != Preheader->end())
1078  DL = InsertPos->getDebugLoc();
1079 
1080  if (TripCount->isReg()) {
1081  // Create a copy of the loop count register.
1082  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1083  BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1084  .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1085  // Add the Loop instruction to the beginning of the loop.
1086  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
1087  .addMBB(LoopStart)
1088  .addReg(CountReg);
1089  } else {
1090  assert(TripCount->isImm() && "Expecting immediate value for trip count");
1091  // Add the Loop immediate instruction to the beginning of the loop,
1092  // if the immediate fits in the instructions. Otherwise, we need to
1093  // create a new virtual register.
1094  int64_t CountImm = TripCount->getImm();
1095  if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
1096  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1097  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
1098  .addImm(CountImm);
1099  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
1100  .addMBB(LoopStart).addReg(CountReg);
1101  } else
1102  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
1103  .addMBB(LoopStart).addImm(CountImm);
1104  }
1105 
1106  // Make sure the loop start always has a reference in the CFG. We need
1107  // to create a BlockAddress operand to get this mechanism to work both the
1108  // MachineBasicBlock and BasicBlock objects need the flag set.
1109  LoopStart->setHasAddressTaken();
1110  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1111  // object.
1112  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1113 
1114  // Replace the loop branch with an endloop instruction.
1115  DebugLoc LastIDL = LastI->getDebugLoc();
1116  BuildMI(*LastMBB, LastI, LastIDL,
1117  TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
1118 
1119  // The loop ends with either:
1120  // - a conditional branch followed by an unconditional branch, or
1121  // - a conditional branch to the loop start.
1122  if (LastI->getOpcode() == Hexagon::JMP_t ||
1123  LastI->getOpcode() == Hexagon::JMP_f) {
1124  // Delete one and change/add an uncond. branch to out of the loop.
1125  MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1126  LastI = LastMBB->erase(LastI);
1127  if (!L->contains(BranchTarget)) {
1128  if (LastI != LastMBB->end())
1129  LastI = LastMBB->erase(LastI);
1131  TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
1132  }
1133  } else {
1134  // Conditional branch to loop start; just delete it.
1135  LastMBB->erase(LastI);
1136  }
1137  delete TripCount;
1138 
1139  // The induction operation and the comparison may now be
1140  // unneeded. If these are unneeded, then remove them.
1141  for (unsigned i = 0; i < OldInsts.size(); ++i)
1142  removeIfDead(OldInsts[i]);
1143 
1144  ++NumHWLoops;
1145  return true;
1146 }
1147 
1148 
1149 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1150  MachineInstr *CmpI) {
1151  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1152 
1153  MachineBasicBlock *BB = BumpI->getParent();
1154  if (CmpI->getParent() != BB)
1155  return false;
1156 
1157  typedef MachineBasicBlock::instr_iterator instr_iterator;
1158  // Check if things are in order to begin with.
1159  for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
1160  if (&*I == CmpI)
1161  return true;
1162 
1163  // Out of order.
1164  unsigned PredR = CmpI->getOperand(0).getReg();
1165  bool FoundBump = false;
1166  instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt);
1167  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1168  MachineInstr *In = &*I;
1169  for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1170  MachineOperand &MO = In->getOperand(i);
1171  if (MO.isReg() && MO.isUse()) {
1172  if (MO.getReg() == PredR) // Found an intervening use of PredR.
1173  return false;
1174  }
1175  }
1176 
1177  if (In == BumpI) {
1178  instr_iterator After = BumpI;
1179  instr_iterator From = CmpI;
1180  BB->splice(llvm::next(After), BB, From);
1181  FoundBump = true;
1182  break;
1183  }
1184  }
1185  assert (FoundBump && "Cannot determine instruction order");
1186  return FoundBump;
1187 }
1188 
1189 
1190 MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
1191  MachineInstr *DI = MRI->getVRegDef(R);
1192  unsigned DOpc = DI->getOpcode();
1193  switch (DOpc) {
1194  case Hexagon::TFRI:
1195  case Hexagon::TFRI64:
1197  case Hexagon::CONST64_Int_Real:
1198  return DI;
1199  }
1200  return 0;
1201 }
1202 
1203 
1204 int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
1205  if (MO.isImm())
1206  return MO.getImm();
1207  assert(MO.isReg());
1208  unsigned R = MO.getReg();
1209  MachineInstr *DI = defWithImmediate(R);
1210  assert(DI && "Need an immediate operand");
1211  // All currently supported "define-with-immediate" instructions have the
1212  // actual immediate value in the operand(1).
1213  int64_t v = DI->getOperand(1).getImm();
1214  return v;
1215 }
1216 
1217 
1218 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1219  if (MO.isImm()) {
1220  MO.setImm(Val);
1221  return;
1222  }
1223 
1224  assert(MO.isReg());
1225  unsigned R = MO.getReg();
1226  MachineInstr *DI = defWithImmediate(R);
1227  if (MRI->hasOneNonDBGUse(R)) {
1228  // If R has only one use, then just change its defining instruction to
1229  // the new immediate value.
1230  DI->getOperand(1).setImm(Val);
1231  return;
1232  }
1233 
1234  const TargetRegisterClass *RC = MRI->getRegClass(R);
1235  unsigned NewR = MRI->createVirtualRegister(RC);
1236  MachineBasicBlock &B = *DI->getParent();
1237  DebugLoc DL = DI->getDebugLoc();
1238  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
1239  .addImm(Val);
1240  MO.setReg(NewR);
1241 }
1242 
1243 
1244 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1245  MachineBasicBlock *Header = L->getHeader();
1246  MachineBasicBlock *Preheader = L->getLoopPreheader();
1247  MachineBasicBlock *Latch = L->getLoopLatch();
1248 
1249  if (!Header || !Preheader || !Latch)
1250  return false;
1251 
1252  // These data structures follow the same concept as the corresponding
1253  // ones in findInductionRegister (where some comments are).
1254  typedef std::pair<unsigned,int64_t> RegisterBump;
1255  typedef std::pair<unsigned,RegisterBump> RegisterInduction;
1256  typedef std::set<RegisterInduction> RegisterInductionSet;
1257 
1258  // Register candidates for induction variables, with their associated bumps.
1259  RegisterInductionSet IndRegs;
1260 
1261  // Look for induction patterns:
1262  // vreg1 = PHI ..., [ latch, vreg2 ]
1263  // vreg2 = ADD vreg1, imm
1264  typedef MachineBasicBlock::instr_iterator instr_iterator;
1265  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1266  I != E && I->isPHI(); ++I) {
1267  MachineInstr *Phi = &*I;
1268 
1269  // Have a PHI instruction.
1270  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1271  if (Phi->getOperand(i+1).getMBB() != Latch)
1272  continue;
1273 
1274  unsigned PhiReg = Phi->getOperand(i).getReg();
1275  MachineInstr *DI = MRI->getVRegDef(PhiReg);
1276  unsigned UpdOpc = DI->getOpcode();
1277  bool isAdd = (UpdOpc == Hexagon::ADD_ri);
1278 
1279  if (isAdd) {
1280  // If the register operand to the add/sub is the PHI we are looking
1281  // at, this meets the induction pattern.
1282  unsigned IndReg = DI->getOperand(1).getReg();
1283  if (MRI->getVRegDef(IndReg) == Phi) {
1284  unsigned UpdReg = DI->getOperand(0).getReg();
1285  int64_t V = DI->getOperand(2).getImm();
1286  IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1287  }
1288  }
1289  } // for (i)
1290  } // for (instr)
1291 
1292  if (IndRegs.empty())
1293  return false;
1294 
1295  MachineBasicBlock *TB = 0, *FB = 0;
1297  // AnalyzeBranch returns true if it fails to analyze branch.
1298  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
1299  if (NotAnalyzed)
1300  return false;
1301 
1302  // Check if the latch branch is unconditional.
1303  if (Cond.empty())
1304  return false;
1305 
1306  if (TB != Header && FB != Header)
1307  // The latch does not go back to the header. Not a latch we know and love.
1308  return false;
1309 
1310  // Expecting a predicate register as a condition. It won't be a hardware
1311  // predicate register at this point yet, just a vreg.
1312  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1313  // into Cond, followed by the predicate register. For non-negated branches
1314  // it's just the register.
1315  unsigned CSz = Cond.size();
1316  if (CSz != 1 && CSz != 2)
1317  return false;
1318 
1319  unsigned P = Cond[CSz-1].getReg();
1320  MachineInstr *PredDef = MRI->getVRegDef(P);
1321 
1322  if (!PredDef->isCompare())
1323  return false;
1324 
1325  SmallSet<unsigned,2> CmpRegs;
1326  MachineOperand *CmpImmOp = 0;
1327 
1328  // Go over all operands to the compare and look for immediate and register
1329  // operands. Assume that if the compare has a single register use and a
1330  // single immediate operand, then the register is being compared with the
1331  // immediate value.
1332  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1333  MachineOperand &MO = PredDef->getOperand(i);
1334  if (MO.isReg()) {
1335  // Skip all implicit references. In one case there was:
1336  // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1337  if (MO.isImplicit())
1338  continue;
1339  if (MO.isUse()) {
1340  unsigned R = MO.getReg();
1341  if (!defWithImmediate(R)) {
1342  CmpRegs.insert(MO.getReg());
1343  continue;
1344  }
1345  // Consider the register to be the "immediate" operand.
1346  if (CmpImmOp)
1347  return false;
1348  CmpImmOp = &MO;
1349  }
1350  } else if (MO.isImm()) {
1351  if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1352  return false;
1353  CmpImmOp = &MO;
1354  }
1355  }
1356 
1357  if (CmpRegs.empty())
1358  return false;
1359 
1360  // Check if the compared register follows the order we want. Fix if needed.
1361  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1362  I != E; ++I) {
1363  // This is a success. If the register used in the comparison is one that
1364  // we have identified as a bumped (updated) induction register, there is
1365  // nothing to do.
1366  if (CmpRegs.count(I->first))
1367  return true;
1368 
1369  // Otherwise, if the register being compared comes out of a PHI node,
1370  // and has been recognized as following the induction pattern, and is
1371  // compared against an immediate, we can fix it.
1372  const RegisterBump &RB = I->second;
1373  if (CmpRegs.count(RB.first)) {
1374  if (!CmpImmOp)
1375  return false;
1376 
1377  int64_t CmpImm = getImmediate(*CmpImmOp);
1378  int64_t V = RB.second;
1379  if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit).
1380  return false;
1381  if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit).
1382  return false;
1383  CmpImm += V;
1384  // Some forms of cmp-immediate allow u9 and s10. Assume the worst case
1385  // scenario, i.e. an 8-bit value.
1386  if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
1387  return false;
1388 
1389  // Make sure that the compare happens after the bump. Otherwise,
1390  // after the fixup, the compare would use a yet-undefined register.
1391  MachineInstr *BumpI = MRI->getVRegDef(I->first);
1392  bool Order = orderBumpCompare(BumpI, PredDef);
1393  if (!Order)
1394  return false;
1395 
1396  // Finally, fix the compare instruction.
1397  setImmediate(*CmpImmOp, CmpImm);
1398  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1399  MachineOperand &MO = PredDef->getOperand(i);
1400  if (MO.isReg() && MO.getReg() == RB.first) {
1401  MO.setReg(I->first);
1402  return true;
1403  }
1404  }
1405  }
1406  }
1407 
1408  return false;
1409 }
1410 
1411 
1412 /// \brief Create a preheader for a given loop.
1413 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1414  MachineLoop *L) {
1415  if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
1416  return TmpPH;
1417 
1418  MachineBasicBlock *Header = L->getHeader();
1419  MachineBasicBlock *Latch = L->getLoopLatch();
1420  MachineFunction *MF = Header->getParent();
1421  DebugLoc DL;
1422 
1423  if (!Latch || Header->hasAddressTaken())
1424  return 0;
1425 
1426  typedef MachineBasicBlock::instr_iterator instr_iterator;
1427 
1428  // Verify that all existing predecessors have analyzable branches
1429  // (or no branches at all).
1430  typedef std::vector<MachineBasicBlock*> MBBVector;
1431  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1433  MachineBasicBlock *TB = 0, *FB = 0;
1434 
1435  if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
1436  return 0;
1437 
1438  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1439  MachineBasicBlock *PB = *I;
1440  if (PB != Latch) {
1441  bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
1442  if (NotAnalyzed)
1443  return 0;
1444  }
1445  }
1446 
1448  MF->insert(Header, NewPH);
1449 
1450  if (Header->pred_size() > 2) {
1451  // Ensure that the header has only two predecessors: the preheader and
1452  // the loop latch. Any additional predecessors of the header should
1453  // join at the newly created preheader. Inspect all PHI nodes from the
1454  // header and create appropriate corresponding PHI nodes in the preheader.
1455 
1456  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1457  I != E && I->isPHI(); ++I) {
1458  MachineInstr *PN = &*I;
1459 
1460  const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1461  MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1462  NewPH->insert(NewPH->end(), NewPN);
1463 
1464  unsigned PR = PN->getOperand(0).getReg();
1465  const TargetRegisterClass *RC = MRI->getRegClass(PR);
1466  unsigned NewPR = MRI->createVirtualRegister(RC);
1467  NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1468 
1469  // Copy all non-latch operands of a header's PHI node to the newly
1470  // created PHI node in the preheader.
1471  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1472  unsigned PredR = PN->getOperand(i).getReg();
1473  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1474  if (PredB == Latch)
1475  continue;
1476 
1477  NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
1478  NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1479  }
1480 
1481  // Remove copied operands from the old PHI node and add the value
1482  // coming from the preheader's PHI.
1483  for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1484  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1485  if (PredB != Latch) {
1486  PN->RemoveOperand(i+1);
1487  PN->RemoveOperand(i);
1488  }
1489  }
1490  PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1492  }
1493 
1494  } else {
1495  assert(Header->pred_size() == 2);
1496 
1497  // The header has only two predecessors, but the non-latch predecessor
1498  // is not a preheader (e.g. it has other successors, etc.)
1499  // In such a case we don't need any extra PHI nodes in the new preheader,
1500  // all we need is to adjust existing PHIs in the header to now refer to
1501  // the new preheader.
1502  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1503  I != E && I->isPHI(); ++I) {
1504  MachineInstr *PN = &*I;
1505  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1506  MachineOperand &MO = PN->getOperand(i+1);
1507  if (MO.getMBB() != Latch)
1508  MO.setMBB(NewPH);
1509  }
1510  }
1511  }
1512 
1513  // "Reroute" the CFG edges to link in the new preheader.
1514  // If any of the predecessors falls through to the header, insert a branch
1515  // to the new preheader in that place.
1518 
1519  TB = FB = 0;
1520 
1521  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1522  MachineBasicBlock *PB = *I;
1523  if (PB != Latch) {
1524  Tmp2.clear();
1525  bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
1526  (void)NotAnalyzed; // supress compiler warning
1527  assert (!NotAnalyzed && "Should be analyzable!");
1528  if (TB != Header && (Tmp2.empty() || FB != Header))
1529  TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
1530  PB->ReplaceUsesOfBlockWith(Header, NewPH);
1531  }
1532  }
1533 
1534  // It can happen that the latch block will fall through into the header.
1535  // Insert an unconditional branch to the header.
1536  TB = FB = 0;
1537  bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
1538  (void)LatchNotAnalyzed; // supress compiler warning
1539  assert (!LatchNotAnalyzed && "Should be analyzable!");
1540  if (!TB && !FB)
1541  TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
1542 
1543  // Finally, the branch from the preheader to the header.
1544  TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
1545  NewPH->addSuccessor(Header);
1546 
1547  return NewPH;
1548 }
static bool isReg(const MCInst &MI, unsigned OpNo)
bool isImplicit() const
const MachineFunction * getParent() const
instr_iterator erase(instr_iterator I)
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp=false)
MachineInstr * getParent()
static PassRegistry * getPassRegistry()
instr_iterator instr_begin()
instr_iterator instr_end()
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
MachineBasicBlock * getMBB() const
void initializeHexagonHardwareLoopsPass(PassRegistry &)
const MCInstrDesc & getDesc() const
Definition: MachineInstr.h:257
STATISTIC(NumHWLoops,"Number of loops converted to hardware loops")
LoopT * getParentLoop() const
Definition: LoopInfo.h:96
F(f)
FunctionPass * createHexagonHardwareLoops()
Instructions::iterator instr_iterator
const std::vector< BlockT * > & getBlocks() const
Definition: LoopInfo.h:138
BlockT * getHeader() const
Definition: LoopInfo.h:95
BlockT * getLoopLatch() const
Definition: LoopInfoImpl.h:154
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops,"hwloops","Hexagon Hardware Loops", false, false) INITIALIZE_PASS_END(HexagonHardwareLoops
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:167
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Hexagon Hardware Loops
MachineBasicBlock * getTopBlock()
const HexagonInstrInfo * TII
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: Use.h:60
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:232
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:172
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:268
int64_t abs64(int64_t x)
Definition: MathExtras.h:579
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
const MachineInstrBuilder & addImm(int64_t Val) const
#define G(x, y, z)
Definition: MD5.cpp:52
unsigned getNumOperands() const
Definition: MachineInstr.h:265
void RemoveOperand(unsigned i)
bool isValidOffset(const int Opcode, const int Offset) const
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:56
int getOpcode() const
Definition: MachineInstr.h:261
bool insert(const T &V)
Definition: SmallSet.h:59
std::vector< MachineBasicBlock * >::iterator pred_iterator
static cl::opt< int > HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1))
#define EQ(a, b)
Definition: regexec.c:112
int64_t getImm() const
void dump() const
const BasicBlock * getBasicBlock() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:119
bundle_iterator< MachineInstr, instr_iterator > iterator
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:314
iterator begin() const
Definition: LoopInfo.h:130
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=0)
BlockT * getLoopPreheader() const
Definition: LoopInfoImpl.h:106
static BlockAddress * get(Function *F, BasicBlock *BB)
get - Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1358
const MCInstrInfo & MII
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:267
void setMBB(MachineBasicBlock *MBB)
iterator end() const
Definition: LoopInfo.h:131
ItTy next(ItTy it, Dist n)
Definition: STLExtras.h:154
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl< MachineOperand > &Cond, DebugLoc DL) const
bool contains(const LoopT *L) const
Definition: LoopInfo.h:104
void setImm(int64_t immVal)
BlockT * getExitingBlock() const
Definition: LoopInfoImpl.h:49
virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
unsigned getSubReg() const
Hexagon Hardware static false bool isHardwareLoop(const MachineInstr *MI)
Returns true if the instruction is a hardware loop instruction.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Hexagon Hardware false
bool isSuccessor(const MachineBasicBlock *MBB) const
bool isCompare(QueryType Type=IgnoreBundle) const
isCompare - Return true if this instruction is a comparison.
Definition: MachineInstr.h:411
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
unsigned Log2_32(uint32_t Value)
Definition: MathExtras.h:443
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:591
bool isPowerOf2_64(uint64_t Value)
Definition: MathExtras.h:360
bool count(const T &V) const
count - Return true if the element is in the set.
Definition: SmallSet.h:48
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
MachineRegisterInfo & getRegInfo()
virtual void getAnalysisUsage(AnalysisUsage &AU) const
void setReg(unsigned Reg)
#define I(x, y, z)
Definition: MD5.cpp:54
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
const TargetMachine & getTarget() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:272
unsigned getReg() const
getReg - Returns the register number.
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool isDebug() const
#define DEBUG(X)
Definition: Debug.h:97
const MCRegisterInfo & MRI
bool empty() const
Definition: SmallSet.h:42
std::vector< LoopT * >::const_iterator iterator
Definition: LoopInfo.h:127
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
unsigned pred_size() const
DebugLoc getDebugLoc() const
Definition: MachineInstr.h:244