MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "misched"
16 
18 #include "llvm/ADT/OwningPtr.h"
19 #include "llvm/ADT/PriorityQueue.h"
25 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/Support/Debug.h"
35 #include <queue>
36 
37 using namespace llvm;
38 
39 namespace llvm {
40 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
41  cl::desc("Force top-down list scheduling"));
42 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
43  cl::desc("Force bottom-up list scheduling"));
44 }
45 
46 #ifndef NDEBUG
47 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
48  cl::desc("Pop up a window to show MISched dags after they are processed"));
49 
50 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
51  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
52 #else
53 static bool ViewMISchedDAGs = false;
54 #endif // NDEBUG
55 
56 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
57  cl::desc("Enable register pressure scheduling."), cl::init(true));
58 
59 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
60  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
61 
62 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
63  cl::desc("Enable load clustering."), cl::init(true));
64 
65 // Experimental heuristics
66 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
67  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
68 
69 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
70  cl::desc("Verify machine instrs before and after machine scheduling"));
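// Example usage of the flags above (illustrative only; the exact driver
// invocation depends on the target and build configuration):
//   llc -enable-misched -misched-topdown foo.ll
//   llc -enable-misched -verify-misched -debug-only=misched foo.ll
// Note that -misched-cutoff, -view-misched-dags and -debug-only=misched are
// only available in builds with assertions enabled (see the #ifndef NDEBUG
// block above).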
71 
72 // DAG subtrees must have at least this many nodes.
73 static const unsigned MinSubtreeSize = 8;
74 
75 // Pin the vtables to this file.
76 void MachineSchedStrategy::anchor() {}
77 void ScheduleDAGMutation::anchor() {}
78 
79 //===----------------------------------------------------------------------===//
80 // Machine Instruction Scheduling Pass and Registry
81 //===----------------------------------------------------------------------===//
82 
83 MachineSchedContext::MachineSchedContext():
84  MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
85  RegClassInfo = new RegisterClassInfo();
86 }
87 
88 MachineSchedContext::~MachineSchedContext() {
89  delete RegClassInfo;
90 }
91 
92 namespace {
93 /// MachineScheduler runs after coalescing and before register allocation.
94 class MachineScheduler : public MachineSchedContext,
95  public MachineFunctionPass {
96 public:
97  MachineScheduler();
98 
99  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
100 
101  virtual void releaseMemory() {}
102 
103  virtual bool runOnMachineFunction(MachineFunction&);
104 
105  virtual void print(raw_ostream &O, const Module* = 0) const;
106 
107  static char ID; // Class identification, replacement for typeinfo
108 
109 protected:
110  ScheduleDAGInstrs *createMachineScheduler();
111 };
112 } // namespace
113 
114 char MachineScheduler::ID = 0;
115 
117 
118 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
119  "Machine Instruction Scheduler", false, false)
123 INITIALIZE_PASS_END(MachineScheduler, "misched",
125 
126 MachineScheduler::MachineScheduler()
129 }
130 
131 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
132  AU.setPreservesCFG();
137  AU.addRequired<SlotIndexes>();
142 }
143 
145 
146 /// A dummy default scheduler factory indicates whether the scheduler
147 /// is overridden on the command line.
149  return 0;
150 }
151 
152 /// MachineSchedOpt allows command line selection of the scheduler.
155 MachineSchedOpt("misched",
157  cl::desc("Machine instruction scheduler to use"));
158 
160 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
162 
163 /// Forward declare the standard machine scheduler. This will be used as the
164 /// default scheduler if the target does not set a default.
166 
167 
168 /// Decrement this iterator until reaching the top or a non-debug instr.
172  assert(I != Beg && "reached the top of the region, cannot decrement");
173  while (--I != Beg) {
174  if (!I->isDebugValue())
175  break;
176  }
177  return I;
178 }
179 
180 /// Non-const version.
184  return const_cast<MachineInstr*>(
186 }
187 
188 /// If this iterator is a debug value, increment until reaching the End or a
189 /// non-debug instruction.
193  for(; I != End; ++I) {
194  if (!I->isDebugValue())
195  break;
196  }
197  return I;
198 }
199 
200 /// Non-const version.
204  // Cast the return value to nonconst MachineInstr, then cast to an
205  // instr_iterator, which does not check for null, finally return a
206  // bundle_iterator.
208  const_cast<MachineInstr*>(
210 }
211 
212 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
213 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
214  // Select the scheduler, or set the default.
216  if (Ctor != useDefaultMachineSched)
217  return Ctor(this);
218 
219  // Get the default scheduler set by the target for this function.
220  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
221  if (Scheduler)
222  return Scheduler;
223 
224  // Default to GenericScheduler.
225  return createGenericSched(this);
226 }
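// Sketch of how a target could plug in its own strategy (hypothetical names,
// not part of this file): a target either overrides
// TargetPassConfig::createMachineScheduler(), which is consulted above, or
// registers a factory so the strategy becomes selectable via -misched=<name>:
//
//   static ScheduleDAGInstrs *createMyTargetSched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new MyTargetStrategy(C));
//   }
//   static MachineSchedRegistry
//   MyTargetSchedRegistry("mytarget", "Custom MyTarget scheduler.",
//                         createMyTargetSched);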
227 
228 /// Top-level MachineScheduler pass driver.
229 ///
230 /// Visit blocks in function order. Divide each block into scheduling regions
231 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
232 /// consistent with the DAG builder, which traverses the interior of the
233 /// scheduling regions bottom-up.
234 ///
235 /// This design avoids exposing scheduling boundaries to the DAG builder,
236 /// simplifying the DAG builder's support for "special" target instructions.
237 /// At the same time the design allows target schedulers to operate across
238 /// scheduling boundaries, for example to bundle the boundary instructions
239 /// without reordering them. This creates complexity, because the target
240 /// scheduler must update the RegionBegin and RegionEnd positions cached by
241 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
242 /// design would be to split blocks at scheduling boundaries, but LLVM has a
243 /// general bias against block splitting purely for implementation simplicity.
244 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
245  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
246 
247  // Initialize the context of the pass.
248  MF = &mf;
249  MLI = &getAnalysis<MachineLoopInfo>();
250  MDT = &getAnalysis<MachineDominatorTree>();
251  PassConfig = &getAnalysis<TargetPassConfig>();
252  AA = &getAnalysis<AliasAnalysis>();
253 
254  LIS = &getAnalysis<LiveIntervals>();
255  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
256 
257  if (VerifyScheduling) {
258  DEBUG(LIS->dump());
259  MF->verify(this, "Before machine scheduling.");
260  }
261  RegClassInfo->runOnMachineFunction(*MF);
262 
263  // Instantiate the selected scheduler for this target, function, and
264  // optimization level.
265  OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
266 
267  // Visit all machine basic blocks.
268  //
269  // TODO: Visit blocks in global postorder or postorder within the bottom-up
270  // loop tree. Then we can optionally compute global RegPressure.
271  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
272  MBB != MBBEnd; ++MBB) {
273 
274  Scheduler->startBlock(MBB);
275 
276  // Break the block into scheduling regions [I, RegionEnd), and schedule each
277  // region as soon as it is discovered. RegionEnd points to the scheduling
278  // boundary at the bottom of the region. The DAG does not include RegionEnd,
279  // but the region does (i.e. the next RegionEnd is above the previous
280  // RegionBegin). If the current block has no terminator then RegionEnd ==
281  // MBB->end() for the bottom region.
282  //
283  // The Scheduler may insert instructions during either schedule() or
284  // exitRegion(), even for empty regions. So the local iterators 'I' and
285  // 'RegionEnd' are invalid across these calls.
286  unsigned RemainingInstrs = MBB->size();
287  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
288  RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
289 
290  // Avoid decrementing RegionEnd for blocks with no terminator.
291  if (RegionEnd != MBB->end()
292  || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
293  --RegionEnd;
294  // Count the boundary instruction.
295  --RemainingInstrs;
296  }
297 
298  // The next region starts above the previous region. Look backward in the
299  // instruction stream until we find the nearest boundary.
300  unsigned NumRegionInstrs = 0;
301  MachineBasicBlock::iterator I = RegionEnd;
302  for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
303  if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
304  break;
305  }
306  // Notify the scheduler of the region, even if we may skip scheduling
307  // it. Perhaps it still needs to be bundled.
308  Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
309 
310  // Skip empty scheduling regions (0 or 1 schedulable instructions).
311  if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
312  // Close the current region. Bundle the terminator if needed.
313  // This invalidates 'RegionEnd' and 'I'.
314  Scheduler->exitRegion();
315  continue;
316  }
317  DEBUG(dbgs() << "********** MI Scheduling **********\n");
318  DEBUG(dbgs() << MF->getName()
319  << ":BB#" << MBB->getNumber() << " " << MBB->getName()
320  << "\n From: " << *I << " To: ";
321  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
322  else dbgs() << "End";
323  dbgs() << " RegionInstrs: " << NumRegionInstrs
324  << " Remaining: " << RemainingInstrs << "\n");
325 
326  // Schedule a region: possibly reorder instructions.
327  // This invalidates 'RegionEnd' and 'I'.
328  Scheduler->schedule();
329 
330  // Close the current region.
331  Scheduler->exitRegion();
332 
333  // Scheduling has invalidated the current iterator 'I'. Ask the
334  // scheduler for the top of its scheduled region.
335  RegionEnd = Scheduler->begin();
336  }
337  assert(RemainingInstrs == 0 && "Instruction count mismatch!");
338  Scheduler->finishBlock();
339  }
340  Scheduler->finalizeSchedule();
341  DEBUG(LIS->dump());
342  if (VerifyScheduling)
343  MF->verify(this, "After machine scheduling.");
344  return true;
345 }
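// Worked example of the region decomposition performed above (illustrative
// instruction names only): for a block
//   %a = ...
//   %b = use %a
//   EH_LABEL <...>        ; reported by isSchedulingBoundary()
//   %c = ...
//   %d = use %c
//   BR ...                ; terminator, also a boundary
// the inner loop first visits and schedules the bottom region [%c, BR), then
// the region [%a, EH_LABEL) above it. Boundary instructions are counted in
// RemainingInstrs but are never reordered, and regions with 0 or 1
// schedulable instructions are entered and immediately exited so they can
// still be bundled.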
346 
347 void MachineScheduler::print(raw_ostream &O, const Module* m) const {
348  // unimplemented
349 }
350 
351 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
352 void ReadyQueue::dump() {
353  dbgs() << Name << ": ";
354  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
355  dbgs() << Queue[i]->NodeNum << " ";
356  dbgs() << "\n";
357 }
358 #endif
359 
360 //===----------------------------------------------------------------------===//
361 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
362 // preservation.
363 //===----------------------------------------------------------------------===//
364 
365 ScheduleDAGMI::~ScheduleDAGMI() {
366  delete DFSResult;
368  delete SchedImpl;
369 }
370 
371 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
372  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
373 }
374 
375 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
376  if (SuccSU != &ExitSU) {
377  // Do not use WillCreateCycle, it assumes SD scheduling.
378  // If Pred is reachable from Succ, then the edge creates a cycle.
379  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
380  return false;
381  Topo.AddPred(SuccSU, PredDep.getSUnit());
382  }
383  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
384  // Return true regardless of whether a new edge needed to be inserted.
385  return true;
386 }
387 
388 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
389 /// NumPredsLeft reaches zero, release the successor node.
390 ///
391 /// FIXME: Adjust SuccSU height based on MinLatency.
392 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
393  SUnit *SuccSU = SuccEdge->getSUnit();
394 
395  if (SuccEdge->isWeak()) {
396  --SuccSU->WeakPredsLeft;
397  if (SuccEdge->isCluster())
398  NextClusterSucc = SuccSU;
399  return;
400  }
401 #ifndef NDEBUG
402  if (SuccSU->NumPredsLeft == 0) {
403  dbgs() << "*** Scheduling failed! ***\n";
404  SuccSU->dump(this);
405  dbgs() << " has been released too many times!\n";
406  llvm_unreachable(0);
407  }
408 #endif
409  --SuccSU->NumPredsLeft;
410  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
411  SchedImpl->releaseTopNode(SuccSU);
412 }
413 
414 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
416  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
417  I != E; ++I) {
418  releaseSucc(SU, &*I);
419  }
420 }
421 
422 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
423 /// NumSuccsLeft reaches zero, release the predecessor node.
424 ///
425 /// FIXME: Adjust PredSU height based on MinLatency.
426 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
427  SUnit *PredSU = PredEdge->getSUnit();
428 
429  if (PredEdge->isWeak()) {
430  --PredSU->WeakSuccsLeft;
431  if (PredEdge->isCluster())
432  NextClusterPred = PredSU;
433  return;
434  }
435 #ifndef NDEBUG
436  if (PredSU->NumSuccsLeft == 0) {
437  dbgs() << "*** Scheduling failed! ***\n";
438  PredSU->dump(this);
439  dbgs() << " has been released too many times!\n";
440  llvm_unreachable(0);
441  }
442 #endif
443  --PredSU->NumSuccsLeft;
444  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
445  SchedImpl->releaseBottomNode(PredSU);
446 }
447 
448 /// releasePredecessors - Call releasePred on each of SU's predecessors.
450  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
451  I != E; ++I) {
452  releasePred(SU, &*I);
453  }
454 }
455 
456 /// This is normally called from the main scheduler loop but may also be invoked
457 /// by the scheduling strategy to perform additional code motion.
459  MachineBasicBlock::iterator InsertPos) {
460  // Advance RegionBegin if the first instruction moves down.
461  if (&*RegionBegin == MI)
462  ++RegionBegin;
463 
464  // Update the instruction stream.
465  BB->splice(InsertPos, BB, MI);
466 
467  // Update LiveIntervals
468  LIS->handleMove(MI, /*UpdateFlags=*/true);
469 
470  // Recede RegionBegin if an instruction moves above the first.
471  if (RegionBegin == InsertPos)
472  RegionBegin = MI;
473 }
474 
475 bool ScheduleDAGMI::checkSchedLimit() {
476 #ifndef NDEBUG
477  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
478  CurrentTop = CurrentBottom;
479  return false;
480  }
481  ++NumInstrsScheduled;
482 #endif
483  return true;
484 }
485 
486 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
487 /// crossing a scheduling boundary. [begin, end) includes all instructions in
488 /// the region, including the boundary itself and single-instruction regions
489 /// that don't get scheduled.
493  unsigned regioninstrs)
494 {
495  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
496 
497  // For convenience remember the end of the liveness region.
498  LiveRegionEnd =
499  (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
500 
502 
503  SchedImpl->initPolicy(begin, end, regioninstrs);
504 
506 }
507 
508 // Set up the register pressure trackers for the top and bottom scheduled
509 // regions.
513 
514  // Close the RPTracker to finalize live ins.
516 
517  DEBUG(RPTracker.dump());
518 
519  // Initialize the live ins and live outs.
522 
523  // Close one end of the tracker so we can call
524  // getMaxUpward/DownwardPressureDelta before advancing across any
525  // instructions. This converts currently live regs into live ins/outs.
528 
530  if (!BotRPTracker.getLiveThru().empty()) {
532  DEBUG(dbgs() << "Live Thru: ";
534  };
535 
536  // For each live out vreg reduce the pressure change associated with other
537  // uses of the same vreg below the live-out reaching def.
539 
540  // Account for liveness generated by the region boundary.
541  if (LiveRegionEnd != RegionEnd) {
542  SmallVector<unsigned, 8> LiveUses;
543  BotRPTracker.recede(&LiveUses);
544  updatePressureDiffs(LiveUses);
545  }
546 
547  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
548 
549  // Cache the list of excess pressure sets in this region. This will also track
550  // the max pressure in the scheduled code for these sets.
551  RegionCriticalPSets.clear();
552  const std::vector<unsigned> &RegionPressure =
554  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
555  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
556  if (RegionPressure[i] > Limit) {
558  << " Limit " << Limit
559  << " Actual " << RegionPressure[i] << "\n");
560  RegionCriticalPSets.push_back(PressureChange(i));
561  }
562  }
563  DEBUG(dbgs() << "Excess PSets: ";
564  for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
566  RegionCriticalPSets[i].getPSet()) << " ";
567  dbgs() << "\n");
568 }
569 
570 void ScheduleDAGMI::
572  const std::vector<unsigned> &NewMaxPressure) {
573  const PressureDiff &PDiff = getPressureDiff(SU);
574  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
575  for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
576  I != E; ++I) {
577  if (!I->isValid())
578  break;
579  unsigned ID = I->getPSet();
580  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
581  ++CritIdx;
582  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
583  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
584  && NewMaxPressure[ID] <= INT16_MAX)
585  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
586  }
587  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
588  if (NewMaxPressure[ID] >= Limit - 2) {
589  DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
590  << NewMaxPressure[ID] << " > " << Limit << "(+ "
591  << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
592  }
593  }
594 }
595 
596 /// Update the PressureDiff array for liveness after scheduling this
597 /// instruction.
599  for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
600  /// FIXME: Currently assuming single-use physregs.
601  unsigned Reg = LiveUses[LUIdx];
602  DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
603  if (!TRI->isVirtualRegister(Reg))
604  continue;
605 
606  // This may be called before CurrentBottom has been initialized. However,
607  // BotRPTracker must have a valid position. We want the value live into the
608  // instruction or live out of the block, so ask for the previous
609  // instruction's live-out.
610  const LiveInterval &LI = LIS->getInterval(Reg);
611  VNInfo *VNI;
614  if (I == BB->end())
615  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
616  else {
617  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
618  VNI = LRQ.valueIn();
619  }
620  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
621  assert(VNI && "No live value at use.");
623  UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
624  SUnit *SU = UI->SU;
625  DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
626  << *SU->getInstr());
627  // If this use comes before the reaching def, it cannot be a last use, so
628  // decrease its pressure change.
629  if (!SU->isScheduled && SU != &ExitSU) {
630  LiveQueryResult LRQ
631  = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
632  if (LRQ.valueIn() == VNI)
633  getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
634  }
635  }
636  }
637 }
638 
639 /// schedule - Called back from MachineScheduler::runOnMachineFunction
640 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
641 /// only includes instructions that have DAG nodes, not scheduling boundaries.
642 ///
643 /// This is a skeletal driver, with all the functionality pushed into helpers,
644 /// so that it can be easily extended by experimental schedulers. Generally,
645 /// implementing MachineSchedStrategy should be sufficient to implement a new
646 /// scheduling algorithm. However, if a scheduler further subclasses
647 /// ScheduleDAGMI then it will want to override this virtual method in order to
648 /// update any specialized state.
651 
653 
654  postprocessDAG();
655 
656  SmallVector<SUnit*, 8> TopRoots, BotRoots;
657  findRootsAndBiasEdges(TopRoots, BotRoots);
658 
659  // Initialize the strategy before modifying the DAG.
660  // This may initialize a DFSResult to be used for queue priority.
661  SchedImpl->initialize(this);
662 
663  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
664  SUnits[su].dumpAll(this));
665  if (ViewMISchedDAGs) viewGraph();
666 
667  // Initialize ready queues now that the DAG and priority data are finalized.
668  initQueues(TopRoots, BotRoots);
669 
670  bool IsTopNode = false;
671  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
672  assert(!SU->isScheduled && "Node already scheduled");
673  if (!checkSchedLimit())
674  break;
675 
676  scheduleMI(SU, IsTopNode);
677 
678  updateQueues(SU, IsTopNode);
679  }
680  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
681 
683 
684  DEBUG({
685  unsigned BBNum = begin()->getParent()->getNumber();
686  dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
687  dumpSchedule();
688  dbgs() << '\n';
689  });
690 }
691 
692 /// Build the DAG and setup three register pressure trackers.
694  if (!ShouldTrackPressure) {
695  RPTracker.reset();
696  RegionCriticalPSets.clear();
697  buildSchedGraph(AA);
698  return;
699  }
700 
701  // Initialize the register pressure tracker used by buildSchedGraph.
703  /*TrackUntiedDefs=*/true);
704 
705  // Account for liveness generated by the region boundary.
706  if (LiveRegionEnd != RegionEnd)
707  RPTracker.recede();
708 
709  // Build the DAG, and compute current register pressure.
711 
712  // Initialize top/bottom trackers after computing region pressure.
713  initRegPressure();
714 }
715 
716 /// Apply each ScheduleDAGMutation step in order.
718  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
719  Mutations[i]->apply(this);
720  }
721 }
722 
724  if (!DFSResult)
725  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
726  DFSResult->clear();
728  DFSResult->resize(SUnits.size());
731 }
732 
734  SmallVectorImpl<SUnit*> &BotRoots) {
735  for (std::vector<SUnit>::iterator
736  I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
737  SUnit *SU = &(*I);
738  assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
739 
740  // Order predecessors so DFSResult follows the critical path.
741  SU->biasCriticalPath();
742 
743  // A SUnit is ready to top schedule if it has no predecessors.
744  if (!I->NumPredsLeft)
745  TopRoots.push_back(SU);
746  // A SUnit is ready to bottom schedule if it has no successors.
747  if (!I->NumSuccsLeft)
748  BotRoots.push_back(SU);
749  }
751 }
752 
753 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
754 /// only provides the critical path for single block loops. To handle loops that
755 /// span blocks, we could use the vreg path latencies provided by
756 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
757 /// available for use in the scheduler.
758 ///
759 /// The cyclic path estimation identifies a def-use pair that crosses the back
760 /// edge and considers the depth and height of the nodes. For example, consider
761 /// the following instruction sequence where each instruction has unit latency
762 /// and defines an eponymous virtual register:
763 ///
764 /// a->b(a,c)->c(b)->d(c)->exit
765 ///
766 /// The cyclic critical path is two cycles: b->c->b
767 /// The acyclic critical path is four cycles: a->b->c->d->exit
768 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
769 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
770 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
771 /// LiveInDepth = depth(b) = len(a->b) = 1
772 ///
773 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
774 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
775 /// CyclicCriticalPath = min(2, 2) = 2
777  // This only applies to single-block loops.
778  if (!BB->isSuccessor(BB))
779  return 0;
780 
781  unsigned MaxCyclicLatency = 0;
782  // Visit each live out vreg def to find def/use pairs that cross iterations.
784  for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
785  RI != RE; ++RI) {
786  unsigned Reg = *RI;
787  if (!TRI->isVirtualRegister(Reg))
788  continue;
789  const LiveInterval &LI = LIS->getInterval(Reg);
790  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
791  if (!DefVNI)
792  continue;
793 
794  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
795  const SUnit *DefSU = getSUnit(DefMI);
796  if (!DefSU)
797  continue;
798 
799  unsigned LiveOutHeight = DefSU->getHeight();
800  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
801  // Visit all local users of the vreg def.
803  UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
804  if (UI->SU == &ExitSU)
805  continue;
806 
807  // Only consider uses of the phi.
808  LiveQueryResult LRQ =
809  LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
810  if (!LRQ.valueIn()->isPHIDef())
811  continue;
812 
813  // Assume that a path spanning two iterations is a cycle, which could
814  // overestimate in strange cases. This allows cyclic latency to be
815  // estimated as the minimum slack of the vreg's depth or height.
816  unsigned CyclicLatency = 0;
817  if (LiveOutDepth > UI->SU->getDepth())
818  CyclicLatency = LiveOutDepth - UI->SU->getDepth();
819 
820  unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
821  if (LiveInHeight > LiveOutHeight) {
822  if (LiveInHeight - LiveOutHeight < CyclicLatency)
823  CyclicLatency = LiveInHeight - LiveOutHeight;
824  }
825  else
826  CyclicLatency = 0;
827 
828  DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
829  << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
830  if (CyclicLatency > MaxCyclicLatency)
831  MaxCyclicLatency = CyclicLatency;
832  }
833  }
834  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
835  return MaxCyclicLatency;
836 }
837 
838 /// Identify DAG roots and setup scheduler queues.
840  ArrayRef<SUnit*> BotRoots) {
841  NextClusterSucc = NULL;
842  NextClusterPred = NULL;
843 
844  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
845  //
846  // Nodes with unreleased weak edges can still be roots.
847  // Release top roots in forward order.
849  I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
851  }
852  // Release bottom roots in reverse order so the higher priority nodes appear
853  // first. This is more natural and slightly more efficient.
855  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
857  }
858 
861 
863 
864  // Advance past initial DebugValues.
867 
868  if (ShouldTrackPressure) {
869  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
871  }
872 }
873 
874 /// Move an instruction and update register pressure.
875 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
876  // Move the instruction to its new location in the instruction stream.
877  MachineInstr *MI = SU->getInstr();
878 
879  if (IsTopNode) {
880  assert(SU->isTopReady() && "node still has unscheduled dependencies");
881  if (&*CurrentTop == MI)
883  else {
885  TopRPTracker.setPos(MI);
886  }
887 
888  if (ShouldTrackPressure) {
889  // Update top scheduled pressure.
891  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
893  }
894  }
895  else {
896  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
899  if (&*priorII == MI)
900  CurrentBottom = priorII;
901  else {
902  if (&*CurrentTop == MI) {
903  CurrentTop = nextIfDebug(++CurrentTop, priorII);
905  }
907  CurrentBottom = MI;
908  }
909  if (ShouldTrackPressure) {
910  // Update bottom scheduled pressure.
911  SmallVector<unsigned, 8> LiveUses;
912  BotRPTracker.recede(&LiveUses);
913  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
915  updatePressureDiffs(LiveUses);
916  }
917  }
918 }
919 
920 /// Update scheduler queues after scheduling an instruction.
921 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
922  // Release dependent instructions for scheduling.
923  if (IsTopNode)
924  releaseSuccessors(SU);
925  else
927 
928  SU->isScheduled = true;
929 
930  if (DFSResult) {
931  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
932  if (!ScheduledTrees.test(SubtreeID)) {
933  ScheduledTrees.set(SubtreeID);
934  DFSResult->scheduleTree(SubtreeID);
935  SchedImpl->scheduleTree(SubtreeID);
936  }
937  }
938 
939  // Notify the scheduling strategy after updating the DAG.
940  SchedImpl->schedNode(SU, IsTopNode);
941 }
942 
943 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
946  // If the first instruction was a DBG_VALUE, put it back.
946  if (FirstDbgValue) {
949  }
950 
951  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
952  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
953  std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
954  MachineInstr *DbgValue = P.first;
955  MachineBasicBlock::iterator OrigPrevMI = P.second;
956  if (&*RegionBegin == DbgValue)
957  ++RegionBegin;
958  BB->splice(++OrigPrevMI, BB, DbgValue);
959  if (OrigPrevMI == llvm::prior(RegionEnd))
960  RegionEnd = DbgValue;
961  }
962  DbgValues.clear();
963  FirstDbgValue = NULL;
964 }
965 
966 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
967 void ScheduleDAGMI::dumpSchedule() const {
968  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
969  if (SUnit *SU = getSUnit(&(*MI)))
970  SU->dump(this);
971  else
972  dbgs() << "Missing SUnit\n";
973  }
974 }
975 #endif
976 
977 //===----------------------------------------------------------------------===//
978 // LoadClusterMutation - DAG post-processing to cluster loads.
979 //===----------------------------------------------------------------------===//
980 
981 namespace {
982 /// \brief Post-process the DAG to create cluster edges between neighboring
983 /// loads.
984 class LoadClusterMutation : public ScheduleDAGMutation {
985  struct LoadInfo {
986  SUnit *SU;
987  unsigned BaseReg;
988  unsigned Offset;
989  LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
990  : SU(su), BaseReg(reg), Offset(ofs) {}
991  };
992  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
993  const LoadClusterMutation::LoadInfo &RHS);
994 
995  const TargetInstrInfo *TII;
996  const TargetRegisterInfo *TRI;
997 public:
998  LoadClusterMutation(const TargetInstrInfo *tii,
999  const TargetRegisterInfo *tri)
1000  : TII(tii), TRI(tri) {}
1001 
1002  virtual void apply(ScheduleDAGMI *DAG);
1003 protected:
1004  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
1005 };
1006 } // anonymous
1007 
1008 bool LoadClusterMutation::LoadInfoLess(
1009  const LoadClusterMutation::LoadInfo &LHS,
1010  const LoadClusterMutation::LoadInfo &RHS) {
1011  if (LHS.BaseReg != RHS.BaseReg)
1012  return LHS.BaseReg < RHS.BaseReg;
1013  return LHS.Offset < RHS.Offset;
1014 }
1015 
1016 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
1017  ScheduleDAGMI *DAG) {
1019  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
1020  SUnit *SU = Loads[Idx];
1021  unsigned BaseReg;
1022  unsigned Offset;
1023  if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
1024  LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
1025  }
1026  if (LoadRecords.size() < 2)
1027  return;
1028  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
1029  unsigned ClusterLength = 1;
1030  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
1031  if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
1032  ClusterLength = 1;
1033  continue;
1034  }
1035 
1036  SUnit *SUa = LoadRecords[Idx].SU;
1037  SUnit *SUb = LoadRecords[Idx+1].SU;
1038  if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
1039  && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1040 
1041  DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
1042  << SUb->NodeNum << ")\n");
1043  // Copy successor edges from SUa to SUb. Interleaving computation
1044  // dependent on SUa can prevent load combining due to register reuse.
1045  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1046  // loads should have effectively the same inputs.
1048  SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1049  if (SI->getSUnit() == SUb)
1050  continue;
1051  DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1052  DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1053  }
1054  ++ClusterLength;
1055  }
1056  else
1057  ClusterLength = 1;
1058  }
1059 }
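// Illustrative walk-through of the clustering above (hypothetical registers):
// given loads with (BaseReg, Offset) pairs (r0,8), (r1,0), (r0,0),
// LoadInfoLess sorts them to (r0,0), (r0,8), (r1,0). The first two share a
// base register, so if shouldClusterLoads() accepts the pair, a weak Cluster
// edge from the (r0,0) load to the (r0,8) load is added, and the first load's
// other successors receive artificial edges from the second, keeping
// unrelated computation from being interleaved between the two loads.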
1060 
1061 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1063  // Map DAG NodeNum to store chain ID.
1064  DenseMap<unsigned, unsigned> StoreChainIDs;
1065  // Map each store chain to a set of dependent loads.
1066  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1067  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1068  SUnit *SU = &DAG->SUnits[Idx];
1069  if (!SU->getInstr()->mayLoad())
1070  continue;
1071  unsigned ChainPredID = DAG->SUnits.size();
1073  PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1074  if (PI->isCtrl()) {
1075  ChainPredID = PI->getSUnit()->NodeNum;
1076  break;
1077  }
1078  }
1079  // Check if this chain-like pred has been seen
1080  // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
1081  unsigned NumChains = StoreChainDependents.size();
1082  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1083  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1084  if (Result.second)
1085  StoreChainDependents.resize(NumChains + 1);
1086  StoreChainDependents[Result.first->second].push_back(SU);
1087  }
1088  // Iterate over the store chains.
1089  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1090  clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
1091 }
1092 
1093 //===----------------------------------------------------------------------===//
1094 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
1095 //===----------------------------------------------------------------------===//
1096 
1097 namespace {
1098 /// \brief Post-process the DAG to create cluster edges between instructions
1099 /// that may be fused by the processor into a single operation.
1100 class MacroFusion : public ScheduleDAGMutation {
1101  const TargetInstrInfo *TII;
1102 public:
1103  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
1104 
1105  virtual void apply(ScheduleDAGMI *DAG);
1106 };
1107 } // anonymous
1108 
1109 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
1110 /// fused operations.
1111 void MacroFusion::apply(ScheduleDAGMI *DAG) {
1112  // For now, assume targets can only fuse with the branch.
1113  MachineInstr *Branch = DAG->ExitSU.getInstr();
1114  if (!Branch)
1115  return;
1116 
1117  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
1118  SUnit *SU = &DAG->SUnits[--Idx];
1119  if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
1120  continue;
1121 
1122  // Create a single weak edge from SU to ExitSU. The only effect is to cause
1123  // bottom-up scheduling to heavily prioritize the clustered SU. There is no
1124  // need to copy predecessor edges from ExitSU to SU, since top-down
1125  // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
1126  // of SU, we could create an artificial edge from the deepest root, but it
1127  // hasn't been needed yet.
1128  bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
1129  (void)Success;
1130  assert(Success && "No DAG nodes should be reachable from ExitSU");
1131 
1132  DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
1133  break;
1134  }
1135 }
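// Example of the intended effect (target-dependent behavior, stated here only
// for illustration): on x86-like targets shouldScheduleAdjacent() typically
// accepts a CMP or TEST that feeds the block's conditional branch, so the weak
// cluster edge added above keeps the compare immediately before the branch and
// allows the hardware to macro-fuse the pair.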
1136 
1137 //===----------------------------------------------------------------------===//
1138 // CopyConstrain - DAG post-processing to encourage copy elimination.
1139 //===----------------------------------------------------------------------===//
1140 
1141 namespace {
1142 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1143 /// the one use that defines the copy's source vreg, most likely an induction
1144 /// variable increment.
1145 class CopyConstrain : public ScheduleDAGMutation {
1146  // Transient state.
1147  SlotIndex RegionBeginIdx;
1148  // RegionEndIdx is the slot index of the last non-debug instruction in the
1149  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1150  SlotIndex RegionEndIdx;
1151 public:
1152  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1153 
1154  virtual void apply(ScheduleDAGMI *DAG);
1155 
1156 protected:
1157  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
1158 };
1159 } // anonymous
1160 
1161 /// constrainLocalCopy handles two possibilities:
1162 /// 1) Local src:
1163 /// I0: = dst
1164 /// I1: src = ...
1165 /// I2: = dst
1166 /// I3: dst = src (copy)
1167 /// (create pred->succ edges I0->I1, I2->I1)
1168 ///
1169 /// 2) Local copy:
1170 /// I0: dst = src (copy)
1171 /// I1: = dst
1172 /// I2: src = ...
1173 /// I3: = dst
1174 /// (create pred->succ edges I1->I2, I3->I2)
1175 ///
1176 /// Although the MachineScheduler is currently constrained to single blocks,
1177 /// this algorithm should handle extended blocks. An EBB is a set of
1178 /// contiguously numbered blocks such that the previous block in the EBB is
1179 /// always the single predecessor.
1180 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
1181  LiveIntervals *LIS = DAG->getLIS();
1182  MachineInstr *Copy = CopySU->getInstr();
1183 
1184  // Check for pure vreg copies.
1185  unsigned SrcReg = Copy->getOperand(1).getReg();
1187  return;
1188 
1189  unsigned DstReg = Copy->getOperand(0).getReg();
1191  return;
1192 
1193  // Check if either the dest or source is local. If it's live across a back
1194  // edge, it's not local. Note that if both vregs are live across the back
1195  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1196  unsigned LocalReg = DstReg;
1197  unsigned GlobalReg = SrcReg;
1198  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1199  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1200  LocalReg = SrcReg;
1201  GlobalReg = DstReg;
1202  LocalLI = &LIS->getInterval(LocalReg);
1203  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1204  return;
1205  }
1206  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1207 
1208  // Find the global segment after the start of the local LI.
1209  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1210  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1211  // local live range. We could create edges from other global uses to the local
1212  // start, but the coalescer should have already eliminated these cases, so
1213  // don't bother dealing with it.
1214  if (GlobalSegment == GlobalLI->end())
1215  return;
1216 
1217  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1218  // returned the next global segment. But if GlobalSegment overlaps with
1219  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1220  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1221  if (GlobalSegment->contains(LocalLI->beginIndex()))
1222  ++GlobalSegment;
1223 
1224  if (GlobalSegment == GlobalLI->end())
1225  return;
1226 
1227  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1228  if (GlobalSegment != GlobalLI->begin()) {
1229  // Two address defs have no hole.
1230  if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
1231  GlobalSegment->start)) {
1232  return;
1233  }
1234  // If the prior global segment may be defined by the same two-address
1235  // instruction that also defines LocalLI, then we cannot make a hole here.
1236  if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
1237  LocalLI->beginIndex())) {
1238  return;
1239  }
1240  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1241  // it would be a disconnected component in the live range.
1242  assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
1243  "Disconnected LRG within the scheduling region.");
1244  }
1245  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1246  if (!GlobalDef)
1247  return;
1248 
1249  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1250  if (!GlobalSU)
1251  return;
1252 
1253  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1254  // constraining the uses of the last local def to precede GlobalDef.
1255  SmallVector<SUnit*,8> LocalUses;
1256  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1257  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1258  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1260  I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1261  I != E; ++I) {
1262  if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1263  continue;
1264  if (I->getSUnit() == GlobalSU)
1265  continue;
1266  if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1267  return;
1268  LocalUses.push_back(I->getSUnit());
1269  }
1270  // Open the top of the GlobalLI hole by constraining any earlier global uses
1271  // to precede the start of LocalLI.
1272  SmallVector<SUnit*,8> GlobalUses;
1273  MachineInstr *FirstLocalDef =
1274  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1275  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1277  I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1278  if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1279  continue;
1280  if (I->getSUnit() == FirstLocalSU)
1281  continue;
1282  if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1283  return;
1284  GlobalUses.push_back(I->getSUnit());
1285  }
1286  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1287  // Add the weak edges.
1289  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1290  DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1291  << GlobalSU->NodeNum << ")\n");
1292  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1293  }
1295  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1296  DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1297  << FirstLocalSU->NodeNum << ")\n");
1298  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1299  }
1300 }
1301 
1302 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1303 /// copy elimination.
1305  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1306  if (FirstPos == DAG->end())
1307  return;
1308  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1309  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1310  &*priorNonDebug(DAG->end(), DAG->begin()));
1311 
1312  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1313  SUnit *SU = &DAG->SUnits[Idx];
1314  if (!SU->getInstr()->isCopy())
1315  continue;
1316 
1317  constrainLocalCopy(SU, DAG);
1318  }
1319 }
1320 
1321 //===----------------------------------------------------------------------===//
1322 // GenericScheduler - Implementation of the generic MachineSchedStrategy.
1323 //===----------------------------------------------------------------------===//
1324 
1325 namespace {
1326 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
1327 /// the schedule.
1328 class GenericScheduler : public MachineSchedStrategy {
1329 public:
1330  /// Represent the type of SchedCandidate found within a single queue.
1331  /// pickNodeBidirectional depends on these listed by decreasing priority.
1332  enum CandReason {
1333  NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
1334  ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
1335  TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
1336 
1337 #ifndef NDEBUG
1338  static const char *getReasonStr(GenericScheduler::CandReason Reason);
1339 #endif
1340 
1341  /// Policy for scheduling the next instruction in the candidate's zone.
1342  struct CandPolicy {
1343  bool ReduceLatency;
1344  unsigned ReduceResIdx;
1345  unsigned DemandResIdx;
1346 
1347  CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
1348  };
1349 
1350  /// Status of an instruction's critical resource consumption.
1351  struct SchedResourceDelta {
1352  // Count critical resources in the scheduled region required by SU.
1353  unsigned CritResources;
1354 
1355  // Count critical resources from another region consumed by SU.
1356  unsigned DemandedResources;
1357 
1358  SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
1359 
1360  bool operator==(const SchedResourceDelta &RHS) const {
1361  return CritResources == RHS.CritResources
1362  && DemandedResources == RHS.DemandedResources;
1363  }
1364  bool operator!=(const SchedResourceDelta &RHS) const {
1365  return !operator==(RHS);
1366  }
1367  };
1368 
1369  /// Store the state used by GenericScheduler heuristics, required for the
1370  /// lifetime of one invocation of pickNode().
1371  struct SchedCandidate {
1372  CandPolicy Policy;
1373 
1374  // The best SUnit candidate.
1375  SUnit *SU;
1376 
1377  // The reason for this candidate.
1378  CandReason Reason;
1379 
1380  // Set of reasons that apply to multiple candidates.
1381  uint32_t RepeatReasonSet;
1382 
1383  // Register pressure values for the best candidate.
1384  RegPressureDelta RPDelta;
1385 
1386  // Critical resource consumption of the best candidate.
1387  SchedResourceDelta ResDelta;
1388 
1389  SchedCandidate(const CandPolicy &policy)
1390  : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
1391 
1392  bool isValid() const { return SU; }
1393 
1394  // Copy the status of another candidate without changing policy.
1395  void setBest(SchedCandidate &Best) {
1396  assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1397  SU = Best.SU;
1398  Reason = Best.Reason;
1399  RPDelta = Best.RPDelta;
1400  ResDelta = Best.ResDelta;
1401  }
1402 
1403  bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
1404  void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
1405 
1406  void initResourceDelta(const ScheduleDAGMI *DAG,
1407  const TargetSchedModel *SchedModel);
1408  };
1409 
1410  /// Summarize the unscheduled region.
1411  struct SchedRemainder {
1412  // Critical path through the DAG in expected latency.
1413  unsigned CriticalPath;
1414  unsigned CyclicCritPath;
1415 
1416  // Scaled count of micro-ops left to schedule.
1417  unsigned RemIssueCount;
1418 
1419  bool IsAcyclicLatencyLimited;
1420 
1421  // Unscheduled resources
1422  SmallVector<unsigned, 16> RemainingCounts;
1423 
1424  void reset() {
1425  CriticalPath = 0;
1426  CyclicCritPath = 0;
1427  RemIssueCount = 0;
1428  IsAcyclicLatencyLimited = false;
1429  RemainingCounts.clear();
1430  }
1431 
1432  SchedRemainder() { reset(); }
1433 
1434  void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
1435  };
1436 
1437  /// Each Scheduling boundary is associated with ready queues. It tracks the
1438  /// current cycle in the direction of movement, and maintains the state
1439  /// of "hazards" and other interlocks at the current cycle.
1440  struct SchedBoundary {
1441  ScheduleDAGMI *DAG;
1442  const TargetSchedModel *SchedModel;
1443  SchedRemainder *Rem;
1444 
1445  ReadyQueue Available;
1446  ReadyQueue Pending;
1447  bool CheckPending;
1448 
1449  // For heuristics, keep a list of the nodes that immediately depend on the
1450  // most recently scheduled node.
1452 
1453  ScheduleHazardRecognizer *HazardRec;
1454 
1455  /// Number of cycles it takes to issue the instructions scheduled in this
1456  /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
1457  /// See getStalls().
1458  unsigned CurrCycle;
1459 
1460  /// Micro-ops issued in the current cycle
1461  unsigned CurrMOps;
1462 
1463  /// MinReadyCycle - Cycle of the soonest available instruction.
1464  unsigned MinReadyCycle;
1465 
1466  // The expected latency of the critical path in this scheduled zone.
1467  unsigned ExpectedLatency;
1468 
1469  // The latency of dependence chains leading into this zone.
1470  // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
1471  // For each cycle scheduled: DLat -= 1.
1472  unsigned DependentLatency;
1473 
1474  /// Count the scheduled (issued) micro-ops that can be retired by
1475  /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
1476  unsigned RetiredMOps;
1477 
1478  // Count scheduled resources that have been executed. Resources are
1479  // considered executed if they become ready in the time that it takes to
1480  // saturate any resource including the one in question. Counts are scaled
1481  // for direct comparison with other resources. Counts can be compared with
1482  // MOps * getMicroOpFactor and Latency * getLatencyFactor.
1483  SmallVector<unsigned, 16> ExecutedResCounts;
1484 
1485  /// Cache the max count for a single resource.
1486  unsigned MaxExecutedResCount;
1487 
1488  // Cache the critical resources ID in this scheduled zone.
1489  unsigned ZoneCritResIdx;
1490 
1491  // Is the scheduled region resource limited vs. latency limited.
1492  bool IsResourceLimited;
1493 
1494 #ifndef NDEBUG
1495  // Remember the greatest operand latency as an upper bound on the number of
1496  // times we should retry the pending queue because of a hazard.
1497  unsigned MaxObservedLatency;
1498 #endif
1499 
1500  void reset() {
1501  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1502  // Destroying and reconstructing it is very expensive though. So keep
1503  // invalid, placeholder HazardRecs.
1504  if (HazardRec && HazardRec->isEnabled()) {
1505  delete HazardRec;
1506  HazardRec = 0;
1507  }
1508  Available.clear();
1509  Pending.clear();
1510  CheckPending = false;
1511  NextSUs.clear();
1512  CurrCycle = 0;
1513  CurrMOps = 0;
1514  MinReadyCycle = UINT_MAX;
1515  ExpectedLatency = 0;
1516  DependentLatency = 0;
1517  RetiredMOps = 0;
1518  MaxExecutedResCount = 0;
1519  ZoneCritResIdx = 0;
1520  IsResourceLimited = false;
1521 #ifndef NDEBUG
1522  MaxObservedLatency = 0;
1523 #endif
1524  // Reserve a zero-count for invalid CritResIdx.
1525  ExecutedResCounts.resize(1);
1526  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1527  }
1528 
1529  /// Pending queues extend the ready queues with the same ID and the
1530  /// PendingFlag set.
1531  SchedBoundary(unsigned ID, const Twine &Name):
1532  DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1533  Pending(ID << GenericScheduler::LogMaxQID, Name+".P"),
1534  HazardRec(0) {
1535  reset();
1536  }
1537 
1538  ~SchedBoundary() { delete HazardRec; }
1539 
1540  void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1541  SchedRemainder *rem);
1542 
1543  bool isTop() const {
1544  return Available.getID() == GenericScheduler::TopQID;
1545  }
1546 
1547 #ifndef NDEBUG
1548  const char *getResourceName(unsigned PIdx) {
1549  if (!PIdx)
1550  return "MOps";
1551  return SchedModel->getProcResource(PIdx)->Name;
1552  }
1553 #endif
1554 
1555  /// Get the number of latency cycles "covered" by the scheduled
1556  /// instructions. This is the larger of the critical path within the zone
1557  /// and the number of cycles required to issue the instructions.
1558  unsigned getScheduledLatency() const {
1559  return std::max(ExpectedLatency, CurrCycle);
1560  }
1561 
1562  unsigned getUnscheduledLatency(SUnit *SU) const {
1563  return isTop() ? SU->getHeight() : SU->getDepth();
1564  }
1565 
1566  unsigned getResourceCount(unsigned ResIdx) const {
1567  return ExecutedResCounts[ResIdx];
1568  }
1569 
1570  /// Get the scaled count of scheduled micro-ops and resources, including
1571  /// executed resources.
1572  unsigned getCriticalCount() const {
1573  if (!ZoneCritResIdx)
1574  return RetiredMOps * SchedModel->getMicroOpFactor();
1575  return getResourceCount(ZoneCritResIdx);
1576  }
1577 
1578  /// Get a scaled count for the minimum execution time of the scheduled
1579  /// micro-ops that are ready to execute by getExecutedCount. Notice the
1580  /// feedback loop.
1581  unsigned getExecutedCount() const {
1582  return std::max(CurrCycle * SchedModel->getLatencyFactor(),
1583  MaxExecutedResCount);
1584  }
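// Intuition for the scaled counts above, with illustrative numbers: on a
// machine that issues 2 micro-ops per cycle, 6 retired micro-ops amount to 3
// cycles of issue work; if getScheduledLatency() reports a 5-cycle critical
// path for the zone, latency rather than resource throughput is the
// bottleneck, which is what IsResourceLimited is meant to capture.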
1585 
1586  bool checkHazard(SUnit *SU);
1587 
1588  unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
1589 
1590  unsigned getOtherResourceCount(unsigned &OtherCritIdx);
1591 
1592  void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
1593 
1594  void releaseNode(SUnit *SU, unsigned ReadyCycle);
1595 
1596  void bumpCycle(unsigned NextCycle);
1597 
1598  void incExecutedResources(unsigned PIdx, unsigned Count);
1599 
1600  unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
1601 
1602  void bumpNode(SUnit *SU);
1603 
1604  void releasePending();
1605 
1606  void removeReady(SUnit *SU);
1607 
1608  SUnit *pickOnlyChoice();
1609 
1610 #ifndef NDEBUG
1611  void dumpScheduledState();
1612 #endif
1613  };
1614 
1615 private:
1616  const MachineSchedContext *Context;
1617  ScheduleDAGMI *DAG;
1618  const TargetSchedModel *SchedModel;
1619  const TargetRegisterInfo *TRI;
1620 
1621  // State of the top and bottom scheduled instruction boundaries.
1622  SchedRemainder Rem;
1623  SchedBoundary Top;
1624  SchedBoundary Bot;
1625 
1626  MachineSchedPolicy RegionPolicy;
1627 public:
1628  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1629  enum {
1630  TopQID = 1,
1631  BotQID = 2,
1632  LogMaxQID = 2
1633  };
1634 
1635  GenericScheduler(const MachineSchedContext *C):
1636  Context(C), DAG(0), SchedModel(0), TRI(0),
1637  Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1638 
1639  virtual void initPolicy(MachineBasicBlock::iterator Begin,
1641  unsigned NumRegionInstrs);
1642 
1643  bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }
1644 
1645  virtual void initialize(ScheduleDAGMI *dag);
1646 
1647  virtual SUnit *pickNode(bool &IsTopNode);
1648 
1649  virtual void schedNode(SUnit *SU, bool IsTopNode);
1650 
1651  virtual void releaseTopNode(SUnit *SU);
1652 
1653  virtual void releaseBottomNode(SUnit *SU);
1654 
1655  virtual void registerRoots();
1656 
1657 protected:
1658  void checkAcyclicLatency();
1659 
1660  void tryCandidate(SchedCandidate &Cand,
1661  SchedCandidate &TryCand,
1662  SchedBoundary &Zone,
1663  const RegPressureTracker &RPTracker,
1664  RegPressureTracker &TempTracker);
1665 
1666  SUnit *pickNodeBidirectional(bool &IsTopNode);
1667 
1668  void pickNodeFromQueue(SchedBoundary &Zone,
1669  const RegPressureTracker &RPTracker,
1670  SchedCandidate &Candidate);
1671 
1672  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
1673 
1674 #ifndef NDEBUG
1675  void traceCandidate(const SchedCandidate &Cand);
1676 #endif
1677 };
1678 } // namespace
1679 
1681 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1682  reset();
1683  if (!SchedModel->hasInstrSchedModel())
1684  return;
1685  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1686  for (std::vector<SUnit>::iterator
1687  I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1688  const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1689  RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1690  * SchedModel->getMicroOpFactor();
1692  PI = SchedModel->getWriteProcResBegin(SC),
1693  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1694  unsigned PIdx = PI->ProcResourceIdx;
1695  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1696  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1697  }
1698  }
1699 }
1700 
1701 void GenericScheduler::SchedBoundary::
1702 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1703  reset();
1704  DAG = dag;
1705  SchedModel = smodel;
1706  Rem = rem;
1707  if (SchedModel->hasInstrSchedModel())
1708  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1709 }
1710 
1711 /// Initialize the per-region scheduling policy.
1712 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
1713  MachineBasicBlock::iterator End,
1714  unsigned NumRegionInstrs) {
1715  const TargetMachine &TM = Context->MF->getTarget();
1716 
1717  // Avoid setting up the register pressure tracker for small regions to save
1718  // compile time. As a rough heuristic, only track pressure when the number of
1719  // schedulable instructions exceeds half the integer register file.
1720  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
1721  TM.getTargetLowering()->getRegClassFor(MVT::i32));
1722 
1723  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
1724 
1725  // For generic targets, we default to bottom-up, because it's simpler and more
1726  // compile-time optimizations have been implemented in that direction.
1727  RegionPolicy.OnlyBottomUp = true;
1728 
1729  // Allow the subtarget to override default policy.
1730  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
1731  ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
1732 
1733  // After subtarget overrides, apply command line options.
1734  if (!EnableRegPressure)
1735  RegionPolicy.ShouldTrackPressure = false;
1736 
1737  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
1738  // e.g. -misched-bottomup=false allows scheduling in both directions.
1739  assert((!ForceTopDown || !ForceBottomUp) &&
1740  "-misched-topdown incompatible with -misched-bottomup");
1741  if (ForceBottomUp.getNumOccurrences() > 0) {
1742  RegionPolicy.OnlyBottomUp = ForceBottomUp;
1743  if (RegionPolicy.OnlyBottomUp)
1744  RegionPolicy.OnlyTopDown = false;
1745  }
1746  if (ForceTopDown.getNumOccurrences() > 0) {
1747  RegionPolicy.OnlyTopDown = ForceTopDown;
1748  if (RegionPolicy.OnlyTopDown)
1749  RegionPolicy.OnlyBottomUp = false;
1750  }
1751 }
1752 
1753 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
1754  DAG = dag;
1755  SchedModel = DAG->getSchedModel();
1756  TRI = DAG->TRI;
1757 
1758  Rem.init(DAG, SchedModel);
1759  Top.init(DAG, SchedModel, &Rem);
1760  Bot.init(DAG, SchedModel, &Rem);
1761 
1762  // Initialize resource counts.
1763 
1764  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1765  // are disabled, then these HazardRecs will be disabled.
1766  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1767  const TargetMachine &TM = DAG->MF.getTarget();
1768  if (!Top.HazardRec) {
1769  Top.HazardRec =
1770  TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1771  }
1772  if (!Bot.HazardRec) {
1773  Bot.HazardRec =
1774  TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1775  }
1776 }
1777 
1778 void GenericScheduler::releaseTopNode(SUnit *SU) {
1779  if (SU->isScheduled)
1780  return;
1781 
1782  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1783  I != E; ++I) {
1784  if (I->isWeak())
1785  continue;
1786  unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1787  unsigned Latency = I->getLatency();
1788 #ifndef NDEBUG
1789  Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency);
1790 #endif
1791  if (SU->TopReadyCycle < PredReadyCycle + Latency)
1792  SU->TopReadyCycle = PredReadyCycle + Latency;
1793  }
1794  Top.releaseNode(SU, SU->TopReadyCycle);
1795 }
1796 
1797 void GenericScheduler::releaseBottomNode(SUnit *SU) {
1798  if (SU->isScheduled)
1799  return;
1800 
1801  assert(SU->getInstr() && "Scheduled SUnit must have instr");
1802 
1803  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1804  I != E; ++I) {
1805  if (I->isWeak())
1806  continue;
1807  unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1808  unsigned Latency = I->getLatency();
1809 #ifndef NDEBUG
1810  Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency);
1811 #endif
1812  if (SU->BotReadyCycle < SuccReadyCycle + Latency)
1813  SU->BotReadyCycle = SuccReadyCycle + Latency;
1814  }
1815  Bot.releaseNode(SU, SU->BotReadyCycle);
1816 }
1817 
1818 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
1819 /// critical path by more cycles than it takes to drain the instruction buffer.
1820 /// We estimate an upper bound on in-flight instructions as:
1821 ///
1822 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
1823 /// InFlightIterations = AcyclicPath / CyclesPerIteration
1824 /// InFlightResources = InFlightIterations * LoopResources
1825 ///
1826 /// TODO: Check execution resources in addition to IssueCount.
1827 void GenericScheduler::checkAcyclicLatency() {
1828  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
1829  return;
1830 
1831  // Scaled number of cycles per loop iteration.
1832  unsigned IterCount =
1833  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
1834  Rem.RemIssueCount);
1835  // Scaled acyclic critical path.
1836  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
1837  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
1838  unsigned InFlightCount =
1839  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
1840  unsigned BufferLimit =
1841  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
1842 
1843  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
1844 
1845  DEBUG(dbgs() << "IssueCycles="
1846  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
1847  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
1848  << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
1849  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
1850  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
1851  if (Rem.IsAcyclicLatencyLimited)
1852  dbgs() << " ACYCLIC LATENCY LIMIT\n");
1853 }
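// Worked example of the check above (hypothetical, with LatencyFactor and
// MicroOpFactor both 1): CyclicCritPath = 4, RemIssueCount = 8,
// CriticalPath = 20, MicroOpBufferSize = 16. Then IterCount = max(4, 8) = 8,
// AcyclicCount = 20, InFlightCount = (20*8 + 7) / 8 = 20, BufferLimit = 16,
// and 20 > 16 marks the region as acyclic-latency limited.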
1854 
1855 void GenericScheduler::registerRoots() {
1856  Rem.CriticalPath = DAG->ExitSU.getDepth();
1857 
1858  // Some roots may not feed into ExitSU. Check all of them just in case.
1859  for (std::vector<SUnit*>::const_iterator
1860  I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1861  if ((*I)->getDepth() > Rem.CriticalPath)
1862  Rem.CriticalPath = (*I)->getDepth();
1863  }
1864  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1865 
1866  if (EnableCyclicPath) {
1867  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
1868  checkAcyclicLatency();
1869  }
1870 }
1871 
1872 /// Does this SU have a hazard within the current instruction group.
1873 ///
1874 /// The scheduler supports two modes of hazard recognition. The first is the
1875 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1876 /// supports highly complicated in-order reservation tables
1877 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1878 ///
1879 /// The second is a streamlined mechanism that checks for hazards based on
1880 /// simple counters that the scheduler itself maintains. It explicitly checks
1881 /// for instruction dispatch limitations, including the number of micro-ops that
1882 /// can dispatch per cycle.
1883 ///
1884 /// TODO: Also check whether the SU must start a new group.
1885 bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1886  if (HazardRec->isEnabled())
1887  return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1888 
1889  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1890  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1891  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1892  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1893  return true;
1894  }
1895  return false;
1896 }
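// Example of the micro-op check above (hypothetical numbers): with an issue
// width of 4 and CurrMOps == 3, an SU that needs 2 micro-ops reports a hazard
// (3 + 2 > 4) and stays in Pending until bumpCycle resets CurrMOps.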
1897 
1898 // Find the unscheduled node in ReadySUs with the highest latency.
1899 unsigned GenericScheduler::SchedBoundary::
1900 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1901  SUnit *LateSU = 0;
1902  unsigned RemLatency = 0;
1903  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1904  I != E; ++I) {
1905  unsigned L = getUnscheduledLatency(*I);
1906  if (L > RemLatency) {
1907  RemLatency = L;
1908  LateSU = *I;
1909  }
1910  }
1911  if (LateSU) {
1912  DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1913  << LateSU->NodeNum << ") " << RemLatency << "c\n");
1914  }
1915  return RemLatency;
1916 }
1917 
1918 // Count resources in this zone and the remaining unscheduled
1919 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
1920 // resource index, or zero if the zone is issue limited.
1921 unsigned GenericScheduler::SchedBoundary::
1922 getOtherResourceCount(unsigned &OtherCritIdx) {
1923  OtherCritIdx = 0;
1924  if (!SchedModel->hasInstrSchedModel())
1925  return 0;
1926 
1927  unsigned OtherCritCount = Rem->RemIssueCount
1928  + (RetiredMOps * SchedModel->getMicroOpFactor());
1929  DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
1930  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1931  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1932  PIdx != PEnd; ++PIdx) {
1933  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1934  if (OtherCount > OtherCritCount) {
1935  OtherCritCount = OtherCount;
1936  OtherCritIdx = PIdx;
1937  }
1938  }
1939  if (OtherCritIdx) {
1940  DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
1941  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1942  << " " << getResourceName(OtherCritIdx) << "\n");
1943  }
1944  return OtherCritCount;
1945 }
1946 
1947 /// Set the CandPolicy for this zone given the current resources and latencies
1948 /// inside and outside the zone.
1949 void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
1950  SchedBoundary &OtherZone) {
1951  // Now that potential stalls have been considered, apply preemptive heuristics
1952  // based on the total latency and resources inside and outside this
1953  // zone.
1954 
1955  // Compute remaining latency. We need this both to determine whether the
1956  // overall schedule has become latency-limited and whether the instructions
1957  // outside this zone are resource or latency limited.
1958  //
1959  // The "dependent" latency is updated incrementally during scheduling as the
1960  // max height/depth of scheduled nodes minus the cycles since it was
1961  // scheduled:
1962  // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
1963  //
1964  // The "independent" latency is the max ready queue depth:
1965  // ILat = max N.depth for N in Available|Pending
1966  //
1967  // RemainingLatency is the greater of independent and dependent latency.
1968  unsigned RemLatency = DependentLatency;
1969  RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
1970  RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
1971 
1972  // Compute the critical resource outside the zone.
1973  unsigned OtherCritIdx;
1974  unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
1975 
1976  bool OtherResLimited = false;
1977  if (SchedModel->hasInstrSchedModel()) {
1978  unsigned LFactor = SchedModel->getLatencyFactor();
1979  OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
1980  }
1981  if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
1982  Policy.ReduceLatency |= true;
1983  DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency "
1984  << RemLatency << " + " << CurrCycle << "c > CritPath "
1985  << Rem->CriticalPath << "\n");
1986  }
1987  // If the same resource is limiting inside and outside the zone, do nothing.
1988  if (ZoneCritResIdx == OtherCritIdx)
1989  return;
1990 
1991  DEBUG(
1992  if (IsResourceLimited) {
1993  dbgs() << " " << Available.getName() << " ResourceLimited: "
1994  << getResourceName(ZoneCritResIdx) << "\n";
1995  }
1996  if (OtherResLimited)
1997  dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n";
1998  if (!IsResourceLimited && !OtherResLimited)
1999  dbgs() << " Latency limited both directions.\n");
2000 
2001  if (IsResourceLimited && !Policy.ReduceResIdx)
2002  Policy.ReduceResIdx = ZoneCritResIdx;
2003 
2004  if (OtherResLimited)
2005  Policy.DemandResIdx = OtherCritIdx;
2006 }
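// Illustration of the decisions above (hypothetical numbers, LatencyFactor 2):
// with RemLatency = 10 and OtherCount = 25, (25 - 10*2) = 5 > 2, so the other
// zone is treated as resource limited and its critical resource is demanded.
// With OtherCount = 21 instead it is not resource limited; then CurrCycle = 6
// and CriticalPath = 14 give 10 + 6 > 14, so ReduceLatency is set.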
2007 
2008 void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
2009  unsigned ReadyCycle) {
2010  if (ReadyCycle < MinReadyCycle)
2011  MinReadyCycle = ReadyCycle;
2012 
2013  // Check for interlocks first. For the purpose of other heuristics, an
2014  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2015  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2016  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
2017  Pending.push(SU);
2018  else
2019  Available.push(SU);
2020 
2021  // Record this node as an immediate dependent of the scheduled node.
2022  NextSUs.insert(SU);
2023 }
2024 
2025 /// Move the boundary of scheduled code by one cycle.
2026 void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
2027  if (SchedModel->getMicroOpBufferSize() == 0) {
2028  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
2029  if (MinReadyCycle > NextCycle)
2030  NextCycle = MinReadyCycle;
2031  }
2032  // Update the current micro-ops, which will issue in the next cycle.
2033  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2034  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2035 
2036  // Decrement DependentLatency based on the next cycle.
2037  if ((NextCycle - CurrCycle) > DependentLatency)
2038  DependentLatency = 0;
2039  else
2040  DependentLatency -= (NextCycle - CurrCycle);
2041 
2042  if (!HazardRec->isEnabled()) {
2043  // Bypass HazardRec virtual calls.
2044  CurrCycle = NextCycle;
2045  }
2046  else {
2047  // Bypass getHazardType calls in case of long latency.
2048  for (; CurrCycle != NextCycle; ++CurrCycle) {
2049  if (isTop())
2050  HazardRec->AdvanceCycle();
2051  else
2052  HazardRec->RecedeCycle();
2053  }
2054  }
2055  CheckPending = true;
2056  unsigned LFactor = SchedModel->getLatencyFactor();
2057  IsResourceLimited =
2058  (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2059  > (int)LFactor;
2060 
2061  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2062 }
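// Example (hypothetical numbers): with an issue width of 4, advancing from
// CurrCycle 7 to NextCycle 9 gives DecMOps = 4 * 2 = 8; CurrMOps of 3 resets
// to 0 and a DependentLatency of 5 drops to 3.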
2063 
2064 void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
2065  unsigned Count) {
2066  ExecutedResCounts[PIdx] += Count;
2067  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2068  MaxExecutedResCount = ExecutedResCounts[PIdx];
2069 }
2070 
2071 /// Add the given processor resource to this scheduled zone.
2072 ///
2073 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2074 /// during which this resource is consumed.
2075 ///
2076 /// \return the next cycle at which the instruction may execute without
2077 /// oversubscribing resources.
2078 unsigned GenericScheduler::SchedBoundary::
2079 countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
2080  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2081  unsigned Count = Factor * Cycles;
2082  DEBUG(dbgs() << " " << getResourceName(PIdx)
2083  << " +" << Cycles << "x" << Factor << "u\n");
2084 
2085  // Update Executed resources counts.
2086  incExecutedResources(PIdx, Count);
2087  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2088  Rem->RemainingCounts[PIdx] -= Count;
2089 
2090  // Check if this resource exceeds the current critical resource. If so, it
2091  // becomes the critical resource.
2092  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2093  ZoneCritResIdx = PIdx;
2094  DEBUG(dbgs() << " *** Critical resource "
2095  << getResourceName(PIdx) << ": "
2096  << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2097  }
2098  // TODO: We don't yet model reserved resources. It's not hard though.
2099  return CurrCycle;
2100 }
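// Example (hypothetical numbers): a resource with Factor = 3 consumed for
// Cycles = 2 adds Count = 6 to ExecutedResCounts[PIdx] and removes 6 from
// Rem->RemainingCounts[PIdx]; if that lifts getResourceCount(PIdx) above
// getCriticalCount(), PIdx becomes the new ZoneCritResIdx.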
2101 
2102 /// Move the boundary of scheduled code by one SUnit.
2103 void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
2104  // Update the reservation table.
2105  if (HazardRec->isEnabled()) {
2106  if (!isTop() && SU->isCall) {
2107  // Calls are scheduled with their preceding instructions. For bottom-up
2108  // scheduling, clear the pipeline state before emitting.
2109  HazardRec->Reset();
2110  }
2111  HazardRec->EmitInstruction(SU);
2112  }
2113  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2114  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2115  CurrMOps += IncMOps;
2116  // checkHazard prevents scheduling multiple instructions per cycle that exceed
2117  // issue width. However, we commonly reach the maximum. In this case
2118  // opportunistically bump the cycle to avoid uselessly checking everything in
2119  // the readyQ. Furthermore, a single instruction may produce more than one
2120  // cycle's worth of micro-ops.
2121  //
2122  // TODO: Also check if this SU must end a dispatch group.
2123  unsigned NextCycle = CurrCycle;
2124  if (CurrMOps >= SchedModel->getIssueWidth()) {
2125  ++NextCycle;
2126  DEBUG(dbgs() << " *** Max MOps " << CurrMOps
2127  << " at cycle " << CurrCycle << '\n');
2128  }
2129  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2130  DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2131 
2132  switch (SchedModel->getMicroOpBufferSize()) {
2133  case 0:
2134  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2135  break;
2136  case 1:
2137  if (ReadyCycle > NextCycle) {
2138  NextCycle = ReadyCycle;
2139  DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2140  }
2141  break;
2142  default:
2143  // We don't currently model the OOO reorder buffer, so consider all
2144  // scheduled MOps to be "retired".
2145  break;
2146  }
2147  RetiredMOps += IncMOps;
2148 
2149  // Update resource counts and critical resource.
2150  if (SchedModel->hasInstrSchedModel()) {
2151  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2152  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2153  Rem->RemIssueCount -= DecRemIssue;
2154  if (ZoneCritResIdx) {
2155  // Scale scheduled micro-ops for comparing with the critical resource.
2156  unsigned ScaledMOps =
2157  RetiredMOps * SchedModel->getMicroOpFactor();
2158 
2159  // If scaled micro-ops are now more than the previous critical resource by
2160  // a full cycle, then micro-ops issue becomes critical.
2161  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2162  >= (int)SchedModel->getLatencyFactor()) {
2163  ZoneCritResIdx = 0;
2164  DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2165  << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2166  }
2167  }
2168  for (TargetSchedModel::ProcResIter
2169  PI = SchedModel->getWriteProcResBegin(SC),
2170  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2171  unsigned RCycle =
2172  countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
2173  if (RCycle > NextCycle)
2174  NextCycle = RCycle;
2175  }
2176  }
2177  // Update ExpectedLatency and DependentLatency.
2178  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2179  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2180  if (SU->getDepth() > TopLatency) {
2181  TopLatency = SU->getDepth();
2182  DEBUG(dbgs() << " " << Available.getName()
2183  << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2184  }
2185  if (SU->getHeight() > BotLatency) {
2186  BotLatency = SU->getHeight();
2187  DEBUG(dbgs() << " " << Available.getName()
2188  << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2189  }
2190  // If we stall for any reason, bump the cycle.
2191  if (NextCycle > CurrCycle) {
2192  bumpCycle(NextCycle);
2193  }
2194  else {
2195  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2196  // resource limited. If a stall occurred, bumpCycle does this.
2197  unsigned LFactor = SchedModel->getLatencyFactor();
2198  IsResourceLimited =
2199  (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2200  > (int)LFactor;
2201  }
2202  DEBUG(dumpScheduledState());
2203 }
2204 
2205 /// Release pending ready nodes into the available queue. This makes them
2206 /// visible to heuristics.
2207 void GenericScheduler::SchedBoundary::releasePending() {
2208  // If the available queue is empty, it is safe to reset MinReadyCycle.
2209  if (Available.empty())
2210  MinReadyCycle = UINT_MAX;
2211 
2212  // Check to see if any of the pending instructions are ready to issue. If
2213  // so, add them to the available queue.
2214  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2215  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2216  SUnit *SU = *(Pending.begin()+i);
2217  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2218 
2219  if (ReadyCycle < MinReadyCycle)
2220  MinReadyCycle = ReadyCycle;
2221 
2222  if (!IsBuffered && ReadyCycle > CurrCycle)
2223  continue;
2224 
2225  if (checkHazard(SU))
2226  continue;
2227 
2228  Available.push(SU);
2229  Pending.remove(Pending.begin()+i);
2230  --i; --e;
2231  }
2232  DEBUG(if (!Pending.empty()) Pending.dump());
2233  CheckPending = false;
2234 }
2235 
2236 /// Remove SU from the ready set for this boundary.
2237 void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
2238  if (Available.isInQueue(SU))
2239  Available.remove(Available.find(SU));
2240  else {
2241  assert(Pending.isInQueue(SU) && "bad ready count");
2242  Pending.remove(Pending.find(SU));
2243  }
2244 }
2245 
2246 /// If this queue only has one ready candidate, return it. As a side effect,
2247 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2248 /// one node is ready. If multiple instructions are ready, return NULL.
2249 SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
2250  if (CheckPending)
2251  releasePending();
2252 
2253  if (CurrMOps > 0) {
2254  // Defer any ready instrs that now have a hazard.
2255  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2256  if (checkHazard(*I)) {
2257  Pending.push(*I);
2258  I = Available.remove(I);
2259  continue;
2260  }
2261  ++I;
2262  }
2263  }
2264  for (unsigned i = 0; Available.empty(); ++i) {
2265  assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
2266  "permanent hazard"); (void)i;
2267  bumpCycle(CurrCycle + 1);
2268  releasePending();
2269  }
2270  if (Available.size() == 1)
2271  return *Available.begin();
2272  return NULL;
2273 }
2274 
2275 #ifndef NDEBUG
2276 // This is useful information to dump after bumpNode.
2277 // Note that the Queue contents are more useful before pickNodeFromQueue.
2278 void GenericScheduler::SchedBoundary::dumpScheduledState() {
2279  unsigned ResFactor;
2280  unsigned ResCount;
2281  if (ZoneCritResIdx) {
2282  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2283  ResCount = getResourceCount(ZoneCritResIdx);
2284  }
2285  else {
2286  ResFactor = SchedModel->getMicroOpFactor();
2287  ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2288  }
2289  unsigned LFactor = SchedModel->getLatencyFactor();
2290  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2291  << " Retired: " << RetiredMOps;
2292  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2293  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2294  << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
2295  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2296  << (IsResourceLimited ? " - Resource" : " - Latency")
2297  << " limited.\n";
2298 }
2299 #endif
2300 
2301 void GenericScheduler::SchedCandidate::
2302 initResourceDelta(const ScheduleDAGMI *DAG,
2303  const TargetSchedModel *SchedModel) {
2304  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2305  return;
2306 
2307  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2308  for (TargetSchedModel::ProcResIter
2309  PI = SchedModel->getWriteProcResBegin(SC),
2310  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2311  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2312  ResDelta.CritResources += PI->Cycles;
2313  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2314  ResDelta.DemandedResources += PI->Cycles;
2315  }
2316 }
2317 
2318 
2319 /// Return true if this heuristic determines order.
2320 static bool tryLess(int TryVal, int CandVal,
2321  GenericScheduler::SchedCandidate &TryCand,
2322  GenericScheduler::SchedCandidate &Cand,
2323  GenericScheduler::CandReason Reason) {
2324  if (TryVal < CandVal) {
2325  TryCand.Reason = Reason;
2326  return true;
2327  }
2328  if (TryVal > CandVal) {
2329  if (Cand.Reason > Reason)
2330  Cand.Reason = Reason;
2331  return true;
2332  }
2333  Cand.setRepeat(Reason);
2334  return false;
2335 }
2336 
2337 static bool tryGreater(int TryVal, int CandVal,
2338  GenericScheduler::SchedCandidate &TryCand,
2339  GenericScheduler::SchedCandidate &Cand,
2340  GenericScheduler::CandReason Reason) {
2341  if (TryVal > CandVal) {
2342  TryCand.Reason = Reason;
2343  return true;
2344  }
2345  if (TryVal < CandVal) {
2346  if (Cand.Reason > Reason)
2347  Cand.Reason = Reason;
2348  return true;
2349  }
2350  Cand.setRepeat(Reason);
2351  return false;
2352 }
2353 
2354 static bool tryPressure(const PressureChange &TryP,
2355  const PressureChange &CandP,
2356  GenericScheduler::SchedCandidate &TryCand,
2357  GenericScheduler::SchedCandidate &Cand,
2358  GenericScheduler::CandReason Reason) {
2359  int TryRank = TryP.getPSetOrMax();
2360  int CandRank = CandP.getPSetOrMax();
2361  // If both candidates affect the same set, go with the smallest increase.
2362  if (TryRank == CandRank) {
2363  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2364  Reason);
2365  }
2366  // If one candidate decreases and the other increases, go with it.
2367  // Invalid candidates have UnitInc==0.
2368  if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2369  Reason)) {
2370  return true;
2371  }
2372  // If the candidates are decreasing pressure, reverse priority.
2373  if (TryP.getUnitInc() < 0)
2374  std::swap(TryRank, CandRank);
2375  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2376 }
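// Example: when both candidates' largest change affects the same pressure set,
// the one with the smaller unit increase wins, e.g. +1 beats +2 for the
// corresponding reason (RegExcess, RegCritical, or RegMax).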
2377 
2378 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2379  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2380 }
2381 
2382 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2383 /// their physreg def/use.
2384 ///
2385 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2386 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2387 /// with the operation that produces or consumes the physreg. We'll do this when
2388 /// regalloc has support for parallel copies.
2389 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2390  const MachineInstr *MI = SU->getInstr();
2391  if (!MI->isCopy())
2392  return 0;
2393 
2394  unsigned ScheduledOper = isTop ? 1 : 0;
2395  unsigned UnscheduledOper = isTop ? 0 : 1;
2396  // If we have already scheduled the physreg producer/consumer, immediately
2397  // schedule the copy.
2398  if (TargetRegisterInfo::isPhysicalRegister(
2399  MI->getOperand(ScheduledOper).getReg()))
2400  return 1;
2401  // If the physreg is at the boundary, defer it. Otherwise schedule it
2402  // immediately to free the dependent. We can hoist the copy later.
2403  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2404  if (TargetRegisterInfo::isPhysicalRegister(
2405  MI->getOperand(UnscheduledOper).getReg()))
2406  return AtBoundary ? -1 : 1;
2407  return 0;
2408 }
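// Example (bottom-up, isTop == false, illustrative operands): for a copy like
// "%vreg = COPY %physreg" the physreg is the still-unscheduled source operand,
// so the copy returns -1 (defer) when it has no predecessors left in the
// region and +1 otherwise; for "%physreg = COPY %vreg" the already-scheduled
// side is the physreg def, so it returns 1 and is scheduled immediately next
// to its physreg consumer.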
2409 
2410 static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
2411  GenericScheduler::SchedCandidate &Cand,
2412  GenericScheduler::SchedBoundary &Zone) {
2413  if (Zone.isTop()) {
2414  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2415  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2416  TryCand, Cand, GenericScheduler::TopDepthReduce))
2417  return true;
2418  }
2419  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2420  TryCand, Cand, GenericScheduler::TopPathReduce))
2421  return true;
2422  }
2423  else {
2424  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2425  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2426  TryCand, Cand, GenericScheduler::BotHeightReduce))
2427  return true;
2428  }
2429  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2430  TryCand, Cand, GenericScheduler::BotPathReduce))
2431  return true;
2432  }
2433  return false;
2434 }
2435 
2436 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2437 /// hierarchical. This may be more efficient than a graduated cost model because
2438 /// we don't need to evaluate all aspects of the model for each node in the
2439 /// queue. But it's really done to make the heuristics easier to debug and
2440 /// statistically analyze.
2441 ///
2442 /// \param Cand provides the policy and current best candidate.
2443 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2444 /// \param Zone describes the scheduled zone that we are extending.
2445 /// \param RPTracker describes reg pressure within the scheduled zone.
2446 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
2447 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2448  SchedCandidate &TryCand,
2449  SchedBoundary &Zone,
2450  const RegPressureTracker &RPTracker,
2451  RegPressureTracker &TempTracker) {
2452 
2453  if (DAG->isTrackingPressure()) {
2454  // Always initialize TryCand's RPDelta.
2455  if (Zone.isTop()) {
2456  TempTracker.getMaxDownwardPressureDelta(
2457  TryCand.SU->getInstr(),
2458  TryCand.RPDelta,
2459  DAG->getRegionCriticalPSets(),
2460  DAG->getRegPressure().MaxSetPressure);
2461  }
2462  else {
2463  if (VerifyScheduling) {
2464  TempTracker.getMaxUpwardPressureDelta(
2465  TryCand.SU->getInstr(),
2466  &DAG->getPressureDiff(TryCand.SU),
2467  TryCand.RPDelta,
2468  DAG->getRegionCriticalPSets(),
2469  DAG->getRegPressure().MaxSetPressure);
2470  }
2471  else {
2472  RPTracker.getUpwardPressureDelta(
2473  TryCand.SU->getInstr(),
2474  DAG->getPressureDiff(TryCand.SU),
2475  TryCand.RPDelta,
2476  DAG->getRegionCriticalPSets(),
2477  DAG->getRegPressure().MaxSetPressure);
2478  }
2479  }
2480  }
2481  DEBUG(if (TryCand.RPDelta.Excess.isValid())
2482  dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
2483  << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
2484  << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
2485 
2486  // Initialize the candidate if needed.
2487  if (!Cand.isValid()) {
2488  TryCand.Reason = NodeOrder;
2489  return;
2490  }
2491 
2492  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
2493  biasPhysRegCopy(Cand.SU, Zone.isTop()),
2494  TryCand, Cand, PhysRegCopy))
2495  return;
2496 
2497  // Avoid exceeding the target's limit. If signed PSetID is negative, it is
2498  // invalid; convert it to INT_MAX to give it lowest priority.
2499  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2500  Cand.RPDelta.Excess,
2501  TryCand, Cand, RegExcess))
2502  return;
2503 
2504  // Avoid increasing the max critical pressure in the scheduled region.
2505  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2506  Cand.RPDelta.CriticalMax,
2507  TryCand, Cand, RegCritical))
2508  return;
2509 
2510  // For loops that are acyclic path limited, aggressively schedule for latency.
2511  // This can result in very long dependence chains scheduled in sequence, so
2512  // once every cycle (when CurrMOps == 0), switch to normal heuristics.
2513  if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps
2514  && tryLatency(TryCand, Cand, Zone))
2515  return;
2516 
2517  // Keep clustered nodes together to encourage downstream peephole
2518  // optimizations which may reduce resource requirements.
2519  //
2520  // This is a best effort to set things up for a post-RA pass. Optimizations
2521  // like generating loads of multiple registers should ideally be done within
2522  // the scheduler pass by combining the loads during DAG postprocessing.
2523  const SUnit *NextClusterSU =
2524  Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2525  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2526  TryCand, Cand, Cluster))
2527  return;
2528 
2529  // Weak edges are for clustering and other constraints.
2530  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2531  getWeakLeft(Cand.SU, Zone.isTop()),
2532  TryCand, Cand, Weak)) {
2533  return;
2534  }
2535  // Avoid increasing the max pressure of the entire region.
2536  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2537  Cand.RPDelta.CurrentMax,
2538  TryCand, Cand, RegMax))
2539  return;
2540 
2541  // Avoid critical resource consumption and balance the schedule.
2542  TryCand.initResourceDelta(DAG, SchedModel);
2543  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2544  TryCand, Cand, ResourceReduce))
2545  return;
2546  if (tryGreater(TryCand.ResDelta.DemandedResources,
2547  Cand.ResDelta.DemandedResources,
2548  TryCand, Cand, ResourceDemand))
2549  return;
2550 
2551  // Avoid serializing long latency dependence chains.
2552  // For acyclic path limited loops, latency was already checked above.
2553  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
2554  && tryLatency(TryCand, Cand, Zone)) {
2555  return;
2556  }
2557 
2558  // Prefer immediate defs/users of the last scheduled instruction. This is a
2559  // local pressure avoidance strategy that also makes the machine code
2560  // readable.
2561  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
2562  TryCand, Cand, NextDefUse))
2563  return;
2564 
2565  // Fall through to original instruction order.
2566  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2567  || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2568  TryCand.Reason = NodeOrder;
2569  }
2570 }
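// Summary of the hierarchy applied above, from highest to lowest priority:
// physreg-copy bias, register excess, critical register pressure, acyclic
// latency (for acyclic-path-limited loops), cluster edges, weak edges, max
// register pressure, critical-resource reduction, demanded resources,
// dependent latency, next def/use, and finally original node order.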
2571 
2572 #ifndef NDEBUG
2573 const char *GenericScheduler::getReasonStr(
2574  GenericScheduler::CandReason Reason) {
2575  switch (Reason) {
2576  case NoCand: return "NOCAND ";
2577  case PhysRegCopy: return "PREG-COPY";
2578  case RegExcess: return "REG-EXCESS";
2579  case RegCritical: return "REG-CRIT ";
2580  case Cluster: return "CLUSTER ";
2581  case Weak: return "WEAK ";
2582  case RegMax: return "REG-MAX ";
2583  case ResourceReduce: return "RES-REDUCE";
2584  case ResourceDemand: return "RES-DEMAND";
2585  case TopDepthReduce: return "TOP-DEPTH ";
2586  case TopPathReduce: return "TOP-PATH ";
2587  case BotHeightReduce:return "BOT-HEIGHT";
2588  case BotPathReduce: return "BOT-PATH ";
2589  case NextDefUse: return "DEF-USE ";
2590  case NodeOrder: return "ORDER ";
2591  };
2592  llvm_unreachable("Unknown reason!");
2593 }
2594 
2595 void GenericScheduler::traceCandidate(const SchedCandidate &Cand) {
2596  PressureChange P;
2597  unsigned ResIdx = 0;
2598  unsigned Latency = 0;
2599  switch (Cand.Reason) {
2600  default:
2601  break;
2602  case RegExcess:
2603  P = Cand.RPDelta.Excess;
2604  break;
2605  case RegCritical:
2606  P = Cand.RPDelta.CriticalMax;
2607  break;
2608  case RegMax:
2609  P = Cand.RPDelta.CurrentMax;
2610  break;
2611  case ResourceReduce:
2612  ResIdx = Cand.Policy.ReduceResIdx;
2613  break;
2614  case ResourceDemand:
2615  ResIdx = Cand.Policy.DemandResIdx;
2616  break;
2617  case TopDepthReduce:
2618  Latency = Cand.SU->getDepth();
2619  break;
2620  case TopPathReduce:
2621  Latency = Cand.SU->getHeight();
2622  break;
2623  case BotHeightReduce:
2624  Latency = Cand.SU->getHeight();
2625  break;
2626  case BotPathReduce:
2627  Latency = Cand.SU->getDepth();
2628  break;
2629  }
2630  dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2631  if (P.isValid())
2632  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2633  << ":" << P.getUnitInc() << " ";
2634  else
2635  dbgs() << " ";
2636  if (ResIdx)
2637  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2638  else
2639  dbgs() << " ";
2640  if (Latency)
2641  dbgs() << " " << Latency << " cycles ";
2642  else
2643  dbgs() << " ";
2644  dbgs() << '\n';
2645 }
2646 #endif
2647 
2648 /// Pick the best candidate from the queue.
2649 ///
2650 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2651 /// DAG building. To adjust for the current scheduling location we need to
2652 /// maintain the number of vreg uses remaining to be top-scheduled.
2653 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2654  const RegPressureTracker &RPTracker,
2655  SchedCandidate &Cand) {
2656  ReadyQueue &Q = Zone.Available;
2657 
2658  DEBUG(Q.dump());
2659 
2660  // getMaxPressureDelta temporarily modifies the tracker.
2661  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2662 
2663  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2664 
2665  SchedCandidate TryCand(Cand.Policy);
2666  TryCand.SU = *I;
2667  tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2668  if (TryCand.Reason != NoCand) {
2669  // Initialize resource delta if needed in case future heuristics query it.
2670  if (TryCand.ResDelta == SchedResourceDelta())
2671  TryCand.initResourceDelta(DAG, SchedModel);
2672  Cand.setBest(TryCand);
2673  DEBUG(traceCandidate(Cand));
2674  }
2675  }
2676 }
2677 
2678 static void tracePick(const GenericScheduler::SchedCandidate &Cand,
2679  bool IsTop) {
2680  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2681  << GenericScheduler::getReasonStr(Cand.Reason) << '\n');
2682 }
2683 
2684 /// Pick the best candidate node from either the top or bottom queue.
2685 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2686  // Schedule as far as possible in the direction of no choice. This is most
2687  // efficient, but also provides the best heuristics for CriticalPSets.
2688  if (SUnit *SU = Bot.pickOnlyChoice()) {
2689  IsTopNode = false;
2690  DEBUG(dbgs() << "Pick Bot NOCAND\n");
2691  return SU;
2692  }
2693  if (SUnit *SU = Top.pickOnlyChoice()) {
2694  IsTopNode = true;
2695  DEBUG(dbgs() << "Pick Top NOCAND\n");
2696  return SU;
2697  }
2698  CandPolicy NoPolicy;
2699  SchedCandidate BotCand(NoPolicy);
2700  SchedCandidate TopCand(NoPolicy);
2701  Bot.setPolicy(BotCand.Policy, Top);
2702  Top.setPolicy(TopCand.Policy, Bot);
2703 
2704  // Prefer bottom scheduling when heuristics are silent.
2705  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2706  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2707 
2708  // If either Q has a single candidate that provides the least increase in
2709  // Excess pressure, we can immediately schedule from that Q.
2710  //
2711  // RegionCriticalPSets summarizes the pressure within the scheduled region and
2712  // affects picking from either Q. If scheduling in one direction must
2713  // increase pressure for one of the excess PSets, then schedule in that
2714  // direction first to provide more freedom in the other direction.
2715  if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
2716  || (BotCand.Reason == RegCritical
2717  && !BotCand.isRepeat(RegCritical)))
2718  {
2719  IsTopNode = false;
2720  tracePick(BotCand, IsTopNode);
2721  return BotCand.SU;
2722  }
2723  // Check if the top Q has a better candidate.
2724  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2725  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2726 
2727  // Choose the queue with the most important (lowest enum) reason.
2728  if (TopCand.Reason < BotCand.Reason) {
2729  IsTopNode = true;
2730  tracePick(TopCand, IsTopNode);
2731  return TopCand.SU;
2732  }
2733  // Otherwise prefer the bottom candidate, in node order if all else failed.
2734  IsTopNode = false;
2735  tracePick(BotCand, IsTopNode);
2736  return BotCand.SU;
2737 }
2738 
2739 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2740 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
2741  if (DAG->top() == DAG->bottom()) {
2742  assert(Top.Available.empty() && Top.Pending.empty() &&
2743  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2744  return NULL;
2745  }
2746  SUnit *SU;
2747  do {
2748  if (RegionPolicy.OnlyTopDown) {
2749  SU = Top.pickOnlyChoice();
2750  if (!SU) {
2751  CandPolicy NoPolicy;
2752  SchedCandidate TopCand(NoPolicy);
2753  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2754  assert(TopCand.Reason != NoCand && "failed to find a candidate");
2755  tracePick(TopCand, true);
2756  SU = TopCand.SU;
2757  }
2758  IsTopNode = true;
2759  }
2760  else if (RegionPolicy.OnlyBottomUp) {
2761  SU = Bot.pickOnlyChoice();
2762  if (!SU) {
2763  CandPolicy NoPolicy;
2764  SchedCandidate BotCand(NoPolicy);
2765  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2766  assert(BotCand.Reason != NoCand && "failed to find a candidate");
2767  tracePick(BotCand, false);
2768  SU = BotCand.SU;
2769  }
2770  IsTopNode = false;
2771  }
2772  else {
2773  SU = pickNodeBidirectional(IsTopNode);
2774  }
2775  } while (SU->isScheduled);
2776 
2777  if (SU->isTopReady())
2778  Top.removeReady(SU);
2779  if (SU->isBottomReady())
2780  Bot.removeReady(SU);
2781 
2782  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
2783  return SU;
2784 }
2785 
2786 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
2787 
2788  MachineBasicBlock::iterator InsertPos = SU->getInstr();
2789  if (!isTop)
2790  ++InsertPos;
2791  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
2792 
2793  // Find already scheduled copies with a single physreg dependence and move
2794  // them just above the scheduled instruction.
2795  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
2796  I != E; ++I) {
2797  if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
2798  continue;
2799  SUnit *DepSU = I->getSUnit();
2800  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
2801  continue;
2802  MachineInstr *Copy = DepSU->getInstr();
2803  if (!Copy->isCopy())
2804  continue;
2805  DEBUG(dbgs() << " Rescheduling physreg copy ";
2806  I->getSUnit()->dump(DAG));
2807  DAG->moveInstruction(Copy, InsertPos);
2808  }
2809 }
2810 
2811 /// Update the scheduler's state after scheduling a node. This is the same node
2812 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2813 /// its state based on the current cycle before MachineSchedStrategy does.
2814 ///
2815 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2816 /// them here. See comments in biasPhysRegCopy.
2817 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2818  if (IsTopNode) {
2819  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
2820  Top.bumpNode(SU);
2821  if (SU->hasPhysRegUses)
2822  reschedulePhysRegCopies(SU, true);
2823  }
2824  else {
2825  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
2826  Bot.bumpNode(SU);
2827  if (SU->hasPhysRegDefs)
2828  reschedulePhysRegCopies(SU, false);
2829  }
2830 }
2831 
2832 /// Create the standard converging machine scheduler. This will be used as the
2833 /// default scheduler if the target does not set a default.
2834 static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
2835  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C));
2836  // Register DAG post-processors.
2837  //
2838  // FIXME: extend the mutation API to allow earlier mutations to instantiate
2839  // data and pass it to later mutations. Have a single mutation that gathers
2840  // the interesting nodes in one pass.
2841  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
2842  if (EnableLoadCluster && DAG->TII->enableClusterLoads())
2843  DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2844  if (EnableMacroFusion)
2845  DAG->addMutation(new MacroFusion(DAG->TII));
2846  return DAG;
2847 }
2848 static MachineSchedRegistry
2849 GenericSchedRegistry("converge", "Standard converging scheduler.",
2850  createGenericSched);
2851 
2852 //===----------------------------------------------------------------------===//
2853 // ILP Scheduler. Currently for experimental analysis of heuristics.
2854 //===----------------------------------------------------------------------===//
2855 
2856 namespace {
2857 /// \brief Order nodes by the ILP metric.
2858 struct ILPOrder {
2859  const SchedDFSResult *DFSResult;
2860  const BitVector *ScheduledTrees;
2861  bool MaximizeILP;
2862 
2863  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
2864 
2865  /// \brief Apply a less-than relation on node priority.
2866  ///
2867  /// (Return true if A comes after B in the Q.)
2868  bool operator()(const SUnit *A, const SUnit *B) const {
2869  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2870  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2871  if (SchedTreeA != SchedTreeB) {
2872  // Unscheduled trees have lower priority.
2873  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2874  return ScheduledTrees->test(SchedTreeB);
2875 
2876  // Trees with shallower connections have lower priority.
2877  if (DFSResult->getSubtreeLevel(SchedTreeA)
2878  != DFSResult->getSubtreeLevel(SchedTreeB)) {
2879  return DFSResult->getSubtreeLevel(SchedTreeA)
2880  < DFSResult->getSubtreeLevel(SchedTreeB);
2881  }
2882  }
2883  if (MaximizeILP)
2884  return DFSResult->getILP(A) < DFSResult->getILP(B);
2885  else
2886  return DFSResult->getILP(A) > DFSResult->getILP(B);
2887  }
2888 };
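// Example: a node whose subtree has already started scheduling is preferred
// over one in an unscheduled subtree; within the same subtree and with
// MaximizeILP == true, the node with the larger ILP metric is popped from the
// heap first.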
2889 
2890 /// \brief Schedule based on the ILP metric.
2891 class ILPScheduler : public MachineSchedStrategy {
2892  ScheduleDAGMI *DAG;
2893  ILPOrder Cmp;
2894 
2895  std::vector<SUnit*> ReadyQ;
2896 public:
2897  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
2898 
2899  virtual void initialize(ScheduleDAGMI *dag) {
2900  DAG = dag;
2901  DAG->computeDFSResult();
2902  Cmp.DFSResult = DAG->getDFSResult();
2903  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
2904  ReadyQ.clear();
2905  }
2906 
2907  virtual void registerRoots() {
2908  // Restore the heap in ReadyQ with the updated DFS results.
2909  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2910  }
2911 
2912  /// Implement MachineSchedStrategy interface.
2913  /// -----------------------------------------
2914 
2915  /// Callback to select the highest priority node from the ready Q.
2916  virtual SUnit *pickNode(bool &IsTopNode) {
2917  if (ReadyQ.empty()) return NULL;
2918  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2919  SUnit *SU = ReadyQ.back();
2920  ReadyQ.pop_back();
2921  IsTopNode = false;
2922  DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
2923  << " ILP: " << DAG->getDFSResult()->getILP(SU)
2924  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
2925  << DAG->getDFSResult()->getSubtreeLevel(
2926  DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
2927  << "Scheduling " << *SU->getInstr());
2928  return SU;
2929  }
2930 
2931  /// \brief Scheduler callback to notify that a new subtree is scheduled.
2932  virtual void scheduleTree(unsigned SubtreeID) {
2933  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2934  }
2935 
2936  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
2937  /// DFSResults, and resort the priority Q.
2938  virtual void schedNode(SUnit *SU, bool IsTopNode) {
2939  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2940  }
2941 
2942  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2943 
2944  virtual void releaseBottomNode(SUnit *SU) {
2945  ReadyQ.push_back(SU);
2946  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2947  }
2948 };
2949 } // namespace
2950 
2951 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2952  return new ScheduleDAGMI(C, new ILPScheduler(true));
2953 }
2954 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2955  return new ScheduleDAGMI(C, new ILPScheduler(false));
2956 }
2957 static MachineSchedRegistry ILPMaxRegistry(
2958  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2959 static MachineSchedRegistry ILPMinRegistry(
2960  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2961 
2962 //===----------------------------------------------------------------------===//
2963 // Machine Instruction Shuffler for Correctness Testing
2964 //===----------------------------------------------------------------------===//
2965 
2966 #ifndef NDEBUG
2967 namespace {
2968 /// Apply a less-than relation on the node order, which corresponds to the
2969 /// instruction order prior to scheduling. IsReverse implements greater-than.
2970 template<bool IsReverse>
2971 struct SUnitOrder {
2972  bool operator()(SUnit *A, SUnit *B) const {
2973  if (IsReverse)
2974  return A->NodeNum > B->NodeNum;
2975  else
2976  return A->NodeNum < B->NodeNum;
2977  }
2978 };
2979 
2980 /// Reorder instructions as much as possible.
2981 class InstructionShuffler : public MachineSchedStrategy {
2982  bool IsAlternating;
2983  bool IsTopDown;
2984 
2985  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2986  // gives nodes with a higher number higher priority causing the latest
2987  // instructions to be scheduled first.
2988  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2989  TopQ;
2990  // When scheduling bottom-up, use greater-than as the queue priority.
2991  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2992  BottomQ;
2993 public:
2994  InstructionShuffler(bool alternate, bool topdown)
2995  : IsAlternating(alternate), IsTopDown(topdown) {}
2996 
2997  virtual void initialize(ScheduleDAGMI *) {
2998  TopQ.clear();
2999  BottomQ.clear();
3000  }
3001 
3002  /// Implement MachineSchedStrategy interface.
3003  /// -----------------------------------------
3004 
3005  virtual SUnit *pickNode(bool &IsTopNode) {
3006  SUnit *SU;
3007  if (IsTopDown) {
3008  do {
3009  if (TopQ.empty()) return NULL;
3010  SU = TopQ.top();
3011  TopQ.pop();
3012  } while (SU->isScheduled);
3013  IsTopNode = true;
3014  }
3015  else {
3016  do {
3017  if (BottomQ.empty()) return NULL;
3018  SU = BottomQ.top();
3019  BottomQ.pop();
3020  } while (SU->isScheduled);
3021  IsTopNode = false;
3022  }
3023  if (IsAlternating)
3024  IsTopDown = !IsTopDown;
3025  return SU;
3026  }
3027 
3028  virtual void schedNode(SUnit *SU, bool IsTopNode) {}
3029 
3030  virtual void releaseTopNode(SUnit *SU) {
3031  TopQ.push(SU);
3032  }
3033  virtual void releaseBottomNode(SUnit *SU) {
3034  BottomQ.push(SU);
3035  }
3036 };
3037 } // namespace
3038 
3039 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3040  bool Alternate = !ForceTopDown && !ForceBottomUp;
3041  bool TopDown = !ForceBottomUp;
3042  assert((TopDown || !ForceTopDown) &&
3043  "-misched-topdown incompatible with -misched-bottomup");
3044  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
3045 }
3046 static MachineSchedRegistry ShufflerRegistry(
3047  "shuffle", "Shuffle machine instructions alternating directions",
3048  createInstructionShuffler);
3049 #endif // !NDEBUG
3050 
3051 //===----------------------------------------------------------------------===//
3052 // GraphWriter support for ScheduleDAGMI.
3053 //===----------------------------------------------------------------------===//
3054 
3055 #ifndef NDEBUG
3056 namespace llvm {
3057 
3058 template<> struct GraphTraits<
3059  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3060 
3061 template<>
3062 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3063 
3064  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
3065 
3066  static std::string getGraphName(const ScheduleDAG *G) {
3067  return G->MF.getName();
3068  }
3069 
3070  static bool renderGraphFromBottomUp() {
3071  return true;
3072  }
3073 
3074  static bool isNodeHidden(const SUnit *Node) {
3075  return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
3076  }
3077 
3078  static bool hasNodeAddressLabel(const SUnit *Node,
3079  const ScheduleDAG *Graph) {
3080  return false;
3081  }
3082 
3083  /// If you want to override the dot attributes printed for a particular
3084  /// edge, override this method.
3085  static std::string getEdgeAttributes(const SUnit *Node,
3086  SUnitIterator EI,
3087  const ScheduleDAG *Graph) {
3088  if (EI.isArtificialDep())
3089  return "color=cyan,style=dashed";
3090  if (EI.isCtrlDep())
3091  return "color=blue,style=dashed";
3092  return "";
3093  }
3094 
3095  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3096  std::string Str;
3097  raw_string_ostream SS(Str);
3098  const SchedDFSResult *DFS =
3099  static_cast<const ScheduleDAGMI*>(G)->getDFSResult();
3100  SS << "SU:" << SU->NodeNum;
3101  if (DFS)
3102  SS << " I:" << DFS->getNumInstrs(SU);
3103  return SS.str();
3104  }
3105  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3106  return G->getGraphNodeLabel(SU);
3107  }
3108 
3109  static std::string getNodeAttributes(const SUnit *N,
3110  const ScheduleDAG *Graph) {
3111  std::string Str("shape=Mrecord");
3112  const SchedDFSResult *DFS =
3113  static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
3114  if (DFS) {
3115  Str += ",style=filled,fillcolor=\"#";
3116  Str += DOT::getColorString(DFS->getSubtreeID(N));
3117  Str += '"';
3118  }
3119  return Str;
3120  }
3121 };
3122 } // namespace llvm
3123 #endif // NDEBUG
3124 
3125 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3126 /// rendered using 'dot'.
3127 ///
3128 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3129 #ifndef NDEBUG
3130  ViewGraph(this, Name, false, Title);
3131 #else
3132  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3133  << "systems with Graphviz or gv!\n";
3134 #endif // NDEBUG
3135 }
3136 
3137 /// Out-of-line implementation with no arguments is handy for gdb.
3138 void ScheduleDAGMI::viewGraph() {
3139  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3140 }
MachineSchedOpt allows command line selection of the scheduler.
#define llvm_unreachable(msg)
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
iterator end()
Definition: LiveInterval.h:193
unsigned getPSet() const
static unsigned getWeakLeft(const SUnit *SU, bool isTop)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:172
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:49
bool mayLoad(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:465
bool hasPhysRegUses
Definition: ScheduleDAG.h:286
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) LLVM_OVERRIDE
bool hasPhysRegDefs
Definition: ScheduleDAG.h:287
static void tracePick(const GenericScheduler::SchedCandidate &Cand, bool IsTop)
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
#define G(x, y, z)
Definition: MD5.cpp:52
static MachineSchedRegistry ShufflerRegistry("shuffle","Shuffle machine instructions alternating directions", createInstructionShuffler)
bool recede(SmallVectorImpl< unsigned > *LiveUses=0, PressureDiff *PDiff=0)
Recede across the previous instruction.
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:68
unsigned TopReadyCycle
Definition: ScheduleDAG.h:303
static cl::opt< bool > VerifyScheduling("verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling"))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
reverse_iterator rbegin() const
Definition: ArrayRef.h:100
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:278
static std::string getGraphName(const ScheduleDAG *G)
unsigned NumPredsLeft
Definition: ScheduleDAG.h:275
virtual void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs)
Optionally override the per-region scheduling policy.
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:211
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the scheduler state for the next scheduling region.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
virtual bool enableClusterLoads() const
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static cl::opt< bool > EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true))
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:139
std::vector< SUnit * >::iterator iterator
static bool tryLess(int TryVal, int CandVal, GenericScheduler::SchedCandidate &TryCand, GenericScheduler::SchedCandidate &Cand, GenericScheduler::CandReason Reason)
Return true if this heuristic determines order.
MachineSchedStrategy * SchedImpl
reverse_iterator rend() const
Definition: ArrayRef.h:101
RegisterClassInfo * RegClassInfo
bundle_iterator< MachineInstr, instr_iterator > iterator
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:314
std::vector< ScheduleDAGMutation * > Mutations
Ordered list of DAG postprocessing steps.
LiveQueryResult Query(SlotIndex Idx) const
Definition: LiveInterval.h:441
ArrayRef< unsigned > getLiveThru() const
void updatePressureDiffs(ArrayRef< unsigned > LiveUses)
const InstrItineraryData * getInstrItineraries() const
unsigned short Latency
Definition: ScheduleDAG.h:280
static const unsigned MinSubtreeSize
static void initialize(TargetLibraryInfo &TLI, const Triple &T, const char **StandardNames)
void dumpSchedule() const
dump the scheduled Sequence.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:267
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources. This is the number o...
PressureDiff & getPressureDiff(const SUnit *SU)
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
bool isCopy() const
Definition: MachineInstr.h:669
ItTy next(ItTy it, Dist n)
Definition: STLExtras.h:154
static MachinePassRegistry Registry
void releasePred(SUnit *SU, SDep *PredEdge)
iterator begin() const
Definition: ArrayRef.h:97
virtual void releaseBottomNode(SUnit *SU)=0
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
SmallVector< unsigned, 8 > LiveOutRegs
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
virtual void exitRegion()
Notify that the scheduler has finished scheduling the current region.
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned getRegPressureSetLimit(unsigned Idx) const
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
Definition: GraphWriter.h:346
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:69
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
#define INITIALIZE_AG_DEPENDENCY(depName)
Definition: PassSupport.h:169
LiveIntervals * getLIS() const
Expose LiveIntervals for use in DAG mutators and such.
static cl::opt< bool > EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true))
bool isPHIDef() const
Definition: LiveInterval.h:73
Machine Instruction false
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * NextClusterSucc
std::string & str()
Definition: raw_ostream.h:441
void initLiveThru(const RegPressureTracker &RPTracker)
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
iterator find(const KeyT &Key)
MachineBasicBlock::iterator LiveRegionEnd
iterator find(SlotIndex Pos)
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:277
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:153
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
virtual const TargetInstrInfo * getInstrInfo() const
static MachineSchedRegistry GenericSchedRegistry("converge","Standard converging scheduler.", createGenericSched)
const STC & getSubtarget() const
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:206
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:262
ScheduleDAGInstrs *(* ScheduleDAGCtor)(MachineSchedContext *)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
bool test(unsigned Idx) const
Definition: BitVector.h:337
bool isCtrlDep() const
isCtrlDep - Test if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:638
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:103
bool isSuccessor(const MachineBasicBlock *MBB) const
const std::vector< PressureChange > & getRegionCriticalPSets() const
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number. Simply round-robin selects from a reasonable number of color...
Definition: GraphWriter.cpp:59
void setPreservesCFG()
Definition: Pass.cpp:249
LiveInterval & getInterval(unsigned Reg)
PressureDiffs SUPressureDiffs
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
void initializeMachineSchedulerPass(PassRegistry &)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:591
virtual void finalizeSchedule()
bool isTopReady() const
Definition: ScheduleDAG.h:453
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
void biasCriticalPath()
Order this node's predecessor edges such that the critical path edge occurs first.
virtual void schedule()=0
SUnit * getSUnit() const
Definition: ScheduleDAG.h:160
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:161
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
MachineBasicBlock::iterator bottom() const
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
MachineBasicBlock::iterator end() const
end - Return an iterator to the bottom of the current scheduling region.
static bool tryGreater(int TryVal, int CandVal, GenericScheduler::SchedCandidate &TryCand, GenericScheduler::SchedCandidate &Cand, GenericScheduler::CandReason Reason)
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:357
const IntervalPressure & getRegPressure() const
Get register pressure for the entire scheduling region before scheduling.
unsigned getDepth() const
Definition: ScheduleDAG.h:403
const SUnit * getNextClusterSucc() const
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool ShouldTrackUntiedDefs=false)
void closeTop()
Set the boundary for the top of the region and summarize live ins.
RegPressureTracker BotRPTracker
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:172
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:314
bool isCluster() const
Definition: ScheduleDAG.h:210
SUnit * getSUnit(MachineInstr *MI) const
getSUnit - Return an existing SUnit for this MI, or NULL.
virtual SUnit * pickNode(bool &IsTopNode)=0
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1686
bundle_iterator< const MachineInstr, const_instr_iterator > const_iterator
RegPressureTracker RPTracker
static bool isPhysicalRegister(unsigned Reg)
const TargetRegisterInfo * TRI
Definition: ScheduleDAG.h:542
INITIALIZE_PASS_BEGIN(MachineScheduler,"misched","Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
cl::opt< bool > ForceBottomUp
virtual void getAnalysisUsage(AnalysisUsage &AU) const
SlotIndex endIndex() const
Definition: LiveInterval.h:321
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:148
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
void resize(unsigned N)
Definition: SmallVector.h:401
static bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericScheduler::SchedCandidate &TryCand, GenericScheduler::SchedCandidate &Cand, GenericScheduler::CandReason Reason)
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
const TargetMachine & getTarget() const
static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph)
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
RegisterClassInfo * RegClassInfo
static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, GenericScheduler::SchedCandidate &Cand, GenericScheduler::SchedBoundary &Zone)
SchedDFSResult * DFSResult
const TargetInstrInfo * TII
Definition: ScheduleDAG.h:541
static bool isNodeHidden(const SUnit *Node)
std::vector< PressureChange > RegionCriticalPSets
unsigned NodeNum
Definition: ScheduleDAG.h:271
unsigned getPSetOrMax() const
iterator begin()
Definition: LiveInterval.h:192
unsigned getReg() const
getReg - Returns the register number.
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
void addLiveRegs(ArrayRef< unsigned > Regs)
Force liveness of registers.
bool addPred(const SDep &D, bool Required=true)
Definition: ScheduleDAG.cpp:65
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
SmallVector< SDep, 4 > Succs
Definition: ScheduleDAG.h:264
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
RegPressureTracker TopRPTracker
bool isBottomReady() const
Definition: ScheduleDAG.h:456
SmallVector< unsigned, 8 > LiveInRegs
List of live in virtual registers or physical register units.
static MachineSchedRegistry ILPMinRegistry("ilpmin","Schedule bottom-up for min ILP", createILPMinScheduler)
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:68
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
BasicBlockListType::iterator iterator
const SUnit * getNextClusterPred() const
ItTy prior(ItTy it, Dist n)
Definition: STLExtras.h:167
#define DEBUG(X)
Definition: Debug.h:97
void AddPred(SUnit *Y, SUnit *X)
Machine Instruction Scheduler
MachineBasicBlock * BB
The block in which to insert instructions.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
const RegPressureTracker & getBotRPTracker() const
void setPos(MachineBasicBlock::const_iterator Pos)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1684
StringRef getName() const
VNInfo * valueIn() const
Definition: LiveInterval.h:100
RegisterPressure & getPressure()
MachineRegisterInfo & MRI
Definition: ScheduleDAG.h:544
std::vector< SUnit > SUnits
Definition: ScheduleDAG.h:545
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:92
void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker=0, PressureDiffs *PDiffs=0)
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
void dump(const ScheduleDAG *G) const
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
VNInfo * getVNInfoBefore(SlotIndex Idx) const
Definition: LiveInterval.h:358
SUnit - Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
cl::opt< bool > ForceTopDown