LLVM API Documentation

ScheduleDAGRRList.cpp
1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
15 //
16 //===----------------------------------------------------------------------===//
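//
// A rough sketch of that loop in the bottom-up direction (illustrative only;
// the helper names here are hypothetical, the real loop is
// ListScheduleBottomUp further down in this file):
//
//   AvailableQueue->push(rootSU);
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = AvailableQueue->pop();   // highest-priority ready node
//     if (interferesWithLiveRegs(SU))      // not legal to schedule yet
//       continue;                          //   (delay or resolve, then retry)
//     scheduleNode(SU);                    // emit it, release its predecessors
//   }
//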
17 
18 #define DEBUG_TYPE "pre-RA-sched"
20 #include "ScheduleDAGSDNodes.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/InlineAsm.h"
29 #include "llvm/Support/Debug.h"
36 #include <climits>
37 using namespace llvm;
38 
39 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
40 STATISTIC(NumUnfolds, "Number of nodes unfolded");
41 STATISTIC(NumDups, "Number of duplicated nodes");
42 STATISTIC(NumPRCopies, "Number of physical register copies");
43 
44 static RegisterScheduler
45  burrListDAGScheduler("list-burr",
46  "Bottom-up register reduction list scheduling",
47  createBURRListDAGScheduler);
48 static RegisterScheduler
49  sourceListDAGScheduler("source",
50  "Similar to list-burr but schedules in source "
51  "order when possible",
52  createSourceListDAGScheduler);
53 
54 static RegisterScheduler
55  hybridListDAGScheduler("list-hybrid",
56  "Bottom-up register pressure aware list scheduling "
57  "which tries to balance latency and register pressure",
58  createHybridListDAGScheduler);
59 
60 static RegisterScheduler
61  ILPListDAGScheduler("list-ilp",
62  "Bottom-up register pressure aware list scheduling "
63  "which tries to balance ILP and register pressure",
64  createILPListDAGScheduler);
65 
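// These registrations expose the schedulers by name; a typical way to select
// one (shown here only for illustration) is llc's -pre-RA-sched option:
//
//   llc -pre-RA-sched=list-ilp foo.ll
//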
66 static cl::opt<bool> DisableSchedCycles(
67  "disable-sched-cycles", cl::Hidden, cl::init(false),
68  cl::desc("Disable cycle-level precision during preRA scheduling"));
69 
70 // Temporary sched=list-ilp flags until the heuristics are robust.
71 // Some options are also available under sched=list-hybrid.
72 static cl::opt<bool> DisableSchedRegPressure(
73  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
74  cl::desc("Disable regpressure priority in sched=list-ilp"));
75 static cl::opt<bool> DisableSchedLiveUses(
76  "disable-sched-live-uses", cl::Hidden, cl::init(true),
77  cl::desc("Disable live use priority in sched=list-ilp"));
78 static cl::opt<bool> DisableSchedVRegCycle(
79  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
80  cl::desc("Disable virtual register cycle interference checks"));
81 static cl::opt<bool> DisableSchedPhysRegJoin(
82  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
83  cl::desc("Disable physreg def-use affinity"));
84 static cl::opt<bool> DisableSchedStalls(
85  "disable-sched-stalls", cl::Hidden, cl::init(true),
86  cl::desc("Disable no-stall priority in sched=list-ilp"));
87 static cl::opt<bool> DisableSchedCriticalPath(
88  "disable-sched-critical-path", cl::Hidden, cl::init(false),
89  cl::desc("Disable critical path priority in sched=list-ilp"));
90 static cl::opt<bool> DisableSchedHeight(
91  "disable-sched-height", cl::Hidden, cl::init(false),
92  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
93 static cl::opt<bool> Disable2AddrHack(
94  "disable-2addr-hack", cl::Hidden, cl::init(true),
95  cl::desc("Disable scheduler's two-address hack"));
96 
97 static cl::opt<int> MaxReorderWindow(
98  "max-sched-reorder", cl::Hidden, cl::init(6),
99  cl::desc("Number of instructions to allow ahead of the critical path "
100  "in sched=list-ilp"));
101 
102 static cl::opt<unsigned> AvgIPC(
103  "sched-avg-ipc", cl::Hidden, cl::init(1),
104  cl::desc("Average inst/cycle when no target itinerary exists."));
105 
106 namespace {
107 //===----------------------------------------------------------------------===//
108 /// ScheduleDAGRRList - The actual register reduction list scheduler
109 /// implementation. This supports both top-down and bottom-up scheduling.
110 ///
111 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
112 private:
113  /// NeedLatency - True if the scheduler will make use of latency information.
114  ///
115  bool NeedLatency;
116 
117  /// AvailableQueue - The priority queue to use for the available SUnits.
118  SchedulingPriorityQueue *AvailableQueue;
119 
120  /// PendingQueue - This contains all of the instructions whose operands have
121  /// been issued, but their results are not ready yet (due to the latency of
122  /// the operation). Once the operands becomes available, the instruction is
123  /// added to the AvailableQueue.
124  std::vector<SUnit*> PendingQueue;
125 
126  /// HazardRec - The hazard recognizer to use.
127  ScheduleHazardRecognizer *HazardRec;
128 
129  /// CurCycle - The current scheduler state corresponds to this cycle.
130  unsigned CurCycle;
131 
132  /// MinAvailableCycle - Cycle of the soonest available instruction.
133  unsigned MinAvailableCycle;
134 
135  /// IssueCount - Count of instructions issued in this cycle.
136  /// Currently valid only for bottom-up scheduling.
137  unsigned IssueCount;
138 
139  /// LiveRegDefs - A set of physical registers and their definitions
140  /// that are "live". These nodes must be scheduled before any other nodes
141  /// that modify the registers can be scheduled.
142  unsigned NumLiveRegs;
143  std::vector<SUnit*> LiveRegDefs;
144  std::vector<SUnit*> LiveRegGens;
145 
146  // Collect interferences between physical register use/defs.
147  // Each interference is an SUnit and set of physical registers.
148  SmallVector<SUnit*, 4> Interferences;
149  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
150  LRegsMapT LRegsMap;
151 
152  /// Topo - A topological ordering for SUnits which permits fast IsReachable
153  /// and similar queries.
154  ScheduleDAGTopologicalSort Topo;
155 
156  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
157  // DAG crawling.
158  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
159 
160 public:
161  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
162  SchedulingPriorityQueue *availqueue,
163  CodeGenOpt::Level OptLevel)
164  : ScheduleDAGSDNodes(mf),
165  NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
166  Topo(SUnits, NULL) {
167 
168  const TargetMachine &tm = mf.getTarget();
169  if (DisableSchedCycles || !NeedLatency)
170  HazardRec = new ScheduleHazardRecognizer();
171  else
172  HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
173  }
174 
175  ~ScheduleDAGRRList() {
176  delete HazardRec;
177  delete AvailableQueue;
178  }
179 
180  void Schedule();
181 
182  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
183 
184  /// IsReachable - Checks if SU is reachable from TargetSU.
185  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
186  return Topo.IsReachable(SU, TargetSU);
187  }
188 
189  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
190  /// create a cycle.
191  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
192  return Topo.WillCreateCycle(SU, TargetSU);
193  }
194 
195  /// AddPred - adds a predecessor edge to SUnit SU.
197  /// Updates the topological ordering if required.
198  void AddPred(SUnit *SU, const SDep &D) {
199  Topo.AddPred(SU, D.getSUnit());
200  SU->addPred(D);
201  }
202 
203  /// RemovePred - removes a predecessor edge from SUnit SU.
205  /// Updates the topological ordering if required.
206  void RemovePred(SUnit *SU, const SDep &D) {
207  Topo.RemovePred(SU, D.getSUnit());
208  SU->removePred(D);
209  }
210 
211 private:
212  bool isReady(SUnit *SU) {
213  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
214  AvailableQueue->isReady(SU);
215  }
216 
217  void ReleasePred(SUnit *SU, const SDep *PredEdge);
218  void ReleasePredecessors(SUnit *SU);
219  void ReleasePending();
220  void AdvanceToCycle(unsigned NextCycle);
221  void AdvancePastStalls(SUnit *SU);
222  void EmitNode(SUnit *SU);
223  void ScheduleNodeBottomUp(SUnit*);
224  void CapturePred(SDep *PredEdge);
225  void UnscheduleNodeBottomUp(SUnit*);
226  void RestoreHazardCheckerBottomUp();
227  void BacktrackBottomUp(SUnit*, SUnit*);
228  SUnit *CopyAndMoveSuccessors(SUnit*);
229  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
230  const TargetRegisterClass*,
231  const TargetRegisterClass*,
232  SmallVectorImpl<SUnit*>&);
233  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
234 
235  void releaseInterferences(unsigned Reg = 0);
236 
237  SUnit *PickNodeToScheduleBottomUp();
238  void ListScheduleBottomUp();
239 
240  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
241  /// Updates the topological ordering if required.
242  SUnit *CreateNewSUnit(SDNode *N) {
243  unsigned NumSUnits = SUnits.size();
244  SUnit *NewNode = newSUnit(N);
245  // Update the topological ordering.
246  if (NewNode->NodeNum >= NumSUnits)
247  Topo.InitDAGTopologicalSorting();
248  return NewNode;
249  }
250 
251  /// CreateClone - Creates a new SUnit from an existing one.
252  /// Updates the topological ordering if required.
253  SUnit *CreateClone(SUnit *N) {
254  unsigned NumSUnits = SUnits.size();
255  SUnit *NewNode = Clone(N);
256  // Update the topological ordering.
257  if (NewNode->NodeNum >= NumSUnits)
258  Topo.InitDAGTopologicalSorting();
259  return NewNode;
260  }
261 
262  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
263  /// need actual latency information but the hybrid scheduler does.
264  bool forceUnitLatencies() const {
265  return !NeedLatency;
266  }
267 };
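// Illustrative construction sketch (roughly what the create*ListDAGScheduler
// factories end up doing; `PQ` is a hypothetical priority-queue instance, and
// MF/OptLevel come from the surrounding instruction-selection context):
//
//   SchedulingPriorityQueue *PQ = /* e.g. a register-reduction queue */;
//   ScheduleDAGRRList *SD =
//       new ScheduleDAGRRList(MF, /*needlatency=*/true, PQ, OptLevel);
//   SD->Schedule();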
268 } // end anonymous namespace
269 
270 /// GetCostForDef - Looks up the register class and cost for a given definition.
271 /// Typically this just means looking up the representative register class,
272 /// but for untyped values (MVT::Untyped) it means inspecting the node's
273 /// opcode to determine what register class is being generated.
274 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
275  const TargetLowering *TLI,
276  const TargetInstrInfo *TII,
277  const TargetRegisterInfo *TRI,
278  unsigned &RegClass, unsigned &Cost,
279  const MachineFunction &MF) {
280  MVT VT = RegDefPos.GetValue();
281 
282  // Special handling for untyped values. These values can only come from
283  // the expansion of custom DAG-to-DAG patterns.
284  if (VT == MVT::Untyped) {
285  const SDNode *Node = RegDefPos.GetNode();
286 
287  // Special handling for CopyFromReg of untyped values.
288  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
289  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
290  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
291  RegClass = RC->getID();
292  Cost = 1;
293  return;
294  }
295 
296  unsigned Opcode = Node->getMachineOpcode();
297  if (Opcode == TargetOpcode::REG_SEQUENCE) {
298  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
299  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
300  RegClass = RC->getID();
301  Cost = 1;
302  return;
303  }
304 
305  unsigned Idx = RegDefPos.GetIdx();
306  const MCInstrDesc Desc = TII->get(Opcode);
307  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
308  RegClass = RC->getID();
309  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
310  // better way to determine it.
311  Cost = 1;
312  } else {
313  RegClass = TLI->getRepRegClassFor(VT)->getID();
314  Cost = TLI->getRepRegClassCostFor(VT);
315  }
316 }
317 
318 /// Schedule - Schedule the DAG using list scheduling.
319 void ScheduleDAGRRList::Schedule() {
320  DEBUG(dbgs()
321  << "********** List Scheduling BB#" << BB->getNumber()
322  << " '" << BB->getName() << "' **********\n");
323 
324  CurCycle = 0;
325  IssueCount = 0;
326  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
327  NumLiveRegs = 0;
328  // Allocate slots for each physical register, plus one for a special register
329  // to track the virtual resource of a calling sequence.
330  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
331  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
332  CallSeqEndForStart.clear();
333  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
334 
335  // Build the scheduling graph.
336  BuildSchedGraph(NULL);
337 
338  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
339  SUnits[su].dumpAll(this));
340  Topo.InitDAGTopologicalSorting();
341 
342  AvailableQueue->initNodes(SUnits);
343 
344  HazardRec->Reset();
345 
346  // Execute the actual scheduling loop.
347  ListScheduleBottomUp();
348 
349  AvailableQueue->releaseState();
350 
351  DEBUG({
352  dbgs() << "*** Final schedule ***\n";
353  dumpSchedule();
354  dbgs() << '\n';
355  });
356 }
357 
358 //===----------------------------------------------------------------------===//
359 // Bottom-Up Scheduling
360 //===----------------------------------------------------------------------===//
361 
362 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
363 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
364 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
365  SUnit *PredSU = PredEdge->getSUnit();
366 
367 #ifndef NDEBUG
368  if (PredSU->NumSuccsLeft == 0) {
369  dbgs() << "*** Scheduling failed! ***\n";
370  PredSU->dump(this);
371  dbgs() << " has been released too many times!\n";
372  llvm_unreachable(0);
373  }
374 #endif
375  --PredSU->NumSuccsLeft;
376 
377  if (!forceUnitLatencies()) {
378  // Updating predecessor's height. This is now the cycle when the
379  // predecessor can be scheduled without causing a pipeline stall.
380  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
381  }
382 
383  // If all the node's successors are scheduled, this node is ready
384  // to be scheduled. Ignore the special EntrySU node.
385  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
386  PredSU->isAvailable = true;
387 
388  unsigned Height = PredSU->getHeight();
389  if (Height < MinAvailableCycle)
390  MinAvailableCycle = Height;
391 
392  if (isReady(PredSU)) {
393  AvailableQueue->push(PredSU);
394  }
395  // CapturePred and others may have left the node in the pending queue, avoid
396  // adding it twice.
397  else if (!PredSU->isPending) {
398  PredSU->isPending = true;
399  PendingQueue.push_back(PredSU);
400  }
401  }
402 }
403 
404 /// IsChainDependent - Test if Outer is reachable from Inner through
405 /// chain dependencies.
406 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
407  unsigned NestLevel,
408  const TargetInstrInfo *TII) {
409  SDNode *N = Outer;
410  for (;;) {
411  if (N == Inner)
412  return true;
413  // For a TokenFactor, examine each operand. There may be multiple ways
414  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
415  // most nesting in order to ensure that we find the corresponding match.
416  if (N->getOpcode() == ISD::TokenFactor) {
417  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
418  if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
419  return true;
420  return false;
421  }
422  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
423  if (N->isMachineOpcode()) {
424  if (N->getMachineOpcode() ==
425  (unsigned)TII->getCallFrameSetupOpcode()) {
426  ++NestLevel;
427  } else if (N->getMachineOpcode() ==
428  (unsigned)TII->getCallFrameDestroyOpcode()) {
429  if (NestLevel == 0)
430  return false;
431  --NestLevel;
432  }
433  }
434  // Otherwise, find the chain and continue climbing.
435  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
436  if (N->getOperand(i).getValueType() == MVT::Other) {
437  N = N->getOperand(i).getNode();
438  goto found_chain_operand;
439  }
440  return false;
441  found_chain_operand:;
442  if (N->getOpcode() == ISD::EntryToken)
443  return false;
444  }
445 }
446 
447 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
448 /// the corresponding (lowered) CALLSEQ_BEGIN node.
449 ///
450 /// NestLevel and MaxNest are used in recursion to indicate the current level
451 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
452 /// level seen so far.
453 ///
454 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
455 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
456 static SDNode *
457 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
458  const TargetInstrInfo *TII) {
459  for (;;) {
460  // For a TokenFactor, examine each operand. There may be multiple ways
461  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
462  // most nesting in order to ensure that we find the corresponding match.
463  if (N->getOpcode() == ISD::TokenFactor) {
464  SDNode *Best = 0;
465  unsigned BestMaxNest = MaxNest;
466  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
467  unsigned MyNestLevel = NestLevel;
468  unsigned MyMaxNest = MaxNest;
469  if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
470  MyNestLevel, MyMaxNest, TII))
471  if (!Best || (MyMaxNest > BestMaxNest)) {
472  Best = New;
473  BestMaxNest = MyMaxNest;
474  }
475  }
476  assert(Best);
477  MaxNest = BestMaxNest;
478  return Best;
479  }
480  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
481  if (N->isMachineOpcode()) {
482  if (N->getMachineOpcode() ==
483  (unsigned)TII->getCallFrameSetupOpcode()) {
484  ++NestLevel;
485  MaxNest = std::max(MaxNest, NestLevel);
486  } else if (N->getMachineOpcode() ==
487  (unsigned)TII->getCallFrameDestroyOpcode()) {
488  assert(NestLevel != 0);
489  --NestLevel;
490  if (NestLevel == 0)
491  return N;
492  }
493  }
494  // Otherwise, find the chain and continue climbing.
495  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
496  if (N->getOperand(i).getValueType() == MVT::Other) {
497  N = N->getOperand(i).getNode();
498  goto found_chain_operand;
499  }
500  return 0;
501  found_chain_operand:;
502  if (N->getOpcode() == ISD::EntryToken)
503  return 0;
504  }
505 }
506 
507 /// Call ReleasePred for each predecessor, then update register live def/gen.
508 /// Always update LiveRegDefs for a register dependence even if the current SU
509 /// also defines the register. This effectively creates one large live range
510 /// across a sequence of two-address nodes. This is important because the
511 /// entire chain must be scheduled together. Example:
512 ///
513 /// flags = (3) add
514 /// flags = (2) addc flags
515 /// flags = (1) addc flags
516 ///
517 /// results in
518 ///
519 /// LiveRegDefs[flags] = 3
520 /// LiveRegGens[flags] = 1
521 ///
522 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
523 /// interference on flags.
524 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
525  // Bottom up: release predecessors
526  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
527  I != E; ++I) {
528  ReleasePred(SU, &*I);
529  if (I->isAssignedRegDep()) {
530  // This is a physical register dependency and it's impossible or
531  // expensive to copy the register. Make sure nothing that can
532  // clobber the register is scheduled between the predecessor and
533  // this node.
534  SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
535  assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
536  "interference on register dependence");
537  LiveRegDefs[I->getReg()] = I->getSUnit();
538  if (!LiveRegGens[I->getReg()]) {
539  ++NumLiveRegs;
540  LiveRegGens[I->getReg()] = SU;
541  }
542  }
543  }
544 
545  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
546  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
547  // these nodes, to prevent other calls from being interscheduled with them.
548  unsigned CallResource = TRI->getNumRegs();
549  if (!LiveRegDefs[CallResource])
550  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
551  if (Node->isMachineOpcode() &&
552  Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
553  unsigned NestLevel = 0;
554  unsigned MaxNest = 0;
555  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
556 
557  SUnit *Def = &SUnits[N->getNodeId()];
558  CallSeqEndForStart[Def] = SU;
559 
560  ++NumLiveRegs;
561  LiveRegDefs[CallResource] = Def;
562  LiveRegGens[CallResource] = SU;
563  break;
564  }
565 }
566 
567 /// Check to see if any of the pending instructions are ready to issue. If
568 /// so, add them to the available queue.
569 void ScheduleDAGRRList::ReleasePending() {
570  if (DisableSchedCycles) {
571  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
572  return;
573  }
574 
575  // If the available queue is empty, it is safe to reset MinAvailableCycle.
576  if (AvailableQueue->empty())
577  MinAvailableCycle = UINT_MAX;
578 
579  // Check to see if any of the pending instructions are ready to issue. If
580  // so, add them to the available queue.
581  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
582  unsigned ReadyCycle = PendingQueue[i]->getHeight();
583  if (ReadyCycle < MinAvailableCycle)
584  MinAvailableCycle = ReadyCycle;
585 
586  if (PendingQueue[i]->isAvailable) {
587  if (!isReady(PendingQueue[i]))
588  continue;
589  AvailableQueue->push(PendingQueue[i]);
590  }
591  PendingQueue[i]->isPending = false;
592  PendingQueue[i] = PendingQueue.back();
593  PendingQueue.pop_back();
594  --i; --e;
595  }
596 }
597 
598 /// Move the scheduler state forward by the specified number of Cycles.
599 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
600  if (NextCycle <= CurCycle)
601  return;
602 
603  IssueCount = 0;
604  AvailableQueue->setCurCycle(NextCycle);
605  if (!HazardRec->isEnabled()) {
606  // Bypass lots of virtual calls in case of long latency.
607  CurCycle = NextCycle;
608  }
609  else {
610  for (; CurCycle != NextCycle; ++CurCycle) {
611  HazardRec->RecedeCycle();
612  }
613  }
614  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
615  // available Q to release pending nodes at least once before popping.
616  ReleasePending();
617 }
618 
619 /// Move the scheduler state forward until the specified node's dependents are
620 /// ready and can be scheduled with no resource conflicts.
621 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
622  if (DisableSchedCycles)
623  return;
624 
625  // FIXME: Nodes such as CopyFromReg probably should not advance the current
626  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
627  // has predecessors the cycle will be advanced when they are scheduled.
628  // But given the crude nature of modeling latency through such nodes, we
629  // currently need to treat these nodes like real instructions.
630  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
631 
632  unsigned ReadyCycle = SU->getHeight();
633 
634  // Bump CurCycle to account for latency. We assume the latency of other
635  // available instructions may be hidden by the stall (not a full pipe stall).
636  // This updates the hazard recognizer's cycle before reserving resources for
637  // this instruction.
638  AdvanceToCycle(ReadyCycle);
639 
640  // Calls are scheduled in their preceding cycle, so don't conflict with
641  // hazards from instructions after the call. EmitNode will reset the
642  // scoreboard state before emitting the call.
643  if (SU->isCall)
644  return;
645 
646  // FIXME: For resource conflicts in very long non-pipelined stages, we
647  // should probably skip ahead here to avoid useless scoreboard checks.
648  int Stalls = 0;
649  while (true) {
650  ScheduleHazardRecognizer::HazardType HT =
651  HazardRec->getHazardType(SU, -Stalls);
652 
653  if (HT == ScheduleHazardRecognizer::NoHazard)
654  break;
655 
656  ++Stalls;
657  }
658  AdvanceToCycle(CurCycle + Stalls);
659 }
660 
661 /// Record this SUnit in the HazardRecognizer.
662 /// Does not update CurCycle.
663 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
664  if (!HazardRec->isEnabled())
665  return;
666 
667  // Check for phys reg copy.
668  if (!SU->getNode())
669  return;
670 
671  switch (SU->getNode()->getOpcode()) {
672  default:
673  assert(SU->getNode()->isMachineOpcode() &&
674  "This target-independent node should not be scheduled.");
675  break;
676  case ISD::MERGE_VALUES:
677  case ISD::TokenFactor:
678  case ISD::LIFETIME_START:
679  case ISD::LIFETIME_END:
680  case ISD::CopyToReg:
681  case ISD::CopyFromReg:
682  case ISD::EH_LABEL:
683  // Noops don't affect the scoreboard state. Copies are likely to be
684  // removed.
685  return;
686  case ISD::INLINEASM:
687  // For inline asm, clear the pipeline state.
688  HazardRec->Reset();
689  return;
690  }
691  if (SU->isCall) {
692  // Calls are scheduled with their preceding instructions. For bottom-up
693  // scheduling, clear the pipeline state before emitting.
694  HazardRec->Reset();
695  }
696 
697  HazardRec->EmitInstruction(SU);
698 }
699 
700 static void resetVRegCycle(SUnit *SU);
701 
702 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
703 /// count of its predecessors. If a predecessor pending count is zero, add it to
704 /// the Available queue.
705 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
706  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
707  DEBUG(SU->dump(this));
708 
709 #ifndef NDEBUG
710  if (CurCycle < SU->getHeight())
711  DEBUG(dbgs() << " Height [" << SU->getHeight()
712  << "] pipeline stall!\n");
713 #endif
714 
715  // FIXME: Do not modify node height. It may interfere with
716  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
717  // node its ready cycle can aid heuristics, and after scheduling it can
718  // indicate the scheduled cycle.
719  SU->setHeightToAtLeast(CurCycle);
720 
721  // Reserve resources for the scheduled instruction.
722  EmitNode(SU);
723 
724  Sequence.push_back(SU);
725 
726  AvailableQueue->scheduledNode(SU);
727 
728  // If HazardRec is disabled, and each inst counts as one cycle, then
729  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
730  // PendingQueue for schedulers that implement HasReadyFilter.
731  if (!HazardRec->isEnabled() && AvgIPC < 2)
732  AdvanceToCycle(CurCycle + 1);
733 
734  // Update liveness of predecessors before successors to avoid treating a
735  // two-address node as a live range def.
736  ReleasePredecessors(SU);
737 
738  // Release all the implicit physical register defs that are live.
739  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
740  I != E; ++I) {
741  // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
742  if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
743  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
744  --NumLiveRegs;
745  LiveRegDefs[I->getReg()] = NULL;
746  LiveRegGens[I->getReg()] = NULL;
747  releaseInterferences(I->getReg());
748  }
749  }
750  // Release the special call resource dependence, if this is the beginning
751  // of a call.
752  unsigned CallResource = TRI->getNumRegs();
753  if (LiveRegDefs[CallResource] == SU)
754  for (const SDNode *SUNode = SU->getNode(); SUNode;
755  SUNode = SUNode->getGluedNode()) {
756  if (SUNode->isMachineOpcode() &&
757  SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
758  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
759  --NumLiveRegs;
760  LiveRegDefs[CallResource] = NULL;
761  LiveRegGens[CallResource] = NULL;
762  releaseInterferences(CallResource);
763  }
764  }
765 
766  resetVRegCycle(SU);
767 
768  SU->isScheduled = true;
769 
770  // Conditions under which the scheduler should eagerly advance the cycle:
771  // (1) No available instructions
772  // (2) All pipelines full, so available instructions must have hazards.
773  //
774  // If HazardRec is disabled, the cycle was pre-advanced before calling
775  // ReleasePredecessors. In that case, IssueCount should remain 0.
776  //
777  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
778  if (HazardRec->isEnabled() || AvgIPC > 1) {
779  if (SU->getNode() && SU->getNode()->isMachineOpcode())
780  ++IssueCount;
781  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
782  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
783  AdvanceToCycle(CurCycle + 1);
784  }
785 }
786 
787 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
788 /// unscheduled, increase the succ left count of its predecessors. Remove
789 /// them from AvailableQueue if necessary.
790 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
791  SUnit *PredSU = PredEdge->getSUnit();
792  if (PredSU->isAvailable) {
793  PredSU->isAvailable = false;
794  if (!PredSU->isPending)
795  AvailableQueue->remove(PredSU);
796  }
797 
798  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
799  ++PredSU->NumSuccsLeft;
800 }
801 
802 /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
803 /// state and its predecessors' states to reflect the change.
804 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
805  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
806  DEBUG(SU->dump(this));
807 
808  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
809  I != E; ++I) {
810  CapturePred(&*I);
811  if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
812  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
813  assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
814  "Physical register dependency violated?");
815  --NumLiveRegs;
816  LiveRegDefs[I->getReg()] = NULL;
817  LiveRegGens[I->getReg()] = NULL;
818  releaseInterferences(I->getReg());
819  }
820  }
821 
822  // Reclaim the special call resource dependence, if this is the beginning
823  // of a call.
824  unsigned CallResource = TRI->getNumRegs();
825  for (const SDNode *SUNode = SU->getNode(); SUNode;
826  SUNode = SUNode->getGluedNode()) {
827  if (SUNode->isMachineOpcode() &&
828  SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
829  ++NumLiveRegs;
830  LiveRegDefs[CallResource] = SU;
831  LiveRegGens[CallResource] = CallSeqEndForStart[SU];
832  }
833  }
834 
835  // Release the special call resource dependence, if this is the end
836  // of a call.
837  if (LiveRegGens[CallResource] == SU)
838  for (const SDNode *SUNode = SU->getNode(); SUNode;
839  SUNode = SUNode->getGluedNode()) {
840  if (SUNode->isMachineOpcode() &&
841  SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
842  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
843  --NumLiveRegs;
844  LiveRegDefs[CallResource] = NULL;
845  LiveRegGens[CallResource] = NULL;
846  releaseInterferences(CallResource);
847  }
848  }
849 
850  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
851  I != E; ++I) {
852  if (I->isAssignedRegDep()) {
853  if (!LiveRegDefs[I->getReg()])
854  ++NumLiveRegs;
855  // This becomes the nearest def. Note that an earlier def may still be
856  // pending if this is a two-address node.
857  LiveRegDefs[I->getReg()] = SU;
858  if (LiveRegGens[I->getReg()] == NULL ||
859  I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
860  LiveRegGens[I->getReg()] = I->getSUnit();
861  }
862  }
863  if (SU->getHeight() < MinAvailableCycle)
864  MinAvailableCycle = SU->getHeight();
865 
866  SU->setHeightDirty();
867  SU->isScheduled = false;
868  SU->isAvailable = true;
869  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
870  // Don't make available until backtracking is complete.
871  SU->isPending = true;
872  PendingQueue.push_back(SU);
873  }
874  else {
875  AvailableQueue->push(SU);
876  }
877  AvailableQueue->unscheduledNode(SU);
878 }
879 
880 /// After backtracking, the hazard checker needs to be restored to a state
881 /// corresponding to the current cycle.
882 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
883  HazardRec->Reset();
884 
885  unsigned LookAhead = std::min((unsigned)Sequence.size(),
886  HazardRec->getMaxLookAhead());
887  if (LookAhead == 0)
888  return;
889 
890  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
891  unsigned HazardCycle = (*I)->getHeight();
892  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
893  SUnit *SU = *I;
894  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
895  HazardRec->RecedeCycle();
896  }
897  EmitNode(SU);
898  }
899 }
900 
901 /// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
902 /// including BtSU, so that a specific node (SU) can be scheduled.
903 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
904  SUnit *OldSU = Sequence.back();
905  while (true) {
906  Sequence.pop_back();
907  // FIXME: use ready cycle instead of height
908  CurCycle = OldSU->getHeight();
909  UnscheduleNodeBottomUp(OldSU);
910  AvailableQueue->setCurCycle(CurCycle);
911  if (OldSU == BtSU)
912  break;
913  OldSU = Sequence.back();
914  }
915 
916  assert(!SU->isSucc(OldSU) && "Something is wrong!");
917 
918  RestoreHazardCheckerBottomUp();
919 
920  ReleasePending();
921 
922  ++NumBacktracks;
923 }
924 
925 static bool isOperandOf(const SUnit *SU, SDNode *N) {
926  for (const SDNode *SUNode = SU->getNode(); SUNode;
927  SUNode = SUNode->getGluedNode()) {
928  if (SUNode->isOperandOf(N))
929  return true;
930  }
931  return false;
932 }
933 
934 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
935 /// successors to the newly created node.
936 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
937  SDNode *N = SU->getNode();
938  if (!N)
939  return NULL;
940 
941  if (SU->getNode()->getGluedNode())
942  return NULL;
943 
944  SUnit *NewSU;
945  bool TryUnfold = false;
946  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
947  EVT VT = N->getValueType(i);
948  if (VT == MVT::Glue)
949  return NULL;
950  else if (VT == MVT::Other)
951  TryUnfold = true;
952  }
953  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
954  const SDValue &Op = N->getOperand(i);
955  EVT VT = Op.getNode()->getValueType(Op.getResNo());
956  if (VT == MVT::Glue)
957  return NULL;
958  }
959 
960  if (TryUnfold) {
961  SmallVector<SDNode*, 2> NewNodes;
962  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
963  return NULL;
964 
965  // unfolding an x86 DEC64m operation results in store, dec, load which
966  // can't be handled here so quit
967  if (NewNodes.size() == 3)
968  return NULL;
969 
970  DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
971  assert(NewNodes.size() == 2 && "Expected a load folding node!");
972 
973  N = NewNodes[1];
974  SDNode *LoadNode = NewNodes[0];
975  unsigned NumVals = N->getNumValues();
976  unsigned OldNumVals = SU->getNode()->getNumValues();
977  for (unsigned i = 0; i != NumVals; ++i)
978  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
979  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
980  SDValue(LoadNode, 1));
981 
982  // LoadNode may already exist. This can happen when there is another
983  // load from the same location that produces the same type of value
984  // but with different alignment or volatility.
985  bool isNewLoad = true;
986  SUnit *LoadSU;
987  if (LoadNode->getNodeId() != -1) {
988  LoadSU = &SUnits[LoadNode->getNodeId()];
989  isNewLoad = false;
990  } else {
991  LoadSU = CreateNewSUnit(LoadNode);
992  LoadNode->setNodeId(LoadSU->NodeNum);
993 
994  InitNumRegDefsLeft(LoadSU);
995  computeLatency(LoadSU);
996  }
997 
998  SUnit *NewSU = CreateNewSUnit(N);
999  assert(N->getNodeId() == -1 && "Node already inserted!");
1000  N->setNodeId(NewSU->NodeNum);
1001 
1002  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1003  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1004  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1005  NewSU->isTwoAddress = true;
1006  break;
1007  }
1008  }
1009  if (MCID.isCommutable())
1010  NewSU->isCommutable = true;
1011 
1012  InitNumRegDefsLeft(NewSU);
1013  computeLatency(NewSU);
1014 
1015  // Record all the edges to and from the old SU, by category.
1016  SmallVector<SDep, 4> ChainPreds;
1017  SmallVector<SDep, 4> ChainSuccs;
1018  SmallVector<SDep, 4> LoadPreds;
1019  SmallVector<SDep, 4> NodePreds;
1020  SmallVector<SDep, 4> NodeSuccs;
1021  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1022  I != E; ++I) {
1023  if (I->isCtrl())
1024  ChainPreds.push_back(*I);
1025  else if (isOperandOf(I->getSUnit(), LoadNode))
1026  LoadPreds.push_back(*I);
1027  else
1028  NodePreds.push_back(*I);
1029  }
1030  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1031  I != E; ++I) {
1032  if (I->isCtrl())
1033  ChainSuccs.push_back(*I);
1034  else
1035  NodeSuccs.push_back(*I);
1036  }
1037 
1038  // Now assign edges to the newly-created nodes.
1039  for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
1040  const SDep &Pred = ChainPreds[i];
1041  RemovePred(SU, Pred);
1042  if (isNewLoad)
1043  AddPred(LoadSU, Pred);
1044  }
1045  for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
1046  const SDep &Pred = LoadPreds[i];
1047  RemovePred(SU, Pred);
1048  if (isNewLoad)
1049  AddPred(LoadSU, Pred);
1050  }
1051  for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
1052  const SDep &Pred = NodePreds[i];
1053  RemovePred(SU, Pred);
1054  AddPred(NewSU, Pred);
1055  }
1056  for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
1057  SDep D = NodeSuccs[i];
1058  SUnit *SuccDep = D.getSUnit();
1059  D.setSUnit(SU);
1060  RemovePred(SuccDep, D);
1061  D.setSUnit(NewSU);
1062  AddPred(SuccDep, D);
1063  // Balance register pressure.
1064  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
1065  && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1066  --NewSU->NumRegDefsLeft;
1067  }
1068  for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
1069  SDep D = ChainSuccs[i];
1070  SUnit *SuccDep = D.getSUnit();
1071  D.setSUnit(SU);
1072  RemovePred(SuccDep, D);
1073  if (isNewLoad) {
1074  D.setSUnit(LoadSU);
1075  AddPred(SuccDep, D);
1076  }
1077  }
1078 
1079  // Add a data dependency to reflect that NewSU reads the value defined
1080  // by LoadSU.
1081  SDep D(LoadSU, SDep::Data, 0);
1082  D.setLatency(LoadSU->Latency);
1083  AddPred(NewSU, D);
1084 
1085  if (isNewLoad)
1086  AvailableQueue->addNode(LoadSU);
1087  AvailableQueue->addNode(NewSU);
1088 
1089  ++NumUnfolds;
1090 
1091  if (NewSU->NumSuccsLeft == 0) {
1092  NewSU->isAvailable = true;
1093  return NewSU;
1094  }
1095  SU = NewSU;
1096  }
1097 
1098  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1099  NewSU = CreateClone(SU);
1100 
1101  // New SUnit has the exact same predecessors.
1102  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1103  I != E; ++I)
1104  if (!I->isArtificial())
1105  AddPred(NewSU, *I);
1106 
1107  // Only copy scheduled successors. Cut them from old node's successor
1108  // list and move them over.
1109  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1110  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1111  I != E; ++I) {
1112  if (I->isArtificial())
1113  continue;
1114  SUnit *SuccSU = I->getSUnit();
1115  if (SuccSU->isScheduled) {
1116  SDep D = *I;
1117  D.setSUnit(NewSU);
1118  AddPred(SuccSU, D);
1119  D.setSUnit(SU);
1120  DelDeps.push_back(std::make_pair(SuccSU, D));
1121  }
1122  }
1123  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
1124  RemovePred(DelDeps[i].first, DelDeps[i].second);
1125 
1126  AvailableQueue->updateNode(SU);
1127  AvailableQueue->addNode(NewSU);
1128 
1129  ++NumDups;
1130  return NewSU;
1131 }
1132 
1133 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1134 /// scheduled successors of the given SUnit to the last copy.
1135 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1136  const TargetRegisterClass *DestRC,
1137  const TargetRegisterClass *SrcRC,
1138  SmallVectorImpl<SUnit*> &Copies) {
1139  SUnit *CopyFromSU = CreateNewSUnit(NULL);
1140  CopyFromSU->CopySrcRC = SrcRC;
1141  CopyFromSU->CopyDstRC = DestRC;
1142 
1143  SUnit *CopyToSU = CreateNewSUnit(NULL);
1144  CopyToSU->CopySrcRC = DestRC;
1145  CopyToSU->CopyDstRC = SrcRC;
1146 
1147  // Only copy scheduled successors. Cut them from old node's successor
1148  // list and move them over.
1149  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1150  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1151  I != E; ++I) {
1152  if (I->isArtificial())
1153  continue;
1154  SUnit *SuccSU = I->getSUnit();
1155  if (SuccSU->isScheduled) {
1156  SDep D = *I;
1157  D.setSUnit(CopyToSU);
1158  AddPred(SuccSU, D);
1159  DelDeps.push_back(std::make_pair(SuccSU, *I));
1160  }
1161  else {
1162  // Avoid scheduling the def-side copy before other successors. Otherwise
1163  // we could introduce another physreg interference on the copy and
1164  // continue inserting copies indefinitely.
1165  AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1166  }
1167  }
1168  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
1169  RemovePred(DelDeps[i].first, DelDeps[i].second);
1170 
1171  SDep FromDep(SU, SDep::Data, Reg);
1172  FromDep.setLatency(SU->Latency);
1173  AddPred(CopyFromSU, FromDep);
1174  SDep ToDep(CopyFromSU, SDep::Data, 0);
1175  ToDep.setLatency(CopyFromSU->Latency);
1176  AddPred(CopyToSU, ToDep);
1177 
1178  AvailableQueue->updateNode(SU);
1179  AvailableQueue->addNode(CopyFromSU);
1180  AvailableQueue->addNode(CopyToSU);
1181  Copies.push_back(CopyFromSU);
1182  Copies.push_back(CopyToSU);
1183 
1184  ++NumPRCopies;
1185 }
1186 
1187 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1188 /// definition of the specified node.
1189 /// FIXME: Move to SelectionDAG?
1190 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1191  const TargetInstrInfo *TII) {
1192  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1193  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1194  unsigned NumRes = MCID.getNumDefs();
1195  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1196  if (Reg == *ImpDef)
1197  break;
1198  ++NumRes;
1199  }
1200  return N->getValueType(NumRes);
1201 }
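// For illustration (hypothetical instruction): with one explicit def
// (MCID.getNumDefs() == 1) and implicit defs {EFLAGS, EAX}, a query for EAX
// skips EFLAGS, so NumRes ends up as 2 and N->getValueType(2) is returned.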
1202 
1203 /// CheckForLiveRegDef - If the specified register def of the specified SUnit
1204 /// clobbers any "live" registers, record the interfering registers in LRegs.
1205 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1206  std::vector<SUnit*> &LiveRegDefs,
1207  SmallSet<unsigned, 4> &RegAdded,
1208  SmallVectorImpl<unsigned> &LRegs,
1209  const TargetRegisterInfo *TRI) {
1210  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1211 
1212  // Check if Reg (or an alias of it) is live.
1213  if (!LiveRegDefs[*AliasI]) continue;
1214 
1215  // Allow multiple uses of the same def.
1216  if (LiveRegDefs[*AliasI] == SU) continue;
1217 
1218  // Add Reg to the set of interfering live regs.
1219  if (RegAdded.insert(*AliasI)) {
1220  LRegs.push_back(*AliasI);
1221  }
1222  }
1223 }
1224 
1225 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1226 /// by RegMask, and add them to LRegs.
1227 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1228  std::vector<SUnit*> &LiveRegDefs,
1229  SmallSet<unsigned, 4> &RegAdded,
1230  SmallVectorImpl<unsigned> &LRegs) {
1231  // Look at all live registers. Skip Reg0 and the special CallResource.
1232  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1233  if (!LiveRegDefs[i]) continue;
1234  if (LiveRegDefs[i] == SU) continue;
1235  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1236  if (RegAdded.insert(i))
1237  LRegs.push_back(i);
1238  }
1239 }
1240 
1241 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1242 static const uint32_t *getNodeRegMask(const SDNode *N) {
1243  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1244  if (const RegisterMaskSDNode *Op =
1245  dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
1246  return Op->getRegMask();
1247  return NULL;
1248 }
1249 
1250 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1251 /// scheduling of the given node to satisfy live physical register dependencies.
1252 /// If the specific node is the last one that's available to schedule, do
1253 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1254 bool ScheduleDAGRRList::
1255 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1256  if (NumLiveRegs == 0)
1257  return false;
1258 
1259  SmallSet<unsigned, 4> RegAdded;
1260  // If this node would clobber any "live" register, then it's not ready.
1261  //
1262  // If SU is the currently live definition of the same register that it uses,
1263  // then we are free to schedule it.
1264  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1265  I != E; ++I) {
1266  if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1267  CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1268  RegAdded, LRegs, TRI);
1269  }
1270 
1271  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1272  if (Node->getOpcode() == ISD::INLINEASM) {
1273  // Inline asm can clobber physical defs.
1274  unsigned NumOps = Node->getNumOperands();
1275  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1276  --NumOps; // Ignore the glue operand.
1277 
1278  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1279  unsigned Flags =
1280  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1281  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1282 
1283  ++i; // Skip the ID value.
1284  if (InlineAsm::isRegDefKind(Flags) ||
1285  InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1286  InlineAsm::isClobberKind(Flags)) {
1287  // Check for def of register or earlyclobber register.
1288  for (; NumVals; --NumVals, ++i) {
1289  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1290  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1291  CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1292  }
1293  } else
1294  i += NumVals;
1295  }
1296  continue;
1297  }
1298 
1299  if (!Node->isMachineOpcode())
1300  continue;
1301  // If we're in the middle of scheduling a call, don't begin scheduling
1302  // another call. Also, don't allow any physical registers to be live across
1303  // the call.
1304  if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
1305  // Check the special calling-sequence resource.
1306  unsigned CallResource = TRI->getNumRegs();
1307  if (LiveRegDefs[CallResource]) {
1308  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1309  while (SDNode *Glued = Gen->getGluedNode())
1310  Gen = Glued;
1311  if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
1312  LRegs.push_back(CallResource);
1313  }
1314  }
1315  if (const uint32_t *RegMask = getNodeRegMask(Node))
1316  CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
1317 
1318  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1319  if (!MCID.ImplicitDefs)
1320  continue;
1321  for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1322  CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1323  }
1324 
1325  return !LRegs.empty();
1326 }
1327 
1328 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1329  // Add the nodes that aren't ready back onto the available list.
1330  for (unsigned i = Interferences.size(); i > 0; --i) {
1331  SUnit *SU = Interferences[i-1];
1332  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1333  if (Reg) {
1334  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1335  if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
1336  continue;
1337  }
1338  SU->isPending = false;
1339  // The interfering node may no longer be available due to backtracking.
1340  // Furthermore, it may have been made available again, in which case it is
1341  // now already in the AvailableQueue.
1342  if (SU->isAvailable && !SU->NodeQueueId) {
1343  DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1344  AvailableQueue->push(SU);
1345  }
1346  if (i < Interferences.size())
1347  Interferences[i-1] = Interferences.back();
1348  Interferences.pop_back();
1349  LRegsMap.erase(LRegsPos);
1350  }
1351 }
1352 
1353 /// Return a node that can be scheduled in this cycle. Requirements:
1354 /// (1) Ready: latency has been satisfied
1355 /// (2) No Hazards: resources are available
1356 /// (3) No Interferences: may unschedule to break register interferences.
1357 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1358  SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
1359  while (CurSU) {
1360  SmallVector<unsigned, 4> LRegs;
1361  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1362  break;
1363  DEBUG(dbgs() << " Interfering reg " <<
1364  (LRegs[0] == TRI->getNumRegs() ? "CallResource"
1365  : TRI->getName(LRegs[0]))
1366  << " SU #" << CurSU->NodeNum << '\n');
1367  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1368  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1369  if (LRegsPair.second) {
1370  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1371  Interferences.push_back(CurSU);
1372  }
1373  else {
1374  assert(CurSU->isPending && "Interferences are pending");
1375  // Update the interference with current live regs.
1376  LRegsPair.first->second = LRegs;
1377  }
1378  CurSU = AvailableQueue->pop();
1379  }
1380  if (CurSU)
1381  return CurSU;
1382 
1383  // All candidates are delayed due to live physical reg dependencies.
1384  // Try backtracking, code duplication, or inserting cross class copies
1385  // to resolve it.
1386  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1387  SUnit *TrySU = Interferences[i];
1388  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1389 
1390  // Try unscheduling up to the point where it's safe to schedule
1391  // this node.
1392  SUnit *BtSU = NULL;
1393  unsigned LiveCycle = UINT_MAX;
1394  for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1395  unsigned Reg = LRegs[j];
1396  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1397  BtSU = LiveRegGens[Reg];
1398  LiveCycle = BtSU->getHeight();
1399  }
1400  }
1401  if (!WillCreateCycle(TrySU, BtSU)) {
1402  // BacktrackBottomUp mutates Interferences!
1403  BacktrackBottomUp(TrySU, BtSU);
1404 
1405  // Force the current node to be scheduled before the node that
1406  // requires the physical reg dep.
1407  if (BtSU->isAvailable) {
1408  BtSU->isAvailable = false;
1409  if (!BtSU->isPending)
1410  AvailableQueue->remove(BtSU);
1411  }
1412  DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
1413  << TrySU->NodeNum << ")\n");
1414  AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1415 
1416  // If one or more successors has been unscheduled, then the current
1417  // node is no longer available.
1418  if (!TrySU->isAvailable)
1419  CurSU = AvailableQueue->pop();
1420  else {
1421  AvailableQueue->remove(TrySU);
1422  CurSU = TrySU;
1423  }
1424  // Interferences has been mutated. We must break.
1425  break;
1426  }
1427  }
1428 
1429  if (!CurSU) {
1430  // Can't backtrack. If it's too expensive to copy the value, then try to
1431  // duplicate the nodes that produce these "too expensive to copy"
1432  // values to break the dependency. In case even that doesn't work,
1433  // insert cross class copies.
1434  // If it's not too expensive, i.e. cost != -1, issue copies.
1435  SUnit *TrySU = Interferences[0];
1436  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1437  assert(LRegs.size() == 1 && "Can't handle this yet!");
1438  unsigned Reg = LRegs[0];
1439  SUnit *LRDef = LiveRegDefs[Reg];
1440  EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1441  const TargetRegisterClass *RC =
1442  TRI->getMinimalPhysRegClass(Reg, VT);
1443  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1444 
1445  // If the cross copy register class is the same as RC, then it must be
1446  // possible to copy the value directly. Do not try to duplicate the def.
1447  // If the cross copy register class is not the same as RC, then it's
1448  // possible to copy the value, but it requires cross register class copies
1449  // and is expensive.
1450  // If the cross copy register class is null, then it's not possible to copy
1451  // the value at all.
1452  SUnit *NewDef = 0;
1453  if (DestRC != RC) {
1454  NewDef = CopyAndMoveSuccessors(LRDef);
1455  if (!DestRC && !NewDef)
1456  report_fatal_error("Can't handle live physical register dependency!");
1457  }
1458  if (!NewDef) {
1459  // Issue copies, these can be expensive cross register class copies.
1460  SmallVector<SUnit*, 2> Copies;
1461  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1462  DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1463  << " to SU #" << Copies.front()->NodeNum << "\n");
1464  AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1465  NewDef = Copies.back();
1466  }
1467 
1468  DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1469  << " to SU #" << TrySU->NodeNum << "\n");
1470  LiveRegDefs[Reg] = NewDef;
1471  AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1472  TrySU->isAvailable = false;
1473  CurSU = NewDef;
1474  }
1475  assert(CurSU && "Unable to resolve live physical register dependencies!");
1476  return CurSU;
1477 }
1478 
1479 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1480 /// schedulers.
1481 void ScheduleDAGRRList::ListScheduleBottomUp() {
1482  // Release any predecessors of the special Exit node.
1483  ReleasePredecessors(&ExitSU);
1484 
1485  // Add root to Available queue.
1486  if (!SUnits.empty()) {
1487  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1488  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1489  RootSU->isAvailable = true;
1490  AvailableQueue->push(RootSU);
1491  }
1492 
1493  // While Available queue is not empty, grab the node with the highest
1494  // priority. If it is not ready put it back. Schedule the node.
1495  Sequence.reserve(SUnits.size());
1496  while (!AvailableQueue->empty() || !Interferences.empty()) {
1497  DEBUG(dbgs() << "\nExamining Available:\n";
1498  AvailableQueue->dump(this));
1499 
1500  // Pick the best node to schedule taking all constraints into
1501  // consideration.
1502  SUnit *SU = PickNodeToScheduleBottomUp();
1503 
1504  AdvancePastStalls(SU);
1505 
1506  ScheduleNodeBottomUp(SU);
1507 
1508  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1509  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1510  assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1511  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1512  }
1513  }
1514 
1515  // Reverse the order if it is bottom up.
1516  std::reverse(Sequence.begin(), Sequence.end());
1517 
1518 #ifndef NDEBUG
1519  VerifyScheduledSequence(/*isBottomUp=*/true);
1520 #endif
1521 }
1522 
1523 //===----------------------------------------------------------------------===//
1524 // RegReductionPriorityQueue Definition
1525 //===----------------------------------------------------------------------===//
1526 //
1527 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1528 // to reduce register pressure.
1529 //
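// As a reminder (illustrative, stated for a binary expression tree): the
// classic Sethi-Ullman number is
//
//   SU(leaf) = 1
//   SU(n)    = SU(l) + 1            if SU(l) == SU(r)
//            = max(SU(l), SU(r))    otherwise
//
// and evaluating the subtree with the larger number first minimizes the
// number of values that are simultaneously live. The numbers used below
// follow the same idea, adapted to scheduling a DAG bottom-up.
//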
1530 namespace {
1531 class RegReductionPQBase;
1532 
1533 struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1534  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1535 };
1536 
1537 #ifndef NDEBUG
1538 template<class SF>
1539 struct reverse_sort : public queue_sort {
1540  SF &SortFunc;
1541  reverse_sort(SF &sf) : SortFunc(sf) {}
1542  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
1543 
1544  bool operator()(SUnit* left, SUnit* right) const {
1545  // reverse left/right rather than simply !SortFunc(left, right)
1546  // to expose different paths in the comparison logic.
1547  return SortFunc(right, left);
1548  }
1549 };
1550 #endif // NDEBUG
1551 
1552 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1553 /// reduction scheduler.
1554 struct bu_ls_rr_sort : public queue_sort {
1555  enum {
1556  IsBottomUp = true,
1557  HasReadyFilter = false
1558  };
1559 
1560  RegReductionPQBase *SPQ;
1561  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1562  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1563 
1564  bool operator()(SUnit* left, SUnit* right) const;
1565 };
1566 
1567 // src_ls_rr_sort - Priority function for source order scheduler.
1568 struct src_ls_rr_sort : public queue_sort {
1569  enum {
1570  IsBottomUp = true,
1571  HasReadyFilter = false
1572  };
1573 
1574  RegReductionPQBase *SPQ;
1575  src_ls_rr_sort(RegReductionPQBase *spq)
1576  : SPQ(spq) {}
1577  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1578  : SPQ(RHS.SPQ) {}
1579 
1580  bool operator()(SUnit* left, SUnit* right) const;
1581 };
1582 
1583 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1584 struct hybrid_ls_rr_sort : public queue_sort {
1585  enum {
1586  IsBottomUp = true,
1587  HasReadyFilter = false
1588  };
1589 
1590  RegReductionPQBase *SPQ;
1591  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1592  : SPQ(spq) {}
1593  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1594  : SPQ(RHS.SPQ) {}
1595 
1596  bool isReady(SUnit *SU, unsigned CurCycle) const;
1597 
1598  bool operator()(SUnit* left, SUnit* right) const;
1599 };
1600 
1601 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1602 // scheduler.
1603 struct ilp_ls_rr_sort : public queue_sort {
1604  enum {
1605  IsBottomUp = true,
1606  HasReadyFilter = false
1607  };
1608 
1609  RegReductionPQBase *SPQ;
1610  ilp_ls_rr_sort(RegReductionPQBase *spq)
1611  : SPQ(spq) {}
1612  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1613  : SPQ(RHS.SPQ) {}
1614 
1615  bool isReady(SUnit *SU, unsigned CurCycle) const;
1616 
1617  bool operator()(SUnit* left, SUnit* right) const;
1618 };
1619 
1620 class RegReductionPQBase : public SchedulingPriorityQueue {
1621 protected:
1622  std::vector<SUnit*> Queue;
1623  unsigned CurQueueId;
1624  bool TracksRegPressure;
1625  bool SrcOrder;
1626 
1627  // SUnits - The SUnits for the current graph.
1628  std::vector<SUnit> *SUnits;
1629 
1630  MachineFunction &MF;
1631  const TargetInstrInfo *TII;
1632  const TargetRegisterInfo *TRI;
1633  const TargetLowering *TLI;
1634  ScheduleDAGRRList *scheduleDAG;
1635 
1636  // SethiUllmanNumbers - The SethiUllman number for each node.
1637  std::vector<unsigned> SethiUllmanNumbers;
1638 
1639  /// RegPressure - Tracking current reg pressure per register class.
1640  ///
1641  std::vector<unsigned> RegPressure;
1642 
1643  /// RegLimit - Tracking the number of allocatable registers per register
1644  /// class.
1645  std::vector<unsigned> RegLimit;
1646 
1647 public:
1648  RegReductionPQBase(MachineFunction &mf,
1649  bool hasReadyFilter,
1650  bool tracksrp,
1651  bool srcorder,
1652  const TargetInstrInfo *tii,
1653  const TargetRegisterInfo *tri,
1654  const TargetLowering *tli)
1655  : SchedulingPriorityQueue(hasReadyFilter),
1656  CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
1657  MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1658  if (TracksRegPressure) {
1659  unsigned NumRC = TRI->getNumRegClasses();
1660  RegLimit.resize(NumRC);
1661  RegPressure.resize(NumRC);
1662  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1663  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1664  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1665  E = TRI->regclass_end(); I != E; ++I)
1666  RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1667  }
1668  }
1669 
1670  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1671  scheduleDAG = scheduleDag;
1672  }
1673 
1674  ScheduleHazardRecognizer* getHazardRec() {
1675  return scheduleDAG->getHazardRec();
1676  }
1677 
1678  void initNodes(std::vector<SUnit> &sunits);
1679 
1680  void addNode(const SUnit *SU);
1681 
1682  void updateNode(const SUnit *SU);
1683 
1684  void releaseState() {
1685  SUnits = 0;
1686  SethiUllmanNumbers.clear();
1687  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1688  }
1689 
1690  unsigned getNodePriority(const SUnit *SU) const;
1691 
1692  unsigned getNodeOrdering(const SUnit *SU) const {
1693  if (!SU->getNode()) return 0;
1694 
1695  return SU->getNode()->getIROrder();
1696  }
1697 
1698  bool empty() const { return Queue.empty(); }
1699 
1700  void push(SUnit *U) {
1701  assert(!U->NodeQueueId && "Node in the queue already");
1702  U->NodeQueueId = ++CurQueueId;
1703  Queue.push_back(U);
1704  }
1705 
1706  void remove(SUnit *SU) {
1707  assert(!Queue.empty() && "Queue is empty!");
1708  assert(SU->NodeQueueId != 0 && "Not in queue!");
1709  std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1710  SU);
1711  if (I != prior(Queue.end()))
1712  std::swap(*I, Queue.back());
1713  Queue.pop_back();
1714  SU->NodeQueueId = 0;
1715  }
1716 
1717  bool tracksRegPressure() const { return TracksRegPressure; }
1718 
1719  void dumpRegPressure() const;
1720 
1721  bool HighRegPressure(const SUnit *SU) const;
1722 
1723  bool MayReduceRegPressure(SUnit *SU) const;
1724 
1725  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1726 
1727  void scheduledNode(SUnit *SU);
1728 
1729  void unscheduledNode(SUnit *SU);
1730 
1731 protected:
1732  bool canClobber(const SUnit *SU, const SUnit *Op);
1733  void AddPseudoTwoAddrDeps();
1734  void PrescheduleNodesWithMultipleUses();
1735  void CalculateSethiUllmanNumbers();
1736 };
1737 
1738 template<class SF>
1739 static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1740  std::vector<SUnit *>::iterator Best = Q.begin();
1741  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1742  E = Q.end(); I != E; ++I)
1743  if (Picker(*Best, *I))
1744  Best = I;
1745  SUnit *V = *Best;
1746  if (Best != prior(Q.end()))
1747  std::swap(*Best, Q.back());
1748  Q.pop_back();
1749  return V;
1750 }
1751 
1752 template<class SF>
1753 SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1754 #ifndef NDEBUG
1755  if (DAG->StressSched) {
1756  reverse_sort<SF> RPicker(Picker);
1757  return popFromQueueImpl(Q, RPicker);
1758  }
1759 #endif
1760  (void)DAG;
1761  return popFromQueueImpl(Q, Picker);
1762 }
1763 
1764 template<class SF>
1765 class RegReductionPriorityQueue : public RegReductionPQBase {
1766  SF Picker;
1767 
1768 public:
1769  RegReductionPriorityQueue(MachineFunction &mf,
1770  bool tracksrp,
1771  bool srcorder,
1772  const TargetInstrInfo *tii,
1773  const TargetRegisterInfo *tri,
1774  const TargetLowering *tli)
1775  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1776  tii, tri, tli),
1777  Picker(this) {}
1778 
1779  bool isBottomUp() const { return SF::IsBottomUp; }
1780 
1781  bool isReady(SUnit *U) const {
1782  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1783  }
1784 
1785  SUnit *pop() {
1786  if (Queue.empty()) return NULL;
1787 
1788  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1789  V->NodeQueueId = 0;
1790  return V;
1791  }
1792 
1793 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1794  void dump(ScheduleDAG *DAG) const {
1795  // Emulate pop() without clobbering NodeQueueIds.
1796  std::vector<SUnit*> DumpQueue = Queue;
1797  SF DumpPicker = Picker;
1798  while (!DumpQueue.empty()) {
1799  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1800  dbgs() << "Height " << SU->getHeight() << ": ";
1801  SU->dump(DAG);
1802  }
1803  }
1804 #endif
1805 };
1806 
1807 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1808 BURegReductionPriorityQueue;
1809 
1810 typedef RegReductionPriorityQueue<src_ls_rr_sort>
1811 SrcRegReductionPriorityQueue;
1812 
1813 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1814 HybridBURRPriorityQueue;
1815 
1816 typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1817 ILPBURRPriorityQueue;
1818 } // end anonymous namespace
1819 
1820 //===----------------------------------------------------------------------===//
1821 // Static Node Priority for Register Pressure Reduction
1822 //===----------------------------------------------------------------------===//
1823 
1824 // Check for special nodes that bypass scheduling heuristics.
1825 // Currently this pushes TokenFactor nodes down, but may be used for other
1826 // pseudo-ops as well.
1827 //
1828 // Return -1 to schedule right above left, 1 for left above right.
1829 // Return 0 if no bias exists.
1830 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1831  bool LSchedLow = left->isScheduleLow;
1832  bool RSchedLow = right->isScheduleLow;
1833  if (LSchedLow != RSchedLow)
1834  return LSchedLow < RSchedLow ? 1 : -1;
1835  return 0;
1836 }
1837 
1838 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1839 /// Smaller number is the higher priority.
1840 static unsigned
1841 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1842  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1843  if (SethiUllmanNumber != 0)
1844  return SethiUllmanNumber;
1845 
1846  unsigned Extra = 0;
1847  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1848  I != E; ++I) {
1849  if (I->isCtrl()) continue; // ignore chain preds
1850  SUnit *PredSU = I->getSUnit();
1851  unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1852  if (PredSethiUllman > SethiUllmanNumber) {
1853  SethiUllmanNumber = PredSethiUllman;
1854  Extra = 0;
1855  } else if (PredSethiUllman == SethiUllmanNumber)
1856  ++Extra;
1857  }
1858 
1859  SethiUllmanNumber += Extra;
1860 
1861  if (SethiUllmanNumber == 0)
1862  SethiUllmanNumber = 1;
1863 
1864  return SethiUllmanNumber;
1865 }
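// Editorial note, not part of the original source: a small worked example of
// CalcNodeSethiUllmanNumber on a hypothetical expression tree. A leaf (no data
// preds) gets 1. A node whose two operands both have number 1 ties at the
// maximum, so it gets 1 + 1 = 2. A node whose operands have numbers 2 and 1
// has a strict maximum, so it keeps 2. Scheduling the operand subtree with the
// larger number first is what keeps fewer values live at once.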
1866 
1867 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1868 /// scheduling units.
1869 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1870  SethiUllmanNumbers.assign(SUnits->size(), 0);
1871 
1872  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1873  CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1874 }
1875 
1876 void RegReductionPQBase::addNode(const SUnit *SU) {
1877  unsigned SUSize = SethiUllmanNumbers.size();
1878  if (SUnits->size() > SUSize)
1879  SethiUllmanNumbers.resize(SUSize*2, 0);
1880  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1881 }
1882 
1883 void RegReductionPQBase::updateNode(const SUnit *SU) {
1884  SethiUllmanNumbers[SU->NodeNum] = 0;
1885  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1886 }
1887 
1888 // Lower priority means schedule further down. For bottom-up scheduling, lower
1889 // priority SUs are scheduled before higher priority SUs.
1890 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1891  assert(SU->NodeNum < SethiUllmanNumbers.size());
1892  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1893  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1894  // CopyToReg should be close to its uses to facilitate coalescing and
1895  // avoid spilling.
1896  return 0;
1897  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
 1898  Opc == TargetOpcode::SUBREG_TO_REG ||
 1899  Opc == TargetOpcode::INSERT_SUBREG)
 1900  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1901  // close to their uses to facilitate coalescing.
1902  return 0;
1903  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1904  // If SU does not have a register use, i.e. it doesn't produce a value
1905  // that would be consumed (e.g. store), then it terminates a chain of
1906  // computation. Give it a large SethiUllman number so it will be
 1907  // scheduled right before its predecessors, so that it doesn't lengthen
1908  // their live ranges.
1909  return 0xffff;
1910  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1911  // If SU does not have a register def, schedule it close to its uses
1912  // because it does not lengthen any live ranges.
1913  return 0;
1914 #if 1
1915  return SethiUllmanNumbers[SU->NodeNum];
1916 #else
1917  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1918  if (SU->isCallOp) {
1919  // FIXME: This assumes all of the defs are used as call operands.
1920  int NP = (int)Priority - SU->getNode()->getNumValues();
1921  return (NP > 0) ? NP : 0;
1922  }
1923  return Priority;
1924 #endif
1925 }
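// Editorial note, not part of the original source: the special cases above in
// practice. A store-like node (NumSuccs == 0, NumPreds != 0) gets 0xffff so it
// is scheduled right before the operands it keeps alive; CopyToReg and the
// subregister pseudos get 0 so they stay next to their uses for coalescing;
// everything else falls back to its Sethi-Ullman number.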
1926 
1927 //===----------------------------------------------------------------------===//
1928 // Register Pressure Tracking
1929 //===----------------------------------------------------------------------===//
1930 
1931 void RegReductionPQBase::dumpRegPressure() const {
1932 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1933  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1934  E = TRI->regclass_end(); I != E; ++I) {
1935  const TargetRegisterClass *RC = *I;
1936  unsigned Id = RC->getID();
1937  unsigned RP = RegPressure[Id];
1938  if (!RP) continue;
1939  DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1940  << '\n');
1941  }
1942 #endif
1943 }
1944 
1945 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1946  if (!TLI)
1947  return false;
1948 
1949  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1950  I != E; ++I) {
1951  if (I->isCtrl())
1952  continue;
1953  SUnit *PredSU = I->getSUnit();
1954  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1955  // to cover the number of registers defined (they are all live).
1956  if (PredSU->NumRegDefsLeft == 0) {
1957  continue;
1958  }
1959  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1960  RegDefPos.IsValid(); RegDefPos.Advance()) {
1961  unsigned RCId, Cost;
1962  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
1963 
1964  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1965  return true;
1966  }
1967  }
1968  return false;
1969 }
1970 
1971 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1972  const SDNode *N = SU->getNode();
1973 
1974  if (!N->isMachineOpcode() || !SU->NumSuccs)
1975  return false;
1976 
1977  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1978  for (unsigned i = 0; i != NumDefs; ++i) {
1979  MVT VT = N->getSimpleValueType(i);
1980  if (!N->hasAnyUseOfValue(i))
1981  continue;
1982  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1983  if (RegPressure[RCId] >= RegLimit[RCId])
1984  return true;
1985  }
1986  return false;
1987 }
1988 
 1989 // Compute the register pressure contribution by this instruction by counting up
1990 // for uses that are not live and down for defs. Only count register classes
1991 // that are already under high pressure. As a side effect, compute the number of
1992 // uses of registers that are already live.
1993 //
1994 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1995 // so could probably be factored.
1996 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1997  LiveUses = 0;
1998  int PDiff = 0;
1999  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2000  I != E; ++I) {
2001  if (I->isCtrl())
2002  continue;
2003  SUnit *PredSU = I->getSUnit();
2004  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2005  // to cover the number of registers defined (they are all live).
2006  if (PredSU->NumRegDefsLeft == 0) {
2007  if (PredSU->getNode()->isMachineOpcode())
2008  ++LiveUses;
2009  continue;
2010  }
2011  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2012  RegDefPos.IsValid(); RegDefPos.Advance()) {
2013  MVT VT = RegDefPos.GetValue();
2014  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2015  if (RegPressure[RCId] >= RegLimit[RCId])
2016  ++PDiff;
2017  }
2018  }
2019  const SDNode *N = SU->getNode();
2020 
2021  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2022  return PDiff;
2023 
2024  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2025  for (unsigned i = 0; i != NumDefs; ++i) {
2026  MVT VT = N->getSimpleValueType(i);
2027  if (!N->hasAnyUseOfValue(i))
2028  continue;
2029  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2030  if (RegPressure[RCId] >= RegLimit[RCId])
2031  --PDiff;
2032  }
2033  return PDiff;
2034 }
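// Editorial note, not part of the original source: a worked instance of
// RegPressureDiff. With one register class already at its limit, an SU with
// two value operands whose defs are still unscheduled and one def that has a
// use yields PDiff = +2 - 1 = +1; LiveUses counts only operands whose defining
// node has no register defs left (they are already live).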
2035 
2036 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2037  if (!TracksRegPressure)
2038  return;
2039 
2040  if (!SU->getNode())
2041  return;
2042 
2043  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2044  I != E; ++I) {
2045  if (I->isCtrl())
2046  continue;
2047  SUnit *PredSU = I->getSUnit();
2048  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2049  // to cover the number of registers defined (they are all live).
2050  if (PredSU->NumRegDefsLeft == 0) {
2051  continue;
2052  }
2053  // FIXME: The ScheduleDAG currently loses information about which of a
2054  // node's values is consumed by each dependence. Consequently, if the node
2055  // defines multiple register classes, we don't know which to pressurize
2056  // here. Instead the following loop consumes the register defs in an
2057  // arbitrary order. At least it handles the common case of clustered loads
2058  // to the same class. For precise liveness, each SDep needs to indicate the
2059  // result number. But that tightly couples the ScheduleDAG with the
2060  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2061  // value type or register class to SDep.
2062  //
2063  // The most important aspect of register tracking is balancing the increase
2064  // here with the reduction further below. Note that this SU may use multiple
 2065  // defs in PredSU. These can't be determined here, but we've already
2066  // compensated by reducing NumRegDefsLeft in PredSU during
2067  // ScheduleDAGSDNodes::AddSchedEdges.
2068  --PredSU->NumRegDefsLeft;
2069  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2070  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2071  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2072  if (SkipRegDefs)
2073  continue;
2074 
2075  unsigned RCId, Cost;
2076  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2077  RegPressure[RCId] += Cost;
2078  break;
2079  }
2080  }
2081 
2082  // We should have this assert, but there may be dead SDNodes that never
2083  // materialize as SUnits, so they don't appear to generate liveness.
2084  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2085  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2086  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2087  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2088  if (SkipRegDefs > 0)
2089  continue;
2090  unsigned RCId, Cost;
2091  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2092  if (RegPressure[RCId] < Cost) {
2093  // Register pressure tracking is imprecise. This can happen. But we try
2094  // hard not to let it happen because it likely results in poor scheduling.
2095  DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
2096  RegPressure[RCId] = 0;
2097  }
2098  else {
2099  RegPressure[RCId] -= Cost;
2100  }
2101  }
2102  dumpRegPressure();
2103 }
2104 
2105 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2106  if (!TracksRegPressure)
2107  return;
2108 
2109  const SDNode *N = SU->getNode();
2110  if (!N) return;
2111 
2112  if (!N->isMachineOpcode()) {
2113  if (N->getOpcode() != ISD::CopyToReg)
2114  return;
2115  } else {
2116  unsigned Opc = N->getMachineOpcode();
2117  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2118  Opc == TargetOpcode::INSERT_SUBREG ||
2119  Opc == TargetOpcode::SUBREG_TO_REG ||
 2120  Opc == TargetOpcode::REG_SEQUENCE ||
 2121  Opc == TargetOpcode::IMPLICIT_DEF)
 2122  return;
2123  }
2124 
2125  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2126  I != E; ++I) {
2127  if (I->isCtrl())
2128  continue;
2129  SUnit *PredSU = I->getSUnit();
2130  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2131  // counts data deps.
2132  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2133  continue;
2134  const SDNode *PN = PredSU->getNode();
2135  if (!PN->isMachineOpcode()) {
2136  if (PN->getOpcode() == ISD::CopyFromReg) {
2137  MVT VT = PN->getSimpleValueType(0);
2138  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2139  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2140  }
2141  continue;
2142  }
2143  unsigned POpc = PN->getMachineOpcode();
2144  if (POpc == TargetOpcode::IMPLICIT_DEF)
2145  continue;
2146  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2147  POpc == TargetOpcode::INSERT_SUBREG ||
2148  POpc == TargetOpcode::SUBREG_TO_REG) {
2149  MVT VT = PN->getSimpleValueType(0);
2150  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2151  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2152  continue;
2153  }
2154  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2155  for (unsigned i = 0; i != NumDefs; ++i) {
2156  MVT VT = PN->getSimpleValueType(i);
2157  if (!PN->hasAnyUseOfValue(i))
2158  continue;
2159  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2160  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2161  // Register pressure tracking is imprecise. This can happen.
2162  RegPressure[RCId] = 0;
2163  else
2164  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2165  }
2166  }
2167 
2168  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2169  // may transfer data dependencies to CopyToReg.
2170  if (SU->NumSuccs && N->isMachineOpcode()) {
2171  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2172  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2173  MVT VT = N->getSimpleValueType(i);
2174  if (VT == MVT::Glue || VT == MVT::Other)
2175  continue;
2176  if (!N->hasAnyUseOfValue(i))
2177  continue;
2178  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2179  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2180  }
2181  }
2182 
2183  dumpRegPressure();
2184 }
2185 
2186 //===----------------------------------------------------------------------===//
2187 // Dynamic Node Priority for Register Pressure Reduction
2188 //===----------------------------------------------------------------------===//
2189 
2190 /// closestSucc - Returns the scheduled cycle of the successor which is
2191 /// closest to the current cycle.
2192 static unsigned closestSucc(const SUnit *SU) {
2193  unsigned MaxHeight = 0;
2194  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2195  I != E; ++I) {
2196  if (I->isCtrl()) continue; // ignore chain succs
2197  unsigned Height = I->getSUnit()->getHeight();
 2198  // If there are a bunch of CopyToRegs stacked up, they should be considered
2199  // to be at the same position.
2200  if (I->getSUnit()->getNode() &&
2201  I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2202  Height = closestSucc(I->getSUnit())+1;
2203  if (Height > MaxHeight)
2204  MaxHeight = Height;
2205  }
2206  return MaxHeight;
2207 }
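// Editorial note, not part of the original source: if SU's data successors sit
// at heights 2 and 5, closestSucc(SU) returns 5. A successor that is itself a
// CopyToReg is scored as closestSucc(copy) + 1, so a stack of copies is
// treated as one position.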
2208 
 2209 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2210 /// for scratch registers, i.e. number of data dependencies.
2211 static unsigned calcMaxScratches(const SUnit *SU) {
2212  unsigned Scratches = 0;
2213  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2214  I != E; ++I) {
2215  if (I->isCtrl()) continue; // ignore chain preds
2216  Scratches++;
2217  }
2218  return Scratches;
2219 }
2220 
2221 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2222 /// CopyFromReg from a virtual register.
2223 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2224  bool RetVal = false;
2225  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2226  I != E; ++I) {
2227  if (I->isCtrl()) continue;
2228  const SUnit *PredSU = I->getSUnit();
2229  if (PredSU->getNode() &&
2230  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2231  unsigned Reg =
 2232  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
 2233  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
 2234  RetVal = true;
2235  continue;
2236  }
2237  }
2238  return false;
2239  }
2240  return RetVal;
2241 }
2242 
2243 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2244 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2245 /// it has no other use. It should be scheduled closer to the terminator.
2246 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2247  bool RetVal = false;
2248  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2249  I != E; ++I) {
2250  if (I->isCtrl()) continue;
2251  const SUnit *SuccSU = I->getSUnit();
2252  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2253  unsigned Reg =
 2254  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
 2255  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
 2256  RetVal = true;
2257  continue;
2258  }
2259  }
2260  return false;
2261  }
2262  return RetVal;
2263 }
2264 
2265 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2266 // set isVRegCycle for its CopyFromReg operands.
2267 //
2268 // This is only relevant for single-block loops, in which case the VRegCycle
2269 // node is likely an induction variable in which the operand and target virtual
2270 // registers should be coalesced (e.g. pre/post increment values). Setting the
2271 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2272 // CopyFromReg so that this node becomes the virtual register "kill". This
2273 // avoids interference between the values live in and out of the block and
2274 // eliminates a copy inside the loop.
 2275 static void initVRegCycle(SUnit *SU) {
 2276  if (DisableSchedVRegCycle)
 2277  return;
2278 
2279  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2280  return;
2281 
2282  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2283 
2284  SU->isVRegCycle = true;
2285 
2286  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2287  I != E; ++I) {
2288  if (I->isCtrl()) continue;
2289  I->getSUnit()->isVRegCycle = true;
2290  }
2291 }
2292 
2293 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2294 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2295 static void resetVRegCycle(SUnit *SU) {
2296  if (!SU->isVRegCycle)
2297  return;
2298 
2299  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2300  I != E; ++I) {
2301  if (I->isCtrl()) continue; // ignore chain preds
2302  SUnit *PredSU = I->getSUnit();
2303  if (PredSU->isVRegCycle) {
2304  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2305  "VRegCycle def must be CopyFromReg");
2306  I->getSUnit()->isVRegCycle = 0;
2307  }
2308  }
2309 }
2310 
2311 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2312 // means a node that defines the VRegCycle has not been scheduled yet.
2313 static bool hasVRegCycleUse(const SUnit *SU) {
2314  // If this SU also defines the VReg, don't hoist it as a "use".
2315  if (SU->isVRegCycle)
2316  return false;
2317 
2318  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2319  I != E; ++I) {
2320  if (I->isCtrl()) continue; // ignore chain preds
2321  if (I->getSUnit()->isVRegCycle &&
2322  I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2323  DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2324  return true;
2325  }
2326  }
2327  return false;
2328 }
2329 
2330 // Check for either a dependence (latency) or resource (hazard) stall.
2331 //
2332 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2333 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2334  if ((int)SPQ->getCurCycle() < Height) return true;
 2335  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2336  != ScheduleHazardRecognizer::NoHazard)
 2337  return true;
2338  return false;
2339 }
2340 
2341 // Return -1 if left has higher priority, 1 if right has higher priority.
2342 // Return 0 if latency-based priority is equivalent.
2343 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2344  RegReductionPQBase *SPQ) {
2345  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2346  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2347  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2348  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2349  int LHeight = (int)left->getHeight() + LPenalty;
2350  int RHeight = (int)right->getHeight() + RPenalty;
2351 
2352  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2353  BUHasStall(left, LHeight, SPQ);
2354  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2355  BUHasStall(right, RHeight, SPQ);
2356 
 2357  // If scheduling one of the nodes will cause a pipeline stall, delay it.
 2358  // If scheduling both nodes would cause a pipeline stall, sort
2359  // them according to their height.
2360  if (LStall) {
2361  if (!RStall)
2362  return 1;
2363  if (LHeight != RHeight)
2364  return LHeight > RHeight ? 1 : -1;
2365  } else if (RStall)
2366  return -1;
2367 
2368  // If either node is scheduling for latency, sort them by height/depth
2369  // and latency.
2370  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2371  right->SchedulingPref == Sched::ILP)) {
2372  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2373  // is enabled, grouping instructions by cycle, then its height is already
2374  // covered so only its depth matters. We also reach this point if both stall
2375  // but have the same height.
2376  if (!SPQ->getHazardRec()->isEnabled()) {
2377  if (LHeight != RHeight)
2378  return LHeight > RHeight ? 1 : -1;
2379  }
2380  int LDepth = left->getDepth() - LPenalty;
2381  int RDepth = right->getDepth() - RPenalty;
2382  if (LDepth != RDepth) {
2383  DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2384  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2385  << ") depth " << RDepth << "\n");
2386  return LDepth < RDepth ? 1 : -1;
2387  }
2388  if (left->Latency != right->Latency)
2389  return left->Latency > right->Latency ? 1 : -1;
2390  }
2391  return 0;
2392 }
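// Editorial note, not part of the original source: at CurCycle == 4, a left
// node of height 5 stalls while a right node of height 3 does not, so the
// function returns 1 and the right node is preferred. When neither node
// stalls, the comparison falls back to height (when no hazard recognizer is
// enabled), then depth, then latency.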
2393 
2394 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2395  // Schedule physical register definitions close to their use. This is
2396  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2397  // long as shortening physreg live ranges is generally good, we can defer
2398  // creating a subtarget hook.
2399  if (!DisableSchedPhysRegJoin) {
2400  bool LHasPhysReg = left->hasPhysRegDefs;
2401  bool RHasPhysReg = right->hasPhysRegDefs;
2402  if (LHasPhysReg != RHasPhysReg) {
2403  #ifndef NDEBUG
2404  static const char *const PhysRegMsg[] = { " has no physreg",
2405  " defines a physreg" };
2406  #endif
2407  DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2408  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2409  << PhysRegMsg[RHasPhysReg] << "\n");
2410  return LHasPhysReg < RHasPhysReg;
2411  }
2412  }
2413 
 2414  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2415  unsigned LPriority = SPQ->getNodePriority(left);
2416  unsigned RPriority = SPQ->getNodePriority(right);
2417 
2418  // Be really careful about hoisting call operands above previous calls.
 2419  // Only allow it if it would reduce register pressure.
2420  if (left->isCall && right->isCallOp) {
2421  unsigned RNumVals = right->getNode()->getNumValues();
2422  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2423  }
2424  if (right->isCall && left->isCallOp) {
2425  unsigned LNumVals = left->getNode()->getNumValues();
2426  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2427  }
2428 
2429  if (LPriority != RPriority)
2430  return LPriority > RPriority;
2431 
 2432  // If one or both of the nodes are calls and their Sethi-Ullman numbers are the
2433  // same, then keep source order.
2434  if (left->isCall || right->isCall) {
2435  unsigned LOrder = SPQ->getNodeOrdering(left);
2436  unsigned ROrder = SPQ->getNodeOrdering(right);
2437 
2438  // Prefer an ordering where the lower the non-zero order number, the higher
2439  // the preference.
2440  if ((LOrder || ROrder) && LOrder != ROrder)
2441  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2442  }
2443 
2444  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2445  // e.g.
2446  // t1 = op t2, c1
2447  // t3 = op t4, c2
2448  //
2449  // and the following instructions are both ready.
2450  // t2 = op c3
2451  // t4 = op c4
2452  //
2453  // Then schedule t2 = op first.
2454  // i.e.
2455  // t4 = op c4
2456  // t2 = op c3
2457  // t1 = op t2, c1
2458  // t3 = op t4, c2
2459  //
2460  // This creates more short live intervals.
2461  unsigned LDist = closestSucc(left);
2462  unsigned RDist = closestSucc(right);
2463  if (LDist != RDist)
2464  return LDist < RDist;
2465 
 2466  // How many registers become live when the node is scheduled.
2467  unsigned LScratch = calcMaxScratches(left);
2468  unsigned RScratch = calcMaxScratches(right);
2469  if (LScratch != RScratch)
2470  return LScratch > RScratch;
2471 
2472  // Comparing latency against a call makes little sense unless the node
2473  // is register pressure-neutral.
2474  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2475  return (left->NodeQueueId > right->NodeQueueId);
2476 
2477  // Do not compare latencies when one or both of the nodes are calls.
2478  if (!DisableSchedCycles &&
2479  !(left->isCall || right->isCall)) {
2480  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2481  if (result != 0)
2482  return result > 0;
2483  }
2484  else {
2485  if (left->getHeight() != right->getHeight())
2486  return left->getHeight() > right->getHeight();
2487 
2488  if (left->getDepth() != right->getDepth())
2489  return left->getDepth() < right->getDepth();
2490  }
2491 
2492  assert(left->NodeQueueId && right->NodeQueueId &&
2493  "NodeQueueId cannot be zero");
2494  return (left->NodeQueueId > right->NodeQueueId);
2495 }
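// Editorial note, not part of the original source: the tie-break ladder above,
// in order, is: keep physreg defs close to their uses, compare Sethi-Ullman
// priority (discounted for call operands), keep source order around calls,
// the closestSucc distance (schedule def and use closer together), the number
// of scratch registers a node exposes, latency (or height/depth when cycle
// tracking is disabled), and finally NodeQueueId as a stable tie-break.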
2496 
2497 // Bottom up
2498 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2499  if (int res = checkSpecialNodes(left, right))
2500  return res > 0;
2501 
2502  return BURRSort(left, right, SPQ);
2503 }
2504 
2505 // Source order, otherwise bottom up.
2506 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2507  if (int res = checkSpecialNodes(left, right))
2508  return res > 0;
2509 
2510  unsigned LOrder = SPQ->getNodeOrdering(left);
2511  unsigned ROrder = SPQ->getNodeOrdering(right);
2512 
2513  // Prefer an ordering where the lower the non-zero order number, the higher
2514  // the preference.
2515  if ((LOrder || ROrder) && LOrder != ROrder)
2516  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2517 
2518  return BURRSort(left, right, SPQ);
2519 }
2520 
2521 // If the time between now and when the instruction will be ready can cover
2522 // the spill code, then avoid adding it to the ready queue. This gives long
2523 // stalls highest priority and allows hoisting across calls. It should also
2524 // speed up processing the available queue.
2525 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2526  static const unsigned ReadyDelay = 3;
2527 
2528  if (SPQ->MayReduceRegPressure(SU)) return true;
2529 
2530  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2531 
 2532  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
 2533  != ScheduleHazardRecognizer::NoHazard)
 2534  return false;
2535 
2536  return true;
2537 }
2538 
2539 // Return true if right should be scheduled with higher priority than left.
2540 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2541  if (int res = checkSpecialNodes(left, right))
2542  return res > 0;
2543 
2544  if (left->isCall || right->isCall)
2545  // No way to compute latency of calls.
2546  return BURRSort(left, right, SPQ);
2547 
2548  bool LHigh = SPQ->HighRegPressure(left);
2549  bool RHigh = SPQ->HighRegPressure(right);
2550  // Avoid causing spills. If register pressure is high, schedule for
2551  // register pressure reduction.
2552  if (LHigh && !RHigh) {
2553  DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2554  << right->NodeNum << ")\n");
2555  return true;
2556  }
2557  else if (!LHigh && RHigh) {
2558  DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2559  << left->NodeNum << ")\n");
2560  return false;
2561  }
2562  if (!LHigh && !RHigh) {
2563  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2564  if (result != 0)
2565  return result > 0;
2566  }
2567  return BURRSort(left, right, SPQ);
2568 }
2569 
2570 // Schedule as many instructions in each cycle as possible. So don't make an
2571 // instruction available unless it is ready in the current cycle.
2572 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2573  if (SU->getHeight() > CurCycle) return false;
2574 
 2575  if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2576  != ScheduleHazardRecognizer::NoHazard)
 2577  return false;
2578 
2579  return true;
2580 }
2581 
2582 static bool canEnableCoalescing(SUnit *SU) {
2583  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2584  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2585  // CopyToReg should be close to its uses to facilitate coalescing and
2586  // avoid spilling.
2587  return true;
2588 
2589  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
 2590  Opc == TargetOpcode::SUBREG_TO_REG ||
 2591  Opc == TargetOpcode::INSERT_SUBREG)
 2592  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2593  // close to their uses to facilitate coalescing.
2594  return true;
2595 
2596  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2597  // If SU does not have a register def, schedule it close to its uses
2598  // because it does not lengthen any live ranges.
2599  return true;
2600 
2601  return false;
2602 }
2603 
2604 // list-ilp is currently an experimental scheduler that allows various
2605 // heuristics to be enabled prior to the normal register reduction logic.
2606 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2607  if (int res = checkSpecialNodes(left, right))
2608  return res > 0;
2609 
2610  if (left->isCall || right->isCall)
2611  // No way to compute latency of calls.
2612  return BURRSort(left, right, SPQ);
2613 
2614  unsigned LLiveUses = 0, RLiveUses = 0;
 2615  int LPDiff = 0, RPDiff = 0;
 2616  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
 2617  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2618  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2619  }
2620  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2621  DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2622  << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2623  return LPDiff > RPDiff;
2624  }
2625 
2626  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2627  bool LReduce = canEnableCoalescing(left);
2628  bool RReduce = canEnableCoalescing(right);
2629  if (LReduce && !RReduce) return false;
2630  if (RReduce && !LReduce) return true;
2631  }
2632 
2633  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2634  DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2635  << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2636  return LLiveUses < RLiveUses;
2637  }
2638 
2639  if (!DisableSchedStalls) {
2640  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2641  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2642  if (LStall != RStall)
2643  return left->getHeight() > right->getHeight();
2644  }
2645 
2646  if (!DisableSchedCriticalPath) {
2647  int spread = (int)left->getDepth() - (int)right->getDepth();
2648  if (std::abs(spread) > MaxReorderWindow) {
2649  DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2650  << left->getDepth() << " != SU(" << right->NodeNum << "): "
2651  << right->getDepth() << "\n");
2652  return left->getDepth() < right->getDepth();
2653  }
2654  }
2655 
2656  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2657  int spread = (int)left->getHeight() - (int)right->getHeight();
2658  if (std::abs(spread) > MaxReorderWindow)
2659  return left->getHeight() > right->getHeight();
2660  }
2661 
2662  return BURRSort(left, right, SPQ);
2663 }
2664 
2665 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2666  SUnits = &sunits;
2667  // Add pseudo dependency edges for two-address nodes.
2668  if (!Disable2AddrHack)
2669  AddPseudoTwoAddrDeps();
2670  // Reroute edges to nodes with multiple uses.
2671  if (!TracksRegPressure && !SrcOrder)
2672  PrescheduleNodesWithMultipleUses();
2673  // Calculate node priorities.
2674  CalculateSethiUllmanNumbers();
2675 
2676  // For single block loops, mark nodes that look like canonical IV increments.
2677  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2678  for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2679  initVRegCycle(&sunits[i]);
2680  }
2681  }
2682 }
2683 
2684 //===----------------------------------------------------------------------===//
2685 // Preschedule for Register Pressure
2686 //===----------------------------------------------------------------------===//
2687 
2688 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2689  if (SU->isTwoAddress) {
2690  unsigned Opc = SU->getNode()->getMachineOpcode();
2691  const MCInstrDesc &MCID = TII->get(Opc);
2692  unsigned NumRes = MCID.getNumDefs();
2693  unsigned NumOps = MCID.getNumOperands() - NumRes;
2694  for (unsigned i = 0; i != NumOps; ++i) {
2695  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2696  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2697  if (DU->getNodeId() != -1 &&
2698  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2699  return true;
2700  }
2701  }
2702  }
2703  return false;
2704 }
2705 
 2706 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2707 /// successor's explicit physregs whose definition can reach DepSU.
2708 /// i.e. DepSU should not be scheduled above SU.
2709 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2710  ScheduleDAGRRList *scheduleDAG,
2711  const TargetInstrInfo *TII,
2712  const TargetRegisterInfo *TRI) {
2713  const uint16_t *ImpDefs
2714  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2715  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2716  if(!ImpDefs && !RegMask)
2717  return false;
2718 
2719  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
2720  SI != SE; ++SI) {
2721  SUnit *SuccSU = SI->getSUnit();
2722  for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
2723  PE = SuccSU->Preds.end(); PI != PE; ++PI) {
2724  if (!PI->isAssignedRegDep())
2725  continue;
2726 
2727  if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
2728  scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2729  return true;
2730 
2731  if (ImpDefs)
2732  for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2733  // Return true if SU clobbers this physical register use and the
2734  // definition of the register reaches from DepSU. IsReachable queries
2735  // a topological forward sort of the DAG (following the successors).
2736  if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
2737  scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2738  return true;
2739  }
2740  }
2741  return false;
2742 }
2743 
2744 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2745 /// physical register defs.
2746 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2747  const TargetInstrInfo *TII,
2748  const TargetRegisterInfo *TRI) {
2749  SDNode *N = SuccSU->getNode();
2750  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2751  const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2752  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2753  for (const SDNode *SUNode = SU->getNode(); SUNode;
2754  SUNode = SUNode->getGluedNode()) {
2755  if (!SUNode->isMachineOpcode())
2756  continue;
2757  const uint16_t *SUImpDefs =
2758  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2759  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2760  if (!SUImpDefs && !SURegMask)
2761  continue;
2762  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2763  EVT VT = N->getValueType(i);
2764  if (VT == MVT::Glue || VT == MVT::Other)
2765  continue;
2766  if (!N->hasAnyUseOfValue(i))
2767  continue;
2768  unsigned Reg = ImpDefs[i - NumDefs];
2769  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2770  return true;
2771  if (!SUImpDefs)
2772  continue;
2773  for (;*SUImpDefs; ++SUImpDefs) {
2774  unsigned SUReg = *SUImpDefs;
2775  if (TRI->regsOverlap(Reg, SUReg))
2776  return true;
2777  }
2778  }
2779  }
2780  return false;
2781 }
2782 
2783 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2784 /// are not handled well by the general register pressure reduction
2785 /// heuristics. When presented with code like this:
2786 ///
2787 /// N
2788 /// / |
2789 /// / |
2790 /// U store
2791 /// |
2792 /// ...
2793 ///
2794 /// the heuristics tend to push the store up, but since the
2795 /// operand of the store has another use (U), this would increase
2796 /// the length of that other use (the U->N edge).
2797 ///
2798 /// This function transforms code like the above to route U's
2799 /// dependence through the store when possible, like this:
2800 ///
2801 /// N
2802 /// ||
2803 /// ||
2804 /// store
2805 /// |
2806 /// U
2807 /// |
2808 /// ...
2809 ///
2810 /// This results in the store being scheduled immediately
2811 /// after N, which shortens the U->N live range, reducing
2812 /// register pressure.
2813 ///
2814 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2815  // Visit all the nodes in topological order, working top-down.
2816  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2817  SUnit *SU = &(*SUnits)[i];
2818  // For now, only look at nodes with no data successors, such as stores.
2819  // These are especially important, due to the heuristics in
2820  // getNodePriority for nodes with no data successors.
2821  if (SU->NumSuccs != 0)
2822  continue;
2823  // For now, only look at nodes with exactly one data predecessor.
2824  if (SU->NumPreds != 1)
2825  continue;
2826  // Avoid prescheduling copies to virtual registers, which don't behave
2827  // like other nodes from the perspective of scheduling heuristics.
2828  if (SDNode *N = SU->getNode())
 2829  if (N->getOpcode() == ISD::CopyToReg &&
 2830  TargetRegisterInfo::isVirtualRegister
 2831  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2832  continue;
2833 
2834  // Locate the single data predecessor.
2835  SUnit *PredSU = 0;
2836  for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2837  EE = SU->Preds.end(); II != EE; ++II)
2838  if (!II->isCtrl()) {
2839  PredSU = II->getSUnit();
2840  break;
2841  }
2842  assert(PredSU);
2843 
2844  // Don't rewrite edges that carry physregs, because that requires additional
2845  // support infrastructure.
2846  if (PredSU->hasPhysRegDefs)
2847  continue;
2848  // Short-circuit the case where SU is PredSU's only data successor.
2849  if (PredSU->NumSuccs == 1)
2850  continue;
2851  // Avoid prescheduling to copies from virtual registers, which don't behave
2852  // like other nodes from the perspective of scheduling heuristics.
2853  if (SDNode *N = SU->getNode())
 2854  if (N->getOpcode() == ISD::CopyFromReg &&
 2855  TargetRegisterInfo::isVirtualRegister
 2856  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2857  continue;
2858 
2859  // Perform checks on the successors of PredSU.
2860  for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2861  EE = PredSU->Succs.end(); II != EE; ++II) {
2862  SUnit *PredSuccSU = II->getSUnit();
2863  if (PredSuccSU == SU) continue;
2864  // If PredSU has another successor with no data successors, for
2865  // now don't attempt to choose either over the other.
2866  if (PredSuccSU->NumSuccs == 0)
2867  goto outer_loop_continue;
2868  // Don't break physical register dependencies.
2869  if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2870  if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2871  goto outer_loop_continue;
2872  // Don't introduce graph cycles.
2873  if (scheduleDAG->IsReachable(SU, PredSuccSU))
2874  goto outer_loop_continue;
2875  }
2876 
2877  // Ok, the transformation is safe and the heuristics suggest it is
2878  // profitable. Update the graph.
2879  DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
2880  << " next to PredSU #" << PredSU->NodeNum
2881  << " to guide scheduling in the presence of multiple uses\n");
2882  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2883  SDep Edge = PredSU->Succs[i];
2884  assert(!Edge.isAssignedRegDep());
2885  SUnit *SuccSU = Edge.getSUnit();
2886  if (SuccSU != SU) {
2887  Edge.setSUnit(PredSU);
2888  scheduleDAG->RemovePred(SuccSU, Edge);
2889  scheduleDAG->AddPred(SU, Edge);
2890  Edge.setSUnit(SU);
2891  scheduleDAG->AddPred(SuccSU, Edge);
2892  --i;
2893  }
2894  }
2895  outer_loop_continue:;
2896  }
2897 }
2898 
2899 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
 2900 /// it as a def&use operand, add a pseudo control edge from it to the other
2901 /// node (if it won't create a cycle) so the two-address one will be scheduled
2902 /// first (lower in the schedule). If both nodes are two-address, favor the
2903 /// one that has a CopyToReg use (more likely to be a loop induction update).
2904 /// If both are two-address, but one is commutable while the other is not
2905 /// commutable, favor the one that's not commutable.
2906 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2907  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2908  SUnit *SU = &(*SUnits)[i];
2909  if (!SU->isTwoAddress)
2910  continue;
2911 
2912  SDNode *Node = SU->getNode();
2913  if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2914  continue;
2915 
2916  bool isLiveOut = hasOnlyLiveOutUses(SU);
2917  unsigned Opc = Node->getMachineOpcode();
2918  const MCInstrDesc &MCID = TII->get(Opc);
2919  unsigned NumRes = MCID.getNumDefs();
2920  unsigned NumOps = MCID.getNumOperands() - NumRes;
2921  for (unsigned j = 0; j != NumOps; ++j) {
2922  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2923  continue;
2924  SDNode *DU = SU->getNode()->getOperand(j).getNode();
2925  if (DU->getNodeId() == -1)
2926  continue;
2927  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2928  if (!DUSU) continue;
2929  for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2930  E = DUSU->Succs.end(); I != E; ++I) {
2931  if (I->isCtrl()) continue;
2932  SUnit *SuccSU = I->getSUnit();
2933  if (SuccSU == SU)
2934  continue;
2935  // Be conservative. Ignore if nodes aren't at roughly the same
2936  // depth and height.
2937  if (SuccSU->getHeight() < SU->getHeight() &&
2938  (SU->getHeight() - SuccSU->getHeight()) > 1)
2939  continue;
2940  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2941  // constrains whatever is using the copy, instead of the copy
2942  // itself. In the case that the copy is coalesced, this
 2943  // preserves the intent of the pseudo two-address heuristics.
2944  while (SuccSU->Succs.size() == 1 &&
2945  SuccSU->getNode()->isMachineOpcode() &&
 2946  SuccSU->getNode()->getMachineOpcode() ==
 2947  TargetOpcode::COPY_TO_REGCLASS)
 2948  SuccSU = SuccSU->Succs.front().getSUnit();
2949  // Don't constrain non-instruction nodes.
2950  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2951  continue;
2952  // Don't constrain nodes with physical register defs if the
2953  // predecessor can clobber them.
2954  if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2955  if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2956  continue;
2957  }
2958  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2959  // these may be coalesced away. We want them close to their uses.
2960  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2961  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2962  SuccOpc == TargetOpcode::INSERT_SUBREG ||
2963  SuccOpc == TargetOpcode::SUBREG_TO_REG)
2964  continue;
2965  if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
2966  (!canClobber(SuccSU, DUSU) ||
2967  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2968  (!SU->isCommutable && SuccSU->isCommutable)) &&
2969  !scheduleDAG->IsReachable(SuccSU, SU)) {
2970  DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
2971  << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2972  scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
2973  }
2974  }
2975  }
2976  }
2977 }
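// Editorial note, not part of the original source, using hypothetical values:
// for a two-address node "t2 = add t1, x" where some other ready node also
// reads t1, the artificial edge added above makes that other reader an
// ordering predecessor of the add. In the final program order the reader comes
// first, so the add is free to reuse t1's register for t2 without an extra
// copy.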
2978 
2979 //===----------------------------------------------------------------------===//
2980 // Public Constructor Functions
2981 //===----------------------------------------------------------------------===//
 2982 
 2983 llvm::ScheduleDAGSDNodes *
 2984 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
 2985  CodeGenOpt::Level OptLevel) {
2986  const TargetMachine &TM = IS->TM;
2987  const TargetInstrInfo *TII = TM.getInstrInfo();
2988  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2989 
2990  BURegReductionPriorityQueue *PQ =
2991  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
2992  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2993  PQ->setScheduleDAG(SD);
2994  return SD;
2995 }
2996 
 2997 llvm::ScheduleDAGSDNodes *
 2998 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
 2999  CodeGenOpt::Level OptLevel) {
3000  const TargetMachine &TM = IS->TM;
3001  const TargetInstrInfo *TII = TM.getInstrInfo();
3002  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
3003 
3004  SrcRegReductionPriorityQueue *PQ =
3005  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
3006  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3007  PQ->setScheduleDAG(SD);
3008  return SD;
3009 }
3010 
 3011 llvm::ScheduleDAGSDNodes *
 3012 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
 3013  CodeGenOpt::Level OptLevel) {
3014  const TargetMachine &TM = IS->TM;
3015  const TargetInstrInfo *TII = TM.getInstrInfo();
3016  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
3017  const TargetLowering *TLI = IS->getTargetLowering();
3018 
3019  HybridBURRPriorityQueue *PQ =
3020  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3021 
3022  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3023  PQ->setScheduleDAG(SD);
3024  return SD;
3025 }
3026 
 3027 llvm::ScheduleDAGSDNodes *
 3028 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
 3029  CodeGenOpt::Level OptLevel) {
3030  const TargetMachine &TM = IS->TM;
3031  const TargetInstrInfo *TII = TM.getInstrInfo();
3032  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
3033  const TargetLowering *TLI = IS->getTargetLowering();
3034 
3035  ILPBURRPriorityQueue *PQ =
3036  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3037  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3038  PQ->setScheduleDAG(SD);
3039  return SD;
3040 }
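// Editorial sketch, not part of the original source: these factories are
// normally selected by name through the scheduler registry, but constructing
// one directly only needs the instruction selector. Assuming an existing
// SelectionDAGISel *IS:
//
//   ScheduleDAGSDNodes *Sched =
//       createILPListDAGScheduler(IS, CodeGenOpt::Default);
//   // ... run it over the current SelectionDAG region, then delete it ...
//   delete Sched;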
Definition: InlineAsm.h:278
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
bool isVRegCycle
Definition: ScheduleDAG.h:281
ItTy next(ItTy it, Dist n)
Definition: STLExtras.h:154
unsigned getLatency() const
Definition: ScheduleDAG.h:150
for(unsigned i=0, e=MI->getNumOperands();i!=e;++i)
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static void resetVRegCycle(SUnit *SU)
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
static bool hasOnlyLiveInOpers(const SUnit *SU)
static unsigned closestSucc(const SUnit *SU)
Sched::Preference SchedulingPref
Definition: ScheduleDAG.h:295
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, std::vector< SUnit * > &LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set. Returns -1 if it is not set...
Definition: MCInstrDesc.h:156
const MCInstrDesc & get(unsigned Opcode) const
Definition: MCInstrInfo.h:48
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
virtual const TargetInstrInfo * getInstrInfo() const
bool isCommutable
Definition: ScheduleDAG.h:285
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static void initVRegCycle(SUnit *SU)
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:307
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:591
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
SUnit * getSUnit() const
Definition: ScheduleDAG.h:160
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const
unsigned getDepth() const
Definition: ScheduleDAG.h:403
void setLatency(unsigned Lat)
setLatency - Set the latency for this edge.
Definition: ScheduleDAG.h:155
bool isAvailable
Definition: ScheduleDAG.h:290
unsigned NodeQueueId
Definition: ScheduleDAG.h:272
static bool isPhysicalRegister(unsigned Reg)
bool hasAnyUseOfValue(unsigned Value) const
MachineRegisterInfo & getRegInfo()
static bool hasOnlyLiveOutUses(const SUnit *SU)
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
Definition: TargetOpcodes.h:52
static cl::opt< bool > DisableSchedPhysRegJoin("disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity"))
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path ""in sched=list-ilp"))
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
unsigned short NumRegDefsLeft
Definition: ScheduleDAG.h:279
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
const TargetMachine & getTarget() const
virtual const TargetRegisterInfo * getRegisterInfo() const
static cl::opt< bool > Disable2AddrHack("disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack"))
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
int getNodeId() const
static RegisterScheduler sourceListDAGScheduler("source","Similar to list-burr but schedules in source ""order when possible", createSourceListDAGScheduler)
EVT getValueType() const
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
unsigned NodeNum
Definition: ScheduleDAG.h:271
STATISTIC(NumBacktracks,"Number of times scheduler backtracked")
void setHeightDirty()
const uint16_t * ImplicitDefs
Definition: MCInstrDesc.h:147
bool addPred(const SDep &D, bool Required=true)
Definition: ScheduleDAG.cpp:65
SmallVector< SDep, 4 > Succs
Definition: ScheduleDAG.h:264
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction. Note that variadic (isVari...
Definition: MCInstrDesc.h:190
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:68
ItTy prior(ItTy it, Dist n)
Definition: STLExtras.h:167
#define DEBUG(X)
Definition: Debug.h:97
virtual uint8_t getRepRegClassCostFor(MVT VT) const
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
bool isScheduleLow
Definition: ScheduleDAG.h:293
int getCallFrameDestroyOpcode() const
bool hasPhysRegClobbers
Definition: ScheduleDAG.h:288
MVT getSimpleValueType(unsigned ResNo) const
const TargetRegisterClass *const * regclass_iterator
void dump(const ScheduleDAG *G) const
SUnit - Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
bool isMachineOpcode() const
unsigned getMachineOpcode() const