ScheduleDAGInstrs.cpp
1 //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements the ScheduleDAGInstrs class, which implements re-scheduling
11 // of MachineInstrs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "misched"
16 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/Analysis/AliasAnalysis.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineMemOperand.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/PseudoSourceValue.h"
27 #include "llvm/CodeGen/RegisterPressure.h"
28 #include "llvm/CodeGen/ScheduleDFS.h"
29 #include "llvm/IR/Operator.h"
30 #include "llvm/MC/MCInstrItineraries.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/Format.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Target/TargetInstrInfo.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Target/TargetRegisterInfo.h"
38 #include "llvm/Target/TargetSubtargetInfo.h"
39 #include <queue>
40 
41 using namespace llvm;
42 
43 static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
44  cl::ZeroOrMore, cl::init(false),
45  cl::desc("Enable use of AA during MI DAG construction"));
46 
47 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
48  const MachineLoopInfo &mli,
49  const MachineDominatorTree &mdt,
50  bool IsPostRAFlag,
51  LiveIntervals *lis)
52  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
53  IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
54  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
55  DbgValues.clear();
56  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
57  "Virtual registers must be removed prior to PostRA scheduling");
58 
59  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
60  SchedModel.init(*ST.getSchedModel(), &ST, TII);
61 }
62 
63 /// getUnderlyingObjectFromInt - This is the function that does the work of
64 /// looking through basic ptrtoint+arithmetic+inttoptr sequences.
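/// For example, given IR such as:
///   %pi = ptrtoint i8* %obj to i64
///   %ai = add i64 %pi, 16
///   %p  = inttoptr i64 %ai to i8*
/// a walk starting at %ai steps through the add to %pi and returns the
/// ptrtoint operand, so the pointer-based walk can resume at %obj.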
65 static const Value *getUnderlyingObjectFromInt(const Value *V) {
66  do {
67  if (const Operator *U = dyn_cast<Operator>(V)) {
68  // If we find a ptrtoint, we can transfer control back to the
69  // regular getUnderlyingObjectFromInt.
70  if (U->getOpcode() == Instruction::PtrToInt)
71  return U->getOperand(0);
72  // If we find an add of a constant, a multiplied value, or a phi, it's
73  // likely that the other operand will lead us to the base
74  // object. We don't have to worry about the case where the
75  // object address is somehow being computed by the multiply,
76  // because our callers only care when the result is an
77  // identifiable object.
78  if (U->getOpcode() != Instruction::Add ||
79  (!isa<ConstantInt>(U->getOperand(1)) &&
80  Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
81  !isa<PHINode>(U->getOperand(1))))
82  return V;
83  V = U->getOperand(0);
84  } else {
85  return V;
86  }
87  assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
88  } while (1);
89 }
90 
91 /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
92 /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
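/// In effect this is a worklist algorithm: any underlying object that turns
/// out to be an IntToPtr is peeled back to its integer source via
/// getUnderlyingObjectFromInt and re-queued, until only plain pointer
/// values remain in Objects.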
93 static void getUnderlyingObjects(const Value *V,
94  SmallVectorImpl<Value *> &Objects) {
95  SmallPtrSet<const Value *, 16> Visited;
96  SmallVector<const Value *, 4> Working(1, V);
97  do {
98  V = Working.pop_back_val();
99 
100  SmallVector<Value *, 4> Objs;
101  GetUnderlyingObjects(const_cast<Value *>(V), Objs);
102 
103  for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
104  I != IE; ++I) {
105  V = *I;
106  if (!Visited.insert(V))
107  continue;
108  if (Operator::getOpcode(V) == Instruction::IntToPtr) {
109  const Value *O =
110  getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
111  if (O->getType()->isPointerTy()) {
112  Working.push_back(O);
113  continue;
114  }
115  }
116  Objects.push_back(const_cast<Value *>(V));
117  }
118  } while (!Working.empty());
119 }
120 
121 typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4>
122 UnderlyingObjectsVector;
123 
124 /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
125 /// information and it can be tracked to a normal reference to a known
126 /// object, return the Value for that object.
127 static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
128  const MachineFrameInfo *MFI,
129  UnderlyingObjectsVector &Objects) {
130  if (!MI->hasOneMemOperand() ||
131  !(*MI->memoperands_begin())->getValue() ||
132  (*MI->memoperands_begin())->isVolatile())
133  return;
134 
135  const Value *V = (*MI->memoperands_begin())->getValue();
136  if (!V)
137  return;
138 
139  SmallVector<Value *, 4> Objs;
140  getUnderlyingObjects(V, Objs);
141 
142  for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
143  I != IE; ++I) {
144  bool MayAlias = true;
145  V = *I;
146 
147  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
148  // For now, ignore PseudoSourceValues which may alias LLVM IR values
149  // because the code that uses this function has no way to cope with
150  // such aliases.
151 
152  if (PSV->isAliased(MFI)) {
153  Objects.clear();
154  return;
155  }
156 
157  MayAlias = PSV->mayAlias(MFI);
158  } else if (!isIdentifiedObject(V)) {
159  Objects.clear();
160  return;
161  }
162 
163  Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias));
164  }
165 }
166 
167 void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
168  BB = bb;
169 }
170 
171 void ScheduleDAGInstrs::finishBlock() {
172  // Subclasses should no longer refer to the old block.
173  BB = 0;
174 }
175 
176 /// Initialize the DAG and common scheduler state for the current scheduling
177 /// region. This does not actually create the DAG, only clears it. The
178 /// scheduling driver may call BuildSchedGraph multiple times per scheduling
179 /// region.
180 void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
181  MachineBasicBlock::iterator begin,
182  MachineBasicBlock::iterator end,
183  unsigned regioninstrs) {
184  assert(bb == BB && "startBlock should set BB");
185  RegionBegin = begin;
186  RegionEnd = end;
187  NumRegionInstrs = regioninstrs;
188 }
189 
190 /// Close the current scheduling region. Don't clear any state in case the
191 /// driver wants to refer to the previous scheduling region.
192 void ScheduleDAGInstrs::exitRegion() {
193  // Nothing to do.
194 }
195 
196 /// addSchedBarrierDeps - Add dependencies from instructions in the current
197 /// list of instructions being scheduled to the scheduling barrier by adding
198 /// the exit SU to the register defs and use list. This is because we want to
199 /// make sure instructions which define registers that are either used by
200 /// the terminator or are live-out are properly scheduled. This is
201 /// especially important when the definition latency of the return value(s)
202 /// is too high to be hidden by the branch, or when liveout registers
203 /// are used by instructions in the fallthrough block.
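/// For example, a long-latency instruction computing a return value must not
/// be scheduled so late that its result latency extends past the block exit;
/// the edges added here to ExitSU let the scheduler account for that.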
204 void ScheduleDAGInstrs::addSchedBarrierDeps() {
205  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
206  ExitSU.setInstr(ExitMI);
207  bool AllDepKnown = ExitMI &&
208  (ExitMI->isCall() || ExitMI->isBarrier());
209  if (ExitMI && AllDepKnown) {
210  // If it's a call or a barrier, add dependencies on the defs and uses of
211  // the instruction.
212  for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
213  const MachineOperand &MO = ExitMI->getOperand(i);
214  if (!MO.isReg() || MO.isDef()) continue;
215  unsigned Reg = MO.getReg();
216  if (Reg == 0) continue;
217 
218  if (TRI->isPhysicalRegister(Reg))
219  Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
220  else {
221  assert(!IsPostRA && "Virtual register encountered after regalloc.");
222  if (MO.readsReg()) // ignore undef operands
223  addVRegUseDeps(&ExitSU, i);
224  }
225  }
226  } else {
227  // For others, e.g. fallthrough, conditional branch, assume the exit
228  // uses all the registers that are livein to the successor blocks.
229  assert(Uses.empty() && "Uses in set before adding deps?");
230  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
231  SE = BB->succ_end(); SI != SE; ++SI)
232  for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
233  E = (*SI)->livein_end(); I != E; ++I) {
234  unsigned Reg = *I;
235  if (!Uses.contains(Reg))
236  Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
237  }
238  }
239 }
240 
241 /// MO is an operand of SU's instruction that defines a physical register. Add
242 /// data dependencies from SU to any uses of the physical register.
243 void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
244  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
245  assert(MO.isDef() && "expect physreg def");
246 
247  // Ask the target if address-backscheduling is desirable, and if so how much.
248  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
249 
250  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
251  Alias.isValid(); ++Alias) {
252  if (!Uses.contains(*Alias))
253  continue;
254  for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
255  SUnit *UseSU = I->SU;
256  if (UseSU == SU)
257  continue;
258 
259  // Adjust the dependence latency using operand def/use information,
260  // then allow the target to perform its own adjustments.
261  int UseOp = I->OpIdx;
262  MachineInstr *RegUse = 0;
263  SDep Dep;
264  if (UseOp < 0)
265  Dep = SDep(SU, SDep::Artificial);
266  else {
267  // Set the hasPhysRegDefs only for physreg defs that have a use within
268  // the scheduling region.
269  SU->hasPhysRegDefs = true;
270  Dep = SDep(SU, SDep::Data, *Alias);
271  RegUse = UseSU->getInstr();
272  }
273  Dep.setLatency(
274  SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
275  UseOp));
276 
277  ST.adjustSchedDependency(SU, UseSU, Dep);
278  UseSU->addPred(Dep);
279  }
280  }
281 }
282 
283 /// addPhysRegDeps - Add register dependencies (data, anti, and output) from
284 /// this SUnit to following instructions in the same scheduling region that
285 /// depend on the physical register referenced at OperIdx.
286 void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
287  const MachineInstr *MI = SU->getInstr();
288  const MachineOperand &MO = MI->getOperand(OperIdx);
289 
290  // Optionally add output and anti dependencies. For anti
291  // dependencies we use a latency of 0 because for a multi-issue
292  // target we want to allow the defining instruction to issue
293  // in the same cycle as the using instruction.
294  // TODO: Using a latency of 1 here for output dependencies assumes
295  // there's no cost for reusing registers.
296  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
297  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
298  Alias.isValid(); ++Alias) {
299  if (!Defs.contains(*Alias))
300  continue;
301  for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
302  SUnit *DefSU = I->SU;
303  if (DefSU == &ExitSU)
304  continue;
305  if (DefSU != SU &&
306  (Kind != SDep::Output || !MO.isDead() ||
307  !DefSU->getInstr()->registerDefIsDead(*Alias))) {
308  if (Kind == SDep::Anti)
309  DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
310  else {
311  SDep Dep(SU, Kind, /*Reg=*/*Alias);
312  Dep.setLatency(
313  SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
314  DefSU->addPred(Dep);
315  }
316  }
317  }
318  }
319 
320  if (!MO.isDef()) {
321  SU->hasPhysRegUses = true;
322  // Either insert a new Reg2SUnits entry with an empty SUnits list, or
323  // retrieve the existing SUnits list for this register's uses.
324  // Push this SUnit on the use list.
325  Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
326  }
327  else {
328  addPhysRegDataDeps(SU, OperIdx);
329  unsigned Reg = MO.getReg();
330 
331  // clear this register's use list
332  if (Uses.contains(Reg))
333  Uses.eraseAll(Reg);
334 
335  if (!MO.isDead()) {
336  Defs.eraseAll(Reg);
337  } else if (SU->isCall) {
338  // Calls will not be reordered because of chain dependencies (see
339  // below). Since call operands are dead, calls may continue to be added
340  // to the DefList making dependence checking quadratic in the size of
341  // the block. Instead, we leave only one call at the back of the
342  // DefList.
343  Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
344  Reg2SUnitsMap::iterator B = P.first;
345  Reg2SUnitsMap::iterator I = P.second;
346  for (bool isBegin = I == B; !isBegin; /* empty */) {
347  isBegin = (--I) == B;
348  if (!I->SU->isCall)
349  break;
350  I = Defs.erase(I);
351  }
352  }
353 
354  // Defs are pushed in the order they are visited and never reordered.
355  Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
356  }
357 }
358 
359 /// addVRegDefDeps - Add register output and data dependencies from this SUnit
360 /// to instructions that occur later in the same scheduling region if they read
361 /// from or write to the virtual register defined at OperIdx.
362 ///
363 /// TODO: Hoist loop induction variable increments. This has to be
364 /// reevaluated. Generally, IV scheduling should be done before coalescing.
365 void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
366  const MachineInstr *MI = SU->getInstr();
367  unsigned Reg = MI->getOperand(OperIdx).getReg();
368 
369  // Singly defined vregs do not have output/anti dependencies.
370  // The current operand is a def, so we have at least one.
371  // Check here if there are any others...
372  if (MRI.hasOneDef(Reg))
373  return;
374 
375  // Add output dependence to the next nearest def of this vreg.
376  //
377  // Unless this definition is dead, the output dependence should be
378  // transitively redundant with antidependencies from this definition's
379  // uses. We're conservative for now until we have a way to guarantee the uses
380  // are not eliminated sometime during scheduling. The output dependence edge
381  // is also useful if output latency exceeds def-use latency.
382  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
383  if (DefI == VRegDefs.end())
384  VRegDefs.insert(VReg2SUnit(Reg, SU));
385  else {
386  SUnit *DefSU = DefI->SU;
387  if (DefSU != SU && DefSU != &ExitSU) {
388  SDep Dep(SU, SDep::Output, Reg);
389  Dep.setLatency(
390  SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
391  DefSU->addPred(Dep);
392  }
393  DefI->SU = SU;
394  }
395 }
396 
397 /// addVRegUseDeps - Add a register data dependency if the instruction that
398 /// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
399 /// register antidependency from this SUnit to instructions that occur later in
400 /// the same scheduling region if they write the virtual register.
401 ///
402 /// TODO: Handle ExitSU "uses" properly.
403 void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
404  MachineInstr *MI = SU->getInstr();
405  unsigned Reg = MI->getOperand(OperIdx).getReg();
406 
407  // Record this local VReg use.
408  VReg2UseMap::iterator UI = VRegUses.find(Reg);
409  for (; UI != VRegUses.end(); ++UI) {
410  if (UI->SU == SU)
411  break;
412  }
413  if (UI == VRegUses.end())
414  VRegUses.insert(VReg2SUnit(Reg, SU));
415 
416  // Lookup this operand's reaching definition.
417  assert(LIS && "vreg dependencies requires LiveIntervals");
418  LiveQueryResult LRQ
419  = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
420  VNInfo *VNI = LRQ.valueIn();
421 
422  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
423  assert(VNI && "No value to read by operand");
424  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
425  // Phis and other noninstructions (after coalescing) have a NULL Def.
426  if (Def) {
427  SUnit *DefSU = getSUnit(Def);
428  if (DefSU) {
429  // The reaching Def lives within this scheduling region.
430  // Create a data dependence.
431  SDep dep(DefSU, SDep::Data, Reg);
432  // Adjust the dependence latency using operand def/use information, then
433  // allow the target to perform its own adjustments.
434  int DefOp = Def->findRegisterDefOperandIdx(Reg);
435  dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
436 
437  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
438  ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
439  SU->addPred(dep);
440  }
441  }
442 
443  // Add antidependence to the following def of the vreg it uses.
444  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
445  if (DefI != VRegDefs.end() && DefI->SU != SU)
446  DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
447 }
448 
449 /// Return true if MI is an instruction we are unable to reason about
450 /// (like a call or something with unmodeled side effects).
451 static bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
452  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
453  (MI->hasOrderedMemoryRef() &&
454  (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
455  return true;
456  return false;
457 }
458 
459 // This MI might have either incomplete info, or be known to be unsafe
460 // to deal with (i.e. a volatile object).
461 static bool isUnsafeMemoryObject(MachineInstr *MI,
462  const MachineFrameInfo *MFI) {
463  if (!MI || MI->memoperands_empty())
464  return true;
465  // We purposefully do not check for hasOneMemOperand() here,
466  // in the hope of triggering an assert downstream in order to
467  // finish the implementation.
468  if ((*MI->memoperands_begin())->isVolatile() ||
469  MI->hasUnmodeledSideEffects())
470  return true;
471  const Value *V = (*MI->memoperands_begin())->getValue();
472  if (!V)
473  return true;
474 
475  SmallVector<Value *, 4> Objs;
476  getUnderlyingObjects(V, Objs);
477  for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
478  IE = Objs.end(); I != IE; ++I) {
479  V = *I;
480 
481  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
482  // Similarly to getUnderlyingObjectsForInstr:
483  // For now, ignore PseudoSourceValues which may alias LLVM IR values
484  // because the code that uses this function has no way to cope with
485  // such aliases.
486  if (PSV->isAliased(MFI))
487  return true;
488  }
489 
490  // Does this pointer refer to a distinct and identifiable object?
491  if (!isIdentifiedObject(V))
492  return true;
493  }
494 
495  return false;
496 }
497 
498 /// This returns true if the two MIs need a chain edge between them.
499 /// If these are not even memory operations, we still may need
500 /// chain deps between them. The question really is: could
501 /// these two MIs be reordered during scheduling from a memory dependency
502 /// point of view?
503 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
504  MachineInstr *MIa,
505  MachineInstr *MIb) {
506  // Cover a trivial case - no edge is needed to itself.
507  if (MIa == MIb)
508  return false;
509 
510  if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
511  return true;
512 
513  // If we are dealing with two "normal" loads, we do not need an edge
514  // between them - they could be reordered.
515  if (!MIa->mayStore() && !MIb->mayStore())
516  return false;
517 
518  // Up to this point the analysis is generic. From here on we do need AA.
519  if (!AA)
520  return true;
521 
522  MachineMemOperand *MMOa = *MIa->memoperands_begin();
523  MachineMemOperand *MMOb = *MIb->memoperands_begin();
524 
525  // FIXME: Need to handle multiple memory operands to support all targets.
526  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
527  llvm_unreachable("Multiple memory operands.");
528 
529  // The following interface to AA is fashioned after DAGCombiner::isAlias
530  // and operates with MachineMemOperand offset with some important
531  // assumptions:
532  // - LLVM fundamentally assumes flat address spaces.
533  // - MachineOperand offset can *only* result from legalization and
534  // cannot affect queries other than the trivial case of overlap
535  // checking.
536  // - These offsets never wrap and never step outside
537  // of allocated objects.
538  // - There should never be any negative offsets here.
539  //
540  // FIXME: Modify API to hide this math from "user"
541  // FIXME: Even before we go to AA we can reason locally about some
542  // memory objects. It can save compile time, and possibly catch some
543  // corner cases not currently covered.
544 
545  assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
546  assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
547 
548  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
549  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
550  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
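  // e.g. with MMOa (Offset=8, Size=4) and MMOb (Offset=12, Size=8):
  // MinOffset = 8, Overlapa = 4+8-8 = 4, Overlapb = 8+12-8 = 12. Each
  // queried size is padded by that operand's distance above MinOffset, so
  // the AA query compares both accesses from a common starting offset.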
551 
552  AliasAnalysis::AliasResult AAResult = AA->alias(
553  AliasAnalysis::Location(MMOa->getValue(), Overlapa,
554  MMOa->getTBAAInfo()),
555  AliasAnalysis::Location(MMOb->getValue(), Overlapb,
556  MMOb->getTBAAInfo()));
557 
558  return (AAResult != AliasAnalysis::NoAlias);
559 }
560 
561 /// This recursive function iterates over chain deps of SUb looking for
562 /// "latest" node that needs a chain edge to SUa.
563 static unsigned
564 iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
565  SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
566  SmallPtrSet<const SUnit *, 16> &Visited) {
567  if (!SUa || !SUb || SUb == ExitSU)
568  return *Depth;
569 
570  // Remember visited nodes.
571  if (!Visited.insert(SUb))
572  return *Depth;
573  // If there is _some_ dependency already in place, do not
574  // descend any further.
575  // TODO: Need to make sure that if that dependency got eliminated or ignored
576  // for any reason in the future, we would not violate DAG topology.
577  // Currently it does not happen, but makes an implicit assumption about
578  // future implementation.
579  //
580  // Independently, if we encounter a node that is some sort of global
581  // object (like a call) we already have a full set of dependencies to it
582  // and we can stop descending.
583  if (SUa->isSucc(SUb) ||
584  isGlobalMemoryObject(AA, SUb->getInstr()))
585  return *Depth;
586 
587  // If we do need an edge, or we have exceeded the depth budget,
588  // add that edge to the predecessors chain of SUb,
589  // and stop descending.
590  if (*Depth > 200 ||
591  MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
592  SUb->addPred(SDep(SUa, SDep::MayAliasMem));
593  return *Depth;
594  }
595  // Track current depth.
596  (*Depth)++;
597  // Iterate over chain dependencies only.
598  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
599  I != E; ++I)
600  if (I->isCtrl())
601  iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
602  return *Depth;
603 }
604 
605 /// This function assumes that "downward" from SU there exists a
606 /// tail/leaf of the already constructed DAG. It iterates downward and
607 /// checks whether SU can be aliasing any node dominated
608 /// by it.
609 static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
610  SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
611  unsigned LatencyToLoad) {
612  if (!SU)
613  return;
614 
615  SmallPtrSet<const SUnit *, 16> Visited;
616  unsigned Depth = 0;
617 
618  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
619  I != IE; ++I) {
620  if (SU == *I)
621  continue;
622  if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
623  SDep Dep(SU, SDep::MayAliasMem);
624  Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
625  (*I)->addPred(Dep);
626  }
627  // Now go through all the chain successors and iterate from them.
628  // Keep track of visited nodes.
629  for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
630  JE = (*I)->Succs.end(); J != JE; ++J)
631  if (J->isCtrl())
632  iterateChainSucc (AA, MFI, SU, J->getSUnit(),
633  ExitSU, &Depth, Visited);
634  }
635 }
636 
637 /// Check whether two objects need a chain edge; if so add it,
638 /// otherwise remember the rejected SU.
639 static inline
640 void addChainDependency(AliasAnalysis *AA, const MachineFrameInfo *MFI,
641  SUnit *SUa, SUnit *SUb,
642  std::set<SUnit *> &RejectList,
643  unsigned TrueMemOrderLatency = 0,
644  bool isNormalMemory = false) {
645  // If this is a false dependency,
646  // do not add the edge, but remember the rejected node.
647  if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
648  SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
649  Dep.setLatency(TrueMemOrderLatency);
650  SUb->addPred(Dep);
651  }
652  else {
653  // Duplicate entries should be ignored.
654  RejectList.insert(SUb);
655  DEBUG(dbgs() << "\tReject chain dep between SU("
656  << SUa->NodeNum << ") and SU("
657  << SUb->NodeNum << ")\n");
658  }
659 }
660 
661 /// Create an SUnit for each real instruction, numbered in top-down topological
662 /// order. The instruction order A < B implies that no edge exists from B to A.
663 ///
664 /// Map each real instruction to its SUnit.
665 ///
666 /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
667 /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
668 /// instead of pointers.
669 ///
670 /// MachineScheduler relies on initSUnits numbering the nodes by their order in
671 /// the original instruction list.
672 void ScheduleDAGInstrs::initSUnits() {
673  // We'll be allocating one SUnit for each real instruction in the region,
674  // which is contained within a basic block.
675  SUnits.reserve(NumRegionInstrs);
676 
677  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
678  MachineInstr *MI = I;
679  if (MI->isDebugValue())
680  continue;
681 
682  SUnit *SU = newSUnit(MI);
683  MISUnitMap[MI] = SU;
684 
685  SU->isCall = MI->isCall();
686  SU->isCommutable = MI->isCommutable();
687 
688  // Assign the Latency field of SU using target-provided information.
689  SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
690  }
691 }
692 
693 /// If RegPressure is non-null, compute register pressure as a side effect. The
694 /// DAG builder is an efficient place to do it because it already visits
695 /// operands.
696 void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
697  RegPressureTracker *RPTracker,
698  PressureDiffs *PDiffs) {
699  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
700  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
701  : ST.useAA();
702  AliasAnalysis *AAForDep = UseAA ? AA : 0;
703 
704  MISUnitMap.clear();
705  ScheduleDAG::clearDAG();
706 
707  // Create an SUnit for each real instruction.
708  initSUnits();
709 
710  if (PDiffs)
711  PDiffs->init(SUnits.size());
712 
713  // We build scheduling units by walking a block's instruction list from bottom
714  // to top.
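  // (Bottom-up order means every use of a register is visited before its
  // def, so by the time a def is reached all in-region readers are already
  // recorded in the Uses set and def-to-use edges can be added in one pass.)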
715 
716  // Remember where a generic side-effecting instruction is as we proceed.
717  SUnit *BarrierChain = 0, *AliasChain = 0;
718 
719  // Memory references to specific known memory locations are tracked
720  // so that they can be given more precise dependencies. We track
721  // separately the known memory locations that may alias and those
722  // that are known not to alias.
723  MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
724  MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
725  std::set<SUnit*> RejectMemNodes;
726 
727  // Remove any stale debug info; sometimes BuildSchedGraph is called again
728  // without emitting the info from the previous call.
729  DbgValues.clear();
730  FirstDbgValue = NULL;
731 
732  assert(Defs.empty() && Uses.empty() &&
733  "Only BuildGraph should update Defs/Uses");
734  Defs.setUniverse(TRI->getNumRegs());
735  Uses.setUniverse(TRI->getNumRegs());
736 
737  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
738  VRegUses.clear();
739  VRegDefs.setUniverse(MRI.getNumVirtRegs());
740  VRegUses.setUniverse(MRI.getNumVirtRegs());
741 
742  // Model data dependencies between instructions being scheduled and the
743  // ExitSU.
744  addSchedBarrierDeps();
745 
746  // Walk the list of instructions, from bottom moving up.
747  MachineInstr *DbgMI = NULL;
748  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
749  MII != MIE; --MII) {
750  MachineInstr *MI = prior(MII);
751  if (MI && DbgMI) {
752  DbgValues.push_back(std::make_pair(DbgMI, MI));
753  DbgMI = NULL;
754  }
755 
756  if (MI->isDebugValue()) {
757  DbgMI = MI;
758  continue;
759  }
760  SUnit *SU = MISUnitMap[MI];
761  assert(SU && "No SUnit mapped to this MI");
762 
763  if (RPTracker) {
764  PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0;
765  RPTracker->recede(/*LiveUses=*/0, PDiff);
766  assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
767  }
768 
769  assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
770  "Cannot schedule terminators or labels!");
771 
772  // Add register-based dependencies (data, anti, and output).
773  bool HasVRegDef = false;
774  for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
775  const MachineOperand &MO = MI->getOperand(j);
776  if (!MO.isReg()) continue;
777  unsigned Reg = MO.getReg();
778  if (Reg == 0) continue;
779 
780  if (TRI->isPhysicalRegister(Reg))
781  addPhysRegDeps(SU, j);
782  else {
783  assert(!IsPostRA && "Virtual register encountered!");
784  if (MO.isDef()) {
785  HasVRegDef = true;
786  addVRegDefDeps(SU, j);
787  }
788  else if (MO.readsReg()) // ignore undef operands
789  addVRegUseDeps(SU, j);
790  }
791  }
792  // If we haven't seen any uses in this scheduling region, create a
793  // dependence edge to ExitSU to model the live-out latency. This is required
794  // for vreg defs with no in-region use, and prefetches with no vreg def.
795  //
796  // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
797  // check currently relies on being called before adding chain deps.
798  if (SU->NumSuccs == 0 && SU->Latency > 1
799  && (HasVRegDef || MI->mayLoad())) {
800  SDep Dep(SU, SDep::Artificial);
801  Dep.setLatency(SU->Latency - 1);
802  ExitSU.addPred(Dep);
803  }
804 
805  // Add chain dependencies.
806  // Chain dependencies used to enforce memory order should have
807  // latency of 0 (except for true dependency of Store followed by
808  // aliased Load... we estimate that with a single cycle of latency
809  // assuming the hardware will bypass)
810  // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
811  // after stack slots are lowered to actual addresses.
812  // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
813  // produce more precise dependence information.
814  unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
815  if (isGlobalMemoryObject(AA, MI)) {
816  // Be conservative with these and add dependencies on all memory
817  // references, even those that are known to not alias.
818  for (MapVector<const Value *, SUnit *>::iterator I =
819  NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
820  I->second->addPred(SDep(SU, SDep::Barrier));
821  }
822  for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
823  NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
824  for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
825  SDep Dep(SU, SDep::Barrier);
826  Dep.setLatency(TrueMemOrderLatency);
827  I->second[i]->addPred(Dep);
828  }
829  }
830  // Add SU to the barrier chain.
831  if (BarrierChain)
832  BarrierChain->addPred(SDep(SU, SDep::Barrier));
833  BarrierChain = SU;
834  // This is a barrier event that acts as a pivotal node in the DAG,
835  // so it is safe to clear the list of exposed nodes.
836  adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
837  TrueMemOrderLatency);
838  RejectMemNodes.clear();
839  NonAliasMemDefs.clear();
840  NonAliasMemUses.clear();
841 
842  // fall-through
843  new_alias_chain:
844  // Chain all possibly aliasing memory references through SU.
845  if (AliasChain) {
846  unsigned ChainLatency = 0;
847  if (AliasChain->getInstr()->mayLoad())
848  ChainLatency = TrueMemOrderLatency;
849  addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
850  ChainLatency);
851  }
852  AliasChain = SU;
853  for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
854  addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
855  TrueMemOrderLatency);
856  for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
857  E = AliasMemDefs.end(); I != E; ++I)
858  addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
859  for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
860  AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
861  for (unsigned i = 0, e = I->second.size(); i != e; ++i)
862  addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
863  TrueMemOrderLatency);
864  }
865  adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
866  TrueMemOrderLatency);
867  PendingLoads.clear();
868  AliasMemDefs.clear();
869  AliasMemUses.clear();
870  } else if (MI->mayStore()) {
871  UnderlyingObjectsVector Objs;
872  getUnderlyingObjectsForInstr(MI, MFI, Objs);
873 
874  if (Objs.empty()) {
875  // Treat all other stores conservatively.
876  goto new_alias_chain;
877  }
878 
879  bool MayAlias = false;
880  for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
881  K != KE; ++K) {
882  const Value *V = K->getPointer();
883  bool ThisMayAlias = K->getInt();
884  if (ThisMayAlias)
885  MayAlias = true;
886 
887  // A store to a specific PseudoSourceValue. Add precise dependencies.
888  // Record the def in MemDefs, first adding a dep if there is
889  // an existing def.
890  MapVector<const Value *, SUnit *>::iterator I =
891  ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
892  MapVector<const Value *, SUnit *>::iterator IE =
893  ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
894  if (I != IE) {
895  addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
896  0, true);
897  I->second = SU;
898  } else {
899  if (ThisMayAlias)
900  AliasMemDefs[V] = SU;
901  else
902  NonAliasMemDefs[V] = SU;
903  }
904  // Handle the uses in MemUses, if there are any.
905  MapVector<const Value *, std::vector<SUnit *> >::iterator J =
906  ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
907  MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
908  ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
909  if (J != JE) {
910  for (unsigned i = 0, e = J->second.size(); i != e; ++i)
911  addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
912  TrueMemOrderLatency, true);
913  J->second.clear();
914  }
915  }
916  if (MayAlias) {
917  // Add dependencies from all the PendingLoads, i.e. loads
918  // with no underlying object.
919  for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
920  addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
921  TrueMemOrderLatency);
922  // Add dependence on alias chain, if needed.
923  if (AliasChain)
924  addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
925  // But we should also check dependent instructions for the
926  // SU in question.
927  adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
928  TrueMemOrderLatency);
929  }
930  // Add dependence on barrier chain, if needed.
931  // There is no point in checking aliasing on a barrier event. Even if
932  // SU and the barrier _could_ be reordered, they should not. In addition,
933  // we have lost all RejectMemNodes below the barrier.
934  if (BarrierChain)
935  BarrierChain->addPred(SDep(SU, SDep::Barrier));
936 
937  if (!ExitSU.isPred(SU))
938  // Push stores up a bit to avoid them getting in between cmp
939  // and branches.
940  ExitSU.addPred(SDep(SU, SDep::Artificial));
941  } else if (MI->mayLoad()) {
942  bool MayAlias = true;
943  if (MI->isInvariantLoad(AA)) {
944  // Invariant load, no chain dependencies needed!
945  } else {
946  UnderlyingObjectsVector Objs;
947  getUnderlyingObjectsForInstr(MI, MFI, Objs);
948 
949  if (Objs.empty()) {
950  // A load with no underlying object. Depend on all
951  // potentially aliasing stores.
952  for (MapVector<const Value *, SUnit *>::iterator I =
953  AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
954  addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
955 
956  PendingLoads.push_back(SU);
957  MayAlias = true;
958  } else {
959  MayAlias = false;
960  }
961 
962  for (UnderlyingObjectsVector::iterator
963  J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
964  const Value *V = J->getPointer();
965  bool ThisMayAlias = J->getInt();
966 
967  if (ThisMayAlias)
968  MayAlias = true;
969 
970  // A load from a specific PseudoSourceValue. Add precise dependencies.
971  MapVector<const Value *, SUnit *>::iterator I =
972  ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
973  MapVector<const Value *, SUnit *>::iterator IE =
974  ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
975  if (I != IE)
976  addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
977  0, true);
978  if (ThisMayAlias)
979  AliasMemUses[V].push_back(SU);
980  else
981  NonAliasMemUses[V].push_back(SU);
982  }
983  if (MayAlias)
984  adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
985  // Add dependencies on alias and barrier chains, if needed.
986  if (MayAlias && AliasChain)
987  addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
988  if (BarrierChain)
989  BarrierChain->addPred(SDep(SU, SDep::Barrier));
990  }
991  }
992  }
993  if (DbgMI)
994  FirstDbgValue = DbgMI;
995 
996  Defs.clear();
997  Uses.clear();
998  VRegDefs.clear();
999  PendingLoads.clear();
1000 }
1001 
1002 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
1003 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1004  SU->getInstr()->dump();
1005 #endif
1006 }
1007 
1008 std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1009  std::string s;
1010  raw_string_ostream oss(s);
1011  if (SU == &EntrySU)
1012  oss << "<entry>";
1013  else if (SU == &ExitSU)
1014  oss << "<exit>";
1015  else
1016  SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
1017  return oss.str();
1018 }
1019 
1020 /// Return the basic block label. It is not necessarily unique because a block
1021 /// contains multiple scheduling regions. But it is fine for visualization.
1022 std::string ScheduleDAGInstrs::getDAGName() const {
1023  return "dag." + BB->getFullName();
1024 }
1025 
1026 //===----------------------------------------------------------------------===//
1027 // SchedDFSResult Implementation
1028 //===----------------------------------------------------------------------===//
1029 
1030 namespace llvm {
1031 /// \brief Internal state used to compute SchedDFSResult.
1032 class SchedDFSImpl {
1033  SchedDFSResult &R;
1034 
1035  /// Join DAG nodes into equivalence classes by their subtree.
1036  IntEqClasses SubtreeClasses;
1037  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1038  std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
1039 
1040  struct RootData {
1041  unsigned NodeID;
1042  unsigned ParentNodeID; // Parent node (member of the parent subtree).
1043  unsigned SubInstrCount; // Instr count in this tree only, not children.
1044 
1045  RootData(unsigned id): NodeID(id),
1046  ParentNodeID(SchedDFSResult::InvalidSubtreeID),
1047  SubInstrCount(0) {}
1048 
1049  unsigned getSparseSetIndex() const { return NodeID; }
1050  };
1051 
1052  SparseSet<RootData> RootSet;
1053 
1054 public:
1055  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1056  RootSet.setUniverse(R.DFSNodeData.size());
1057  }
1058 
1059  /// Return true if this node has been visited by the DFS traversal.
1060  ///
1061  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1062  /// ID. Later, SubtreeID is updated but remains valid.
1063  bool isVisited(const SUnit *SU) const {
1064  return R.DFSNodeData[SU->NodeNum].SubtreeID
1065  != SchedDFSResult::InvalidSubtreeID;
1066  }
1067 
1068  /// Initialize this node's instruction count. We don't need to flag the node
1069  /// visited until visitPostorder because the DAG cannot have cycles.
1070  void visitPreorder(const SUnit *SU) {
1071  R.DFSNodeData[SU->NodeNum].InstrCount =
1072  SU->getInstr()->isTransient() ? 0 : 1;
1073  }
1074 
1075  /// Called once for each node after all predecessors are visited. Revisit this
1076  /// node's predecessors and potentially join them now that we know the ILP of
1077  /// the other predecessors.
1078  void visitPostorderNode(const SUnit *SU) {
1079  // Mark this node as the root of a subtree. It may be joined with its
1080  // successors later.
1081  R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1082  RootData RData(SU->NodeNum);
1083  RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1084 
1085  // If any predecessors are still in their own subtree, they either cannot be
1086  // joined or are large enough to remain separate. If this parent node's
1087  // total instruction count is not greater than a child subtree by at least
1088  // the subtree limit, then try to join it now since splitting subtrees is
1089  // only useful if multiple high-pressure paths are possible.
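    // Roughly: if this subtree's InstrCount exceeds a child's by less than
    // SubtreeLimit, the child is merged in, since splitting there could not
    // expose a sufficiently large independent subtree.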
1090  unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1091  for (SUnit::const_pred_iterator
1092  PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1093  if (PI->getKind() != SDep::Data)
1094  continue;
1095  unsigned PredNum = PI->getSUnit()->NodeNum;
1096  if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1097  joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
1098 
1099  // Either link or merge the TreeData entry from the child to the parent.
1100  if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1101  // If the predecessor's parent is invalid, this is a tree edge and the
1102  // current node is the parent.
1103  if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1104  RootSet[PredNum].ParentNodeID = SU->NodeNum;
1105  }
1106  else if (RootSet.count(PredNum)) {
1107  // The predecessor is not a root, but is still in the root set. This
1108  // must be the new parent that it was just joined to. Note that
1109  // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1110  // set to the original parent.
1111  RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1112  RootSet.erase(PredNum);
1113  }
1114  }
1115  RootSet[SU->NodeNum] = RData;
1116  }
1117 
1118  /// Called once for each tree edge after calling visitPostorderNode on the
1119  /// predecessor. Increment the parent node's instruction count and
1120  /// preemptively join this subtree to its parent's if it is small enough.
1121  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1122  R.DFSNodeData[Succ->NodeNum].InstrCount
1123  += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1124  joinPredSubtree(PredDep, Succ);
1125  }
1126 
1127  /// Add a connection for cross edges.
1128  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1129  ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
1130  }
1131 
1132  /// Set each node's subtree ID to the representative ID and record connections
1133  /// between trees.
1134  void finalize() {
1135  SubtreeClasses.compress();
1136  R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1137  assert(SubtreeClasses.getNumClasses() == RootSet.size()
1138  && "number of roots should match trees");
1139  for (SparseSet<RootData>::const_iterator
1140  RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
1141  unsigned TreeID = SubtreeClasses[RI->NodeID];
1142  if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1143  R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
1144  R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
1145  // Note that SubInstrCount may be greater than InstrCount if we joined
1146  // subtrees across a cross edge. InstrCount will be attributed to the
1147  // original parent, while SubInstrCount will be attributed to the joined
1148  // parent.
1149  }
1150  R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1151  R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1152  DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1153  for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1154  R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1155  DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1156  << R.DFSNodeData[Idx].SubtreeID << '\n');
1157  }
1158  for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
1159  I = ConnectionPairs.begin(), E = ConnectionPairs.end();
1160  I != E; ++I) {
1161  unsigned PredTree = SubtreeClasses[I->first->NodeNum];
1162  unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
1163  if (PredTree == SuccTree)
1164  continue;
1165  unsigned Depth = I->first->getDepth();
1166  addConnection(PredTree, SuccTree, Depth);
1167  addConnection(SuccTree, PredTree, Depth);
1168  }
1169  }
1170 
1171 protected:
1172  /// Join the predecessor subtree with the successor that is its DFS
1173  /// parent. Apply some heuristics before joining.
1174  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1175  bool CheckLimit = true) {
1176  assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1177 
1178  // Check if the predecessor is already joined.
1179  const SUnit *PredSU = PredDep.getSUnit();
1180  unsigned PredNum = PredSU->NodeNum;
1181  if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1182  return false;
1183 
1184  // Four is the magic number of successors before a node is considered a
1185  // pinch point.
1186  unsigned NumDataSucs = 0;
1187  for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
1188  SE = PredSU->Succs.end(); SI != SE; ++SI) {
1189  if (SI->getKind() == SDep::Data) {
1190  if (++NumDataSucs >= 4)
1191  return false;
1192  }
1193  }
1194  if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1195  return false;
1196  R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1197  SubtreeClasses.join(Succ->NodeNum, PredNum);
1198  return true;
1199  }
1200 
1201  /// Called by finalize() to record a connection between trees.
1202  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1203  if (!Depth)
1204  return;
1205 
1206  do {
1207  SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1208  R.SubtreeConnections[FromTree];
1209  for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
1210  I = Connections.begin(), E = Connections.end(); I != E; ++I) {
1211  if (I->TreeID == ToTree) {
1212  I->Level = std::max(I->Level, Depth);
1213  return;
1214  }
1215  }
1216  Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1217  FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1218  } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1219  }
1220 };
1221 } // namespace llvm
1222 
1223 namespace {
1224 /// \brief Manage the stack used by a reverse depth-first search over the DAG.
1225 class SchedDAGReverseDFS {
1226  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
1227 public:
1228  bool isComplete() const { return DFSStack.empty(); }
1229 
1230  void follow(const SUnit *SU) {
1231  DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
1232  }
1233  void advance() { ++DFSStack.back().second; }
1234 
1235  const SDep *backtrack() {
1236  DFSStack.pop_back();
1237  return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
1238  }
1239 
1240  const SUnit *getCurr() const { return DFSStack.back().first; }
1241 
1242  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1243 
1244  SUnit::const_pred_iterator getPredEnd() const {
1245  return getCurr()->Preds.end();
1246  }
1247 };
1248 } // anonymous
1249 
1250 static bool hasDataSucc(const SUnit *SU) {
1251  for (SUnit::const_succ_iterator
1252  SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
1253  if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
1254  return true;
1255  }
1256  return false;
1257 }
1258 
1259 /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
1260 /// search from this root.
1261 void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1262  if (!IsBottomUp)
1263  llvm_unreachable("Top-down ILP metric is unimplemented");
1264 
1265  SchedDFSImpl Impl(*this);
1266  for (ArrayRef<SUnit>::const_iterator
1267  SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
1268  const SUnit *SU = &*SI;
1269  if (Impl.isVisited(SU) || hasDataSucc(SU))
1270  continue;
1271 
1272  SchedDAGReverseDFS DFS;
1273  Impl.visitPreorder(SU);
1274  DFS.follow(SU);
1275  for (;;) {
1276  // Traverse the leftmost path as far as possible.
1277  while (DFS.getPred() != DFS.getPredEnd()) {
1278  const SDep &PredDep = *DFS.getPred();
1279  DFS.advance();
1280  // Ignore non-data edges.
1281  if (PredDep.getKind() != SDep::Data
1282  || PredDep.getSUnit()->isBoundaryNode()) {
1283  continue;
1284  }
1285  // An already visited edge is a cross edge, assuming an acyclic DAG.
1286  if (Impl.isVisited(PredDep.getSUnit())) {
1287  Impl.visitCrossEdge(PredDep, DFS.getCurr());
1288  continue;
1289  }
1290  Impl.visitPreorder(PredDep.getSUnit());
1291  DFS.follow(PredDep.getSUnit());
1292  }
1293  // Visit the top of the stack in postorder and backtrack.
1294  const SUnit *Child = DFS.getCurr();
1295  const SDep *PredDep = DFS.backtrack();
1296  Impl.visitPostorderNode(Child);
1297  if (PredDep)
1298  Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1299  if (DFS.isComplete())
1300  break;
1301  }
1302  }
1303  Impl.finalize();
1304 }
1305 
1306 /// The root of the given SubtreeID was just scheduled. For all subtrees
1307 /// connected to this tree, record the depth of the connection so that the
1308 /// nearest connected subtrees can be prioritized.
1309 void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1310  for (SmallVectorImpl<Connection>::const_iterator
1311  I = SubtreeConnections[SubtreeID].begin(),
1312  E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
1313  SubtreeConnectLevels[I->TreeID] =
1314  std::max(SubtreeConnectLevels[I->TreeID], I->Level);
1315  DEBUG(dbgs() << " Tree: " << I->TreeID
1316  << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
1317  }
1318 }
1319 
1320 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1321 void ILPValue::print(raw_ostream &OS) const {
1322  OS << InstrCount << " / " << Length << " = ";
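  // e.g. 6 instructions over a critical path of length 3 prints "6 / 3 = 2".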
1323  if (!Length)
1324  OS << "BADILP";
1325  else
1326  OS << format("%g", ((double)InstrCount / Length));
1327 }
1328 
1329 void ILPValue::dump() const {
1330  dbgs() << *this << '\n';
1331 }
1332 
1333 namespace llvm {
1334 
1335 raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1336  Val.print(OS);
1337  return OS;
1338 }
1339 
1340 } // namespace llvm
1341 #endif // !NDEBUG || LLVM_ENABLE_DUMP