LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MachineTraceMetrics.h
Go to the documentation of this file.
1 //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interface for the MachineTraceMetrics analysis pass
11 // that estimates CPU resource usage and critical data dependency paths through
12 // preferred traces. This is useful for super-scalar CPUs where execution speed
13 // can be limited both by data dependencies and by limited execution resources.
14 //
15 // Out-of-order CPUs will often be executing instructions from multiple basic
16 // blocks at the same time. This makes it difficult to estimate the resource
17 // usage accurately in a single basic block. Resources can be estimated better
18 // by looking at a trace through the current basic block.
19 //
20 // For every block, the MachineTraceMetrics pass will pick a preferred trace
21 // that passes through the block. The trace is chosen based on loop structure,
22 // branch probabilities, and resource usage. The intention is to pick likely
23 // traces that would be the most affected by code transformations.
24 //
25 // It is expensive to compute a full arbitrary trace for every block, so to
26 // save some computations, traces are chosen to be convergent. This means that
27 // if the traces through basic blocks A and B ever cross when moving away from
28 // A and B, they never diverge again. This applies in both directions - If the
29 // traces meet above A and B, they won't diverge when going further back.
30 //
31 // Traces tend to align with loops. The trace through a block in an inner loop
32 // will begin at the loop entry block and end at a back edge. If there are
33 // nested loops, the trace may begin and end at those instead.
34 //
35 // For each trace, we compute the critical path length, which is the number of
36 // cycles required to execute the trace when execution is limited by data
37 // dependencies only. We also compute the resource height, which is the number
38 // of cycles required to execute all instructions in the trace when ignoring
39 // data dependencies.
40 //
41 // Every instruction in the current block has a slack - the number of cycles
42 // execution of the instruction can be delayed without extending the critical
43 // path.
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
48 #define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
49 
50 #include "llvm/ADT/ArrayRef.h"
51 #include "llvm/ADT/DenseMap.h"
54 
55 namespace llvm {
56 
57 class InstrItineraryData;
58 class MachineBasicBlock;
59 class MachineInstr;
60 class MachineLoop;
61 class MachineLoopInfo;
62 class MachineRegisterInfo;
63 class TargetInstrInfo;
64 class TargetRegisterInfo;
65 class raw_ostream;
66 
68  const MachineFunction *MF;
69  const TargetInstrInfo *TII;
70  const TargetRegisterInfo *TRI;
71  const MachineRegisterInfo *MRI;
72  const MachineLoopInfo *Loops;
73  TargetSchedModel SchedModel;
74 
75 public:
76  class Ensemble;
77  class Trace;
78  static char ID;
80  void getAnalysisUsage(AnalysisUsage&) const;
82  void releaseMemory();
83  void verifyAnalysis() const;
84 
85  friend class Ensemble;
86  friend class Trace;
87 
88  /// Per-basic block information that doesn't depend on the trace through the
89  /// block.
90  struct FixedBlockInfo {
91  /// The number of non-trivial instructions in the block.
92  /// Doesn't count PHI and COPY instructions that are likely to be removed.
93  unsigned InstrCount;
94 
95  /// True when the block contains calls.
96  bool HasCalls;
97 
99 
100  /// Returns true when resource information for this block has been computed.
101  bool hasResources() const { return InstrCount != ~0u; }
102 
103  /// Invalidate resource information.
104  void invalidate() { InstrCount = ~0u; }
105  };
106 
107  /// Get the fixed resource information about MBB. Compute it on demand.
108  const FixedBlockInfo *getResources(const MachineBasicBlock*);
109 
110  /// Get the scaled number of cycles used per processor resource in MBB.
111  /// This is an array with SchedModel.getNumProcResourceKinds() entries.
112  /// The getResources() function above must have been called first.
113  ///
114  /// These numbers have already been scaled by SchedModel.getResourceFactor().
115  ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
116 
117  /// A virtual register or regunit required by a basic block or its trace
118  /// successors.
119  struct LiveInReg {
120  /// The virtual register required, or a register unit.
121  unsigned Reg;
122 
123  /// For virtual registers: Minimum height of the defining instruction.
124  /// For regunits: Height of the highest user in the trace.
125  unsigned Height;
126 
127  LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
128  };
129 
130  /// Per-basic block information that relates to a specific trace through the
131  /// block. Convergent traces means that only one of these is required per
132  /// block in a trace ensemble.
133  struct TraceBlockInfo {
134  /// Trace predecessor, or NULL for the first block in the trace.
135  /// Valid when hasValidDepth().
137 
138  /// Trace successor, or NULL for the last block in the trace.
139  /// Valid when hasValidHeight().
141 
142  /// The block number of the head of the trace. (When hasValidDepth()).
143  unsigned Head;
144 
145  /// The block number of the tail of the trace. (When hasValidHeight()).
146  unsigned Tail;
147 
148  /// Accumulated number of instructions in the trace above this block.
149  /// Does not include instructions in this block.
150  unsigned InstrDepth;
151 
152  /// Accumulated number of instructions in the trace below this block.
153  /// Includes instructions in this block.
154  unsigned InstrHeight;
155 
157  Pred(0), Succ(0),
158  InstrDepth(~0u), InstrHeight(~0u),
160 
161  /// Returns true if the depth resources have been computed from the trace
162  /// above this block.
163  bool hasValidDepth() const { return InstrDepth != ~0u; }
164 
165  /// Returns true if the height resources have been computed from the trace
166  /// below this block.
167  bool hasValidHeight() const { return InstrHeight != ~0u; }
168 
169  /// Invalidate depth resources when some block above this one has changed.
170  void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
171 
172  /// Invalidate height resources when a block below this one has changed.
174 
175  /// Assuming that this is a dominator of TBI, determine if it contains
176  /// useful instruction depths. A dominating block can be above the current
177  /// trace head, and any dependencies from such a far away dominator are not
178  /// expected to affect the critical path.
179  ///
180  /// Also returns true when TBI == this.
181  bool isUsefulDominator(const TraceBlockInfo &TBI) const {
182  // The trace for TBI may not even be calculated yet.
183  if (!hasValidDepth() || !TBI.hasValidDepth())
184  return false;
185  // Instruction depths are only comparable if the traces share a head.
186  if (Head != TBI.Head)
187  return false;
188  // It is almost always the case that TBI belongs to the same trace as
189  // this block, but rare convoluted cases involving irreducible control
190  // flow, a dominator may share a trace head without actually being on the
191  // same trace as TBI. This is not a big problem as long as it doesn't
192  // increase the instruction depth.
193  return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
194  }
195 
196  // Data-dependency-related information. Per-instruction depth and height
197  // are computed from data dependencies in the current trace, using
198  // itinerary data.
199 
200  /// Instruction depths have been computed. This implies hasValidDepth().
202 
203  /// Instruction heights have been computed. This implies hasValidHeight().
205 
206  /// Critical path length. This is the number of cycles in the longest data
207  /// dependency chain through the trace. This is only valid when both
208  /// HasValidInstrDepths and HasValidInstrHeights are set.
209  unsigned CriticalPath;
210 
211  /// Live-in registers. These registers are defined above the current block
212  /// and used by this block or a block below it.
213  /// This does not include PHI uses in the current block, but it does
214  /// include PHI uses in deeper blocks.
216 
217  void print(raw_ostream&) const;
218  };
219 
220  /// InstrCycles represents the cycle height and depth of an instruction in a
221  /// trace.
222  struct InstrCycles {
223  /// Earliest issue cycle as determined by data dependencies and instruction
224  /// latencies from the beginning of the trace. Data dependencies from
225  /// before the trace are not included.
226  unsigned Depth;
227 
228  /// Minimum number of cycles from this instruction is issued to the of the
229  /// trace, as determined by data dependencies and instruction latencies.
230  unsigned Height;
231  };
232 
233  /// A trace represents a plausible sequence of executed basic blocks that
234  /// passes through the current basic block one. The Trace class serves as a
235  /// handle to internal cached data structures.
236  class Trace {
237  Ensemble &TE;
238  TraceBlockInfo &TBI;
239 
240  unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
241 
242  public:
243  explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
244  void print(raw_ostream&) const;
245 
246  /// Compute the total number of instructions in the trace.
247  unsigned getInstrCount() const {
248  return TBI.InstrDepth + TBI.InstrHeight;
249  }
250 
251  /// Return the resource depth of the top/bottom of the trace center block.
252  /// This is the number of cycles required to execute all instructions from
253  /// the trace head to the trace center block. The resource depth only
254  /// considers execution resources, it ignores data dependencies.
255  /// When Bottom is set, instructions in the trace center block are included.
256  unsigned getResourceDepth(bool Bottom) const;
257 
258  /// Return the resource length of the trace. This is the number of cycles
259  /// required to execute the instructions in the trace if they were all
260  /// independent, exposing the maximum instruction-level parallelism.
261  ///
262  /// Any blocks in Extrablocks are included as if they were part of the
263  /// trace. Likewise, extra resources required by the specified scheduling
264  /// classes are included. For the caller to account for extra machine
265  /// instructions, it must first resolve each instruction's scheduling class.
266  unsigned getResourceLength(
268  ArrayRef<const MCSchedClassDesc*> ExtraInstrs = None) const;
269 
270  /// Return the length of the (data dependency) critical path through the
271  /// trace.
272  unsigned getCriticalPath() const { return TBI.CriticalPath; }
273 
274  /// Return the depth and height of MI. The depth is only valid for
275  /// instructions in or above the trace center block. The height is only
276  /// valid for instructions in or below the trace center block.
278  return TE.Cycles.lookup(MI);
279  }
280 
281  /// Return the slack of MI. This is the number of cycles MI can be delayed
282  /// before the critical path becomes longer.
283  /// MI must be an instruction in the trace center block.
284  unsigned getInstrSlack(const MachineInstr *MI) const;
285 
286  /// Return the Depth of a PHI instruction in a trace center block successor.
287  /// The PHI does not have to be part of the trace.
288  unsigned getPHIDepth(const MachineInstr *PHI) const;
289  };
290 
291  /// A trace ensemble is a collection of traces selected using the same
292  /// strategy, for example 'minimum resource height'. There is one trace for
293  /// every block in the function.
294  class Ensemble {
297  SmallVector<unsigned, 0> ProcResourceDepths;
298  SmallVector<unsigned, 0> ProcResourceHeights;
299  friend class Trace;
300 
301  void computeTrace(const MachineBasicBlock*);
302  void computeDepthResources(const MachineBasicBlock*);
303  void computeHeightResources(const MachineBasicBlock*);
304  unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
305  void computeInstrDepths(const MachineBasicBlock*);
306  void computeInstrHeights(const MachineBasicBlock*);
307  void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
309 
310  protected:
312  virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
313  virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
314  explicit Ensemble(MachineTraceMetrics*);
315  const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
318  ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
319  ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
320 
321  public:
322  virtual ~Ensemble();
323  virtual const char *getName() const =0;
324  void print(raw_ostream&) const;
325  void invalidate(const MachineBasicBlock *MBB);
326  void verify() const;
327 
328  /// Get the trace that passes through MBB.
329  /// The trace is computed on demand.
330  Trace getTrace(const MachineBasicBlock *MBB);
331  };
332 
333  /// Strategies for selecting traces.
334  enum Strategy {
335  /// Select the trace through a block that has the fewest instructions.
337 
339  };
340 
341  /// Get the trace ensemble representing the given trace selection strategy.
342  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
343  /// and valid for the lifetime of the analysis pass.
345 
346  /// Invalidate cached information about MBB. This must be called *before* MBB
347  /// is erased, or the CFG is otherwise changed.
348  ///
349  /// This invalidates per-block information about resource usage for MBB only,
350  /// and it invalidates per-trace information for any trace that passes
351  /// through MBB.
352  ///
353  /// Call Ensemble::getTrace() again to update any trace handles.
354  void invalidate(const MachineBasicBlock *MBB);
355 
356 private:
357  // One entry per basic block, indexed by block number.
359 
360  // Cycles consumed on each processor resource per block.
361  // The number of processor resource kinds is constant for a given subtarget,
362  // but it is not known at compile time. The number of cycles consumed by
363  // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
364  // where Kinds = SchedModel.getNumProcResourceKinds().
365  SmallVector<unsigned, 0> ProcResourceCycles;
366 
367  // One ensemble per strategy.
368  Ensemble* Ensembles[TS_NumStrategies];
369 
370  // Convert scaled resource usage to a cycle count that can be compared with
371  // latencies.
372  unsigned getCycles(unsigned Scaled) {
373  unsigned Factor = SchedModel.getLatencyFactor();
374  return (Scaled + Factor - 1) / Factor;
375  }
376 };
377 
379  const MachineTraceMetrics::Trace &Tr) {
380  Tr.print(OS);
381  return OS;
382 }
383 
385  const MachineTraceMetrics::Ensemble &En) {
386  En.print(OS);
387  return OS;
388 }
389 } // end namespace llvm
390 
391 #endif
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
bool HasCalls
True when the block contains calls.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
Trace(Ensemble &te, TraceBlockInfo &tbi)
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
Provide an instruction scheduling machine model to CodeGen passes.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
Strategy
Strategies for selecting traces.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
#define false
Definition: ConvertUTF.c:64
LiveInReg(unsigned Reg, unsigned Height=0)
virtual const MachineBasicBlock * pickTraceSucc(const MachineBasicBlock *)=0
Select the trace through a block that has the fewest instructions.
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
void invalidate()
Invalidate resource information.
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().
Ensemble * getEnsemble(Strategy)
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources. This is the number o...
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
virtual const char * getName() const =0
virtual const MachineBasicBlock * pickTracePred(const MachineBasicBlock *)=0
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
bool runOnMachineFunction(MachineFunction &)
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=None, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=None) const
void invalidate(const MachineBasicBlock *MBB)
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
unsigned Reg
The virtual register required, or a register unit.
bool hasResources() const
Returns true when resource information for this block has been computed.
InstrCycles getInstrCycles(const MachineInstr *MI) const
void getAnalysisUsage(AnalysisUsage &) const
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:1688
unsigned getPHIDepth(const MachineInstr *PHI) const
unsigned getResourceDepth(bool Bottom) const
ArrayRef< unsigned > getProcResourceCycles(unsigned MBBNum) const
Trace getTrace(const MachineBasicBlock *MBB)
unsigned getInstrSlack(const MachineInstr *MI) const