LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SIInsertWaits.cpp
Go to the documentation of this file.
1 //===-- SIInsertWaits.cpp - Insert wait instructions for memory accesses --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Insert wait instructions for memory reads and writes.
12 ///
13 /// Memory reads and writes are issued asynchronously, so we need to insert
14 /// S_WAITCNT instructions when we want to access any of their results or
15 /// overwrite any register that's used asynchronously.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "AMDGPU.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
26 
27 using namespace llvm;
28 
29 namespace {
30 
31 /// \brief One variable for each of the hardware counters
/// \brief One variable for each of the hardware counters
///
/// The same three values can be reached by name (Named.VM, Named.EXP,
/// Named.LGKM) or by index (Array[0..2]); the indexed view is what the
/// loops in this file iterate over.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};

/// \brief One Counters snapshot per register slot.
///
/// Slots are indexed by the register encoding value; a register of Size
/// bytes occupies Size / 4 consecutive slots (see getRegInterval).
typedef Counters RegCounters[512];

/// \brief Half-open range [first, second) of register slots.
typedef std::pair<unsigned, unsigned> RegInterval;
44 
45 class SIInsertWaits : public MachineFunctionPass {
46 
47 private:
48  static char ID;
49  const SIInstrInfo *TII;
50  const SIRegisterInfo *TRI;
51  const MachineRegisterInfo *MRI;
52 
53  /// \brief Constant hardware limits
54  static const Counters WaitCounts;
55 
56  /// \brief Constant zero value
57  static const Counters ZeroCounts;
58 
59  /// \brief Counter values we have already waited on.
60  Counters WaitedOn;
61 
62  /// \brief Counter values for last instruction issued.
63  Counters LastIssued;
64 
65  /// \brief Registers used by async instructions.
66  RegCounters UsedRegs;
67 
68  /// \brief Registers defined by async instructions.
69  RegCounters DefinedRegs;
70 
71  /// \brief Different export instruction types seen since last wait.
72  unsigned ExpInstrTypesSeen;
73 
74  /// \brief Get increment/decrement amount for this instruction.
75  Counters getHwCounts(MachineInstr &MI);
76 
77  /// \brief Is operand relevant for async execution?
78  bool isOpRelevant(MachineOperand &Op);
79 
80  /// \brief Get register interval an operand affects.
81  RegInterval getRegInterval(MachineOperand &Op);
82 
83  /// \brief Handle instructions async components
84  void pushInstruction(MachineInstr &MI);
85 
86  /// \brief Insert the actual wait instruction
87  bool insertWait(MachineBasicBlock &MBB,
89  const Counters &Counts);
90 
91  /// \brief Do we need def2def checks?
92  bool unorderedDefines(MachineInstr &MI);
93 
94  /// \brief Resolve all operand dependencies to counter requirements
95  Counters handleOperands(MachineInstr &MI);
96 
97 public:
98  SIInsertWaits(TargetMachine &tm) :
100  TII(0),
101  TRI(0),
102  ExpInstrTypesSeen(0) { }
103 
104  virtual bool runOnMachineFunction(MachineFunction &MF);
105 
106  const char *getPassName() const {
107  return "SI insert wait instructions";
108  }
109 
110 };
111 
112 } // End anonymous namespace
113 
114 char SIInsertWaits::ID = 0;
115 
116 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
117 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
118 
120  return new SIInsertWaits(tm);
121 }
122 
123 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
124 
125  uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
126  Counters Result;
127 
128  Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
129 
130  // Only consider stores or EXP for EXP_CNT
131  Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
132  (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
133 
134  // LGKM may uses larger values
135  if (TSFlags & SIInstrFlags::LGKM_CNT) {
136 
137  if (TII->isSMRD(MI.getOpcode())) {
138 
139  MachineOperand &Op = MI.getOperand(0);
140  assert(Op.isReg() && "First LGKM operand must be a register!");
141 
142  unsigned Reg = Op.getReg();
143  unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
144  Result.Named.LGKM = Size > 4 ? 2 : 1;
145 
146  } else {
147  // DS
148  Result.Named.LGKM = 1;
149  }
150 
151  } else {
152  Result.Named.LGKM = 0;
153  }
154 
155  return Result;
156 }
157 
158 bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
159 
160  // Constants are always irrelevant
161  if (!Op.isReg())
162  return false;
163 
164  // Defines are always relevant
165  if (Op.isDef())
166  return true;
167 
168  // For exports all registers are relevant
169  MachineInstr &MI = *Op.getParent();
170  if (MI.getOpcode() == AMDGPU::EXP)
171  return true;
172 
173  // For stores the stored value is also relevant
174  if (!MI.getDesc().mayStore())
175  return false;
176 
178  E = MI.operands_end(); I != E; ++I) {
179 
180  if (I->isReg() && I->isUse())
181  return Op.isIdenticalTo(*I);
182  }
183 
184  return false;
185 }
186 
187 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
188 
189  if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
190  return std::make_pair(0, 0);
191 
192  unsigned Reg = Op.getReg();
193  unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
194 
195  assert(Size >= 4);
196 
197  RegInterval Result;
198  Result.first = TRI->getEncodingValue(Reg);
199  Result.second = Result.first + Size / 4;
200 
201  return Result;
202 }
203 
204 void SIInsertWaits::pushInstruction(MachineInstr &MI) {
205 
206  // Get the hardware counter increments and sum them up
207  Counters Increment = getHwCounts(MI);
208  unsigned Sum = 0;
209 
210  for (unsigned i = 0; i < 3; ++i) {
211  LastIssued.Array[i] += Increment.Array[i];
212  Sum += Increment.Array[i];
213  }
214 
215  // If we don't increase anything then that's it
216  if (Sum == 0)
217  return;
218 
219  // Remember which export instructions we have seen
220  if (Increment.Named.EXP) {
221  ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
222  }
223 
224  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
225 
226  MachineOperand &Op = MI.getOperand(i);
227  if (!isOpRelevant(Op))
228  continue;
229 
230  RegInterval Interval = getRegInterval(Op);
231  for (unsigned j = Interval.first; j < Interval.second; ++j) {
232 
233  // Remember which registers we define
234  if (Op.isDef())
235  DefinedRegs[j] = LastIssued;
236 
237  // and which one we are using
238  if (Op.isUse())
239  UsedRegs[j] = LastIssued;
240  }
241  }
242 }
243 
244 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
246  const Counters &Required) {
247 
248  // End of program? No need to wait on anything
249  if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
250  return false;
251 
252  // Figure out if the async instructions execute in order
253  bool Ordered[3];
254 
255  // VM_CNT is always ordered
256  Ordered[0] = true;
257 
258  // EXP_CNT is unordered if we have both EXP & VM-writes
259  Ordered[1] = ExpInstrTypesSeen == 3;
260 
261  // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
262  Ordered[2] = false;
263 
264  // The values we are going to put into the S_WAITCNT instruction
265  Counters Counts = WaitCounts;
266 
267  // Do we really need to wait?
268  bool NeedWait = false;
269 
270  for (unsigned i = 0; i < 3; ++i) {
271 
272  if (Required.Array[i] <= WaitedOn.Array[i])
273  continue;
274 
275  NeedWait = true;
276 
277  if (Ordered[i]) {
278  unsigned Value = LastIssued.Array[i] - Required.Array[i];
279 
280  // adjust the value to the real hardware posibilities
281  Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
282 
283  } else
284  Counts.Array[i] = 0;
285 
286  // Remember on what we have waited on
287  WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
288  }
289 
290  if (!NeedWait)
291  return false;
292 
293  // Reset EXP_CNT instruction types
294  if (Counts.Named.EXP == 0)
295  ExpInstrTypesSeen = 0;
296 
297  // Build the wait instruction
298  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
299  .addImm((Counts.Named.VM & 0xF) |
300  ((Counts.Named.EXP & 0x7) << 4) |
301  ((Counts.Named.LGKM & 0x7) << 8));
302 
303  return true;
304 }
305 
306 /// \brief helper function for handleOperands
307 static void increaseCounters(Counters &Dst, const Counters &Src) {
308 
309  for (unsigned i = 0; i < 3; ++i)
310  Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
311 }
312 
313 Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
314 
315  Counters Result = ZeroCounts;
316 
317  // For each register affected by this
318  // instruction increase the result sequence
319  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
320 
321  MachineOperand &Op = MI.getOperand(i);
322  RegInterval Interval = getRegInterval(Op);
323  for (unsigned j = Interval.first; j < Interval.second; ++j) {
324 
325  if (Op.isDef()) {
326  increaseCounters(Result, UsedRegs[j]);
327  increaseCounters(Result, DefinedRegs[j]);
328  }
329 
330  if (Op.isUse())
331  increaseCounters(Result, DefinedRegs[j]);
332  }
333  }
334 
335  return Result;
336 }
337 
338 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
339  bool Changes = false;
340 
341  TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
342  TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
343 
344  MRI = &MF.getRegInfo();
345 
346  WaitedOn = ZeroCounts;
347  LastIssued = ZeroCounts;
348 
349  memset(&UsedRegs, 0, sizeof(UsedRegs));
350  memset(&DefinedRegs, 0, sizeof(DefinedRegs));
351 
352  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
353  BI != BE; ++BI) {
354 
355  MachineBasicBlock &MBB = *BI;
356  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
357  I != E; ++I) {
358 
359  Changes |= insertWait(MBB, I, handleOperands(*I));
360  pushInstruction(*I);
361  }
362 
363  // Wait for everything at the end of the MBB
364  Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
365  }
366 
367  return Changes;
368 }
mop_iterator operands_end()
Definition: MachineInstr.h:285
MachineInstr * getParent()
bool mayStore() const
Return true if this instruction could possibly modify memory. Instructions with this flag set are not...
Definition: MCInstrDesc.h:376
const MCInstrDesc & getDesc() const
Definition: MachineInstr.h:257
FunctionPass * createSIInsertWaits(TargetMachine &tm)
const HexagonInstrInfo * TII
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
unsigned getNumOperands() const
Definition: MachineInstr.h:265
int getOpcode() const
Definition: MachineInstr.h:261
static void increaseCounters(Counters &Dst, const Counters &Src)
helper function for handleOperands
bundle_iterator< MachineInstr, instr_iterator > iterator
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:267
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
virtual const TargetInstrInfo * getInstrInfo() const
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
#define I(x, y, z)
Definition: MD5.cpp:54
const TargetMachine & getTarget() const
virtual const TargetRegisterInfo * getRegisterInfo() const
unsigned getReg() const
getReg - Returns the register number.
LLVM Value Representation.
Definition: Value.h:66
mop_iterator operands_begin()
Definition: MachineInstr.h:284
BasicBlockListType::iterator iterator
const MCRegisterInfo & MRI
bool isIdenticalTo(const MachineOperand &Other) const