//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their address on the fly; it also sets the STACK_SIZE info.
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "r600cf"
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {

class R600ControlFlowFinalizer : public MachineFunctionPass {

private:
  typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;

  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  static char ID;
  const R600InstrInfo *TII;
  const R600RegisterInfo *TRI;
  unsigned MaxFetchInst;
  const AMDGPUSubtarget &ST;

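  /// \brief KILL and RETURN pseudos take no slot in a clause, so clause
  /// construction skips over them.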
  bool IsTrivialInst(MachineInstr *MI) const {
    switch (MI->getOpcode()) {
    case AMDGPU::KILL:
    case AMDGPU::RETURN:
      return true;
    default:
      return false;
    }
  }

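  /// \brief Map a control-flow pseudo opcode to the native instruction for the
  /// current generation (R600 vs. Evergreen, plus Cayman for CF_END).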
  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
      break;
    case CF_END:
      if (ST.hasCaymanISA()) {
        Opcode = AMDGPU::CF_END_CM;
        break;
      }
      Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
      break;
    }
    assert (Opcode && "No opcode selected");
    return TII->get(Opcode);
  }

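  /// \brief A fetch instruction may join the current clause only if none of
  /// its source registers (tracked as 128-bit super-registers in DstRegs) is
  /// written by an earlier instruction of the same clause.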
  bool isCompatibleWithClause(const MachineInstr *MI,
      std::set<unsigned> &DstRegs) const {
    unsigned DstMI, SrcMI;
    for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
        E = MI->operands_end(); I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        unsigned Reg = MO.getReg();
        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &AMDGPU::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        unsigned Reg = MO.getReg();
        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &AMDGPU::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    } else
      return false;
  }

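  /// \brief Group consecutive texture or vertex fetch instructions starting at
  /// I into one clause, headed by a CF_TC or CF_VC instruction whose COUNT
  /// field holds the number of fetches minus one.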
  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      if ((IsTex && !TII->usesTextureCache(I)) ||
          (!IsTex && !TII->usesVertexCache(I)))
        break;
      if (!isCompatibleWithClause(I, DstRegs))
        break;
      AluInstCount ++;
      ClauseContent.push_back(I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
        getHWInstrDesc(IsTex?CF_TC:CF_VC))
        .addImm(0) // ADDR
        .addImm(AluInstCount - 1); // COUNT
    return ClauseFile(MIb, ClauseContent);
  }

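  /// \brief Collect the literal operands of MI into Lits, rewriting each
  /// ALU_LITERAL_X use to the X/Y/Z/W literal register matching its slot; an
  /// instruction group can hold at most four distinct literals.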
  void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
    static const unsigned LiteralRegs[] = {
      AMDGPU::ALU_LITERAL_X,
      AMDGPU::ALU_LITERAL_Y,
      AMDGPU::ALU_LITERAL_Z,
      AMDGPU::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
      if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
        continue;
      int64_t Imm = Srcs[i].second;
      std::vector<int64_t>::iterator It =
          std::find(Lits.begin(), Lits.end(), Imm);
      if (It != Lits.end()) {
        // Reuse the literal slot already assigned to this immediate.
        unsigned Index = It - Lits.begin();
        Srcs[i].first->setReg(LiteralRegs[Index]);
      } else {
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(Imm);
      }
    }
  }

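  /// \brief Emit one LITERALS instruction per pair of literals, padding the
  /// second immediate with 0 when the count is odd.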
  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
          TII->get(AMDGPU::LITERALS))
          .addImm(LiteralPair0)
          .addImm(LiteralPair1);
    }
    return InsertPos;
  }

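  /// \brief Gather the ALU instructions following the CF_ALU at I into a
  /// clause, unbundling instruction groups, materializing their literals as
  /// LITERALS instructions, and storing the clause size in the CF_ALU COUNT
  /// operand (operand 7).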
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<int64_t> Literals;
      if (I->isBundle()) {
        MachineInstr *DeleteMI = I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = BI->getOperand(i);
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(BI, Literals);
          ClauseContent.push_back(BI);
        }
        I = BI;
        DeleteMI->eraseFromParent();
      } else {
        getLiteral(I, Literals);
        ClauseContent.push_back(I);
        I++;
      }
      for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
        unsigned literal0 = Literals[i];
        unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
        MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
            TII->get(AMDGPU::LITERALS))
            .addImm(literal0)
            .addImm(literal2);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(ClauseHead, ClauseContent);
  }

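  /// \brief Splice a fetch clause back in at InsertPos behind a FETCH_CLAUSE
  /// marker and patch its ADDR to the running control-flow count.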
  void
  EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
      unsigned &CfCount) {
    CounterPropagateAddr(Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
        .addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += 2 * Clause.second.size();
  }

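  /// \brief Same as EmitFetchClause but for an ALU clause behind an ALU_CLAUSE
  /// marker; here CfCount advances by one per instruction instead of two.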
  void
  EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
      unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
        .addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += Clause.second.size();
  }

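  /// \brief Patch the ADDR operand (operand 0) of pending control-flow
  /// instructions once the target address Addr is known.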
  void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
    MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
  }
  void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
      const {
    for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
        It != E; ++It) {
      MachineInstr *MI = *It;
      CounterPropagateAddr(MI, Addr);
    }
  }

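  /// \brief Translate sub-entry usage into the stack size reported to the
  /// hardware: four sub-entries per stack entry, rounded up, with
  /// generation-specific extra sub-entries when the shader uses push.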
  unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
    switch (ST.getGeneration()) {
    case AMDGPUSubtarget::R600:
    case AMDGPUSubtarget::R700:
      if (hasPush)
        StackSubEntry += 2;
      break;
    case AMDGPUSubtarget::EVERGREEN:
      if (hasPush)
        StackSubEntry ++;
    case AMDGPUSubtarget::NORTHERN_ISLANDS:
      StackSubEntry += 2;
      break;
    default: llvm_unreachable("Not a VLIW4/VLIW5 GPU");
    }
    return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
  }

public:
  R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
      TII (0), TRI(0),
      ST(tm.getSubtarget<AMDGPUSubtarget>()) {
    MaxFetchInst = ST.getTexVTXClauseSize();
  }

  virtual bool runOnMachineFunction(MachineFunction &MF) {
    TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
    TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());

    unsigned MaxStack = 0;
    unsigned CurrentStack = 0;
    bool HasPush = false;
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
        ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
      std::vector<MachineInstr * > IfThenElseStack;
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      if (MFI->ShaderType == 1) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
        MaxStack = 1;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

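      // Walk the block: number each native control-flow instruction
      // (CfCount), lower every control-flow pseudo to its hardware form, and
      // collect fetch/ALU clauses so they can be re-emitted after the CF_END.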
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E;) {
        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
          DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          continue;
        }

        MachineInstr *MI = I;
        if (MI->getOpcode() != AMDGPU::ENDIF)
          LastAlu.back() = 0;
        if (MI->getOpcode() == AMDGPU::CF_ALU)
          LastAlu.back() = MI;
        I++;
        switch (MI->getOpcode()) {
        case AMDGPU::CF_ALU_PUSH_BEFORE:
          CurrentStack++;
          MaxStack = std::max(MaxStack, CurrentStack);
          HasPush = true;
        case AMDGPU::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case AMDGPU::WHILELOOP: {
          CurrentStack+=4;
          MaxStack = std::max(MaxStack, CurrentStack);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(Pair);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDLOOP: {
          CurrentStack-=4;
          std::pair<unsigned, std::set<MachineInstr *> > Pair =
              LoopStack.back();
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::IF_PREDICATE_SET: {
          LastAlu.push_back(0);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ELSE: {
          MachineInstr * JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_ELSE))
              .addImm(0)
              .addImm(0);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDIF: {
          CurrentStack--;
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            (void)MIb;
            DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case AMDGPU::BREAK: {
          CfCount ++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_BREAK))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case AMDGPU::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::RETURN: {
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
          CfCount++;
          MI->eraseFromParent();
          if (CfCount % 2) {
            BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
            CfCount++;
          }
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, AluClauses[i], CfCount);
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
            TII->get(AMDGPU::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->StackSize = getHWStackSize(MaxStack, HasPush);
    }

    return false;
  }

  const char *getPassName() const {
    return "R600 Control Flow Finalizer Pass";
  }
};

char R600ControlFlowFinalizer::ID = 0;

} // end anonymous namespace


llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
  return new R600ControlFlowFinalizer(TM);
}