LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
R600ClauseMergePass.cpp
Go to the documentation of this file.
1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
12 /// This pass is merging consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "r600mergeclause"
17 #include "AMDGPU.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
25 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 namespace {
31 
32 static bool isCFAlu(const MachineInstr *MI) {
33  switch (MI->getOpcode()) {
34  case AMDGPU::CF_ALU:
35  case AMDGPU::CF_ALU_PUSH_BEFORE:
36  return true;
37  default:
38  return false;
39  }
40 }
41 
42 class R600ClauseMergePass : public MachineFunctionPass {
43 
44 private:
45  static char ID;
46  const R600InstrInfo *TII;
47 
48  unsigned getCFAluSize(const MachineInstr *MI) const;
49  bool isCFAluEnabled(const MachineInstr *MI) const;
50 
51  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
52  /// removed and their content affected to the previous alu clause.
53  /// This function parse instructions after CFAlu untill it find a disabled
54  /// CFAlu and merge the content, or an enabled CFAlu.
55  void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
56 
57  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
58  /// it is the case.
59  bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
60  const;
61 
62 public:
63  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
64 
65  virtual bool runOnMachineFunction(MachineFunction &MF);
66 
67  const char *getPassName() const;
68 };
69 
71 
72 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
73  assert(isCFAlu(MI));
74  return MI->getOperand(
75  TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
76 }
77 
78 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
79  assert(isCFAlu(MI));
80  return MI->getOperand(
81  TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
82 }
83 
84 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
85  const {
86  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
87  MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
88  I++;
89  do {
90  while (I!= E && !isCFAlu(I))
91  I++;
92  if (I == E)
93  return;
94  MachineInstr *MI = I++;
95  if (isCFAluEnabled(MI))
96  break;
97  CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
98  MI->eraseFromParent();
99  } while (I != E);
100 }
101 
102 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
103  const MachineInstr *LatrCFAlu) const {
104  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
105  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
106  unsigned RootInstCount = getCFAluSize(RootCFAlu),
107  LaterInstCount = getCFAluSize(LatrCFAlu);
108  unsigned CumuledInsts = RootInstCount + LaterInstCount;
109  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
110  DEBUG(dbgs() << "Excess inst counts\n");
111  return false;
112  }
113  if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
114  return false;
115  // Is KCache Bank 0 compatible ?
116  int Mode0Idx =
117  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
118  int KBank0Idx =
119  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
120  int KBank0LineIdx =
121  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
122  if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
123  RootCFAlu->getOperand(Mode0Idx).getImm() &&
124  (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
125  RootCFAlu->getOperand(KBank0Idx).getImm() ||
126  LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
127  RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
128  DEBUG(dbgs() << "Wrong KC0\n");
129  return false;
130  }
131  // Is KCache Bank 1 compatible ?
132  int Mode1Idx =
133  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
134  int KBank1Idx =
135  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
136  int KBank1LineIdx =
137  TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
138  if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
139  RootCFAlu->getOperand(Mode1Idx).getImm() &&
140  (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
141  RootCFAlu->getOperand(KBank1Idx).getImm() ||
142  LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
143  RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
144  DEBUG(dbgs() << "Wrong KC0\n");
145  return false;
146  }
147  if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
148  RootCFAlu->getOperand(Mode0Idx).setImm(
149  LatrCFAlu->getOperand(Mode0Idx).getImm());
150  RootCFAlu->getOperand(KBank0Idx).setImm(
151  LatrCFAlu->getOperand(KBank0Idx).getImm());
152  RootCFAlu->getOperand(KBank0LineIdx).setImm(
153  LatrCFAlu->getOperand(KBank0LineIdx).getImm());
154  }
155  if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
156  RootCFAlu->getOperand(Mode1Idx).setImm(
157  LatrCFAlu->getOperand(Mode1Idx).getImm());
158  RootCFAlu->getOperand(KBank1Idx).setImm(
159  LatrCFAlu->getOperand(KBank1Idx).getImm());
160  RootCFAlu->getOperand(KBank1LineIdx).setImm(
161  LatrCFAlu->getOperand(KBank1LineIdx).getImm());
162  }
163  RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
164  RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
165  return true;
166 }
167 
168 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
169  TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
170  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
171  BB != BB_E; ++BB) {
172  MachineBasicBlock &MBB = *BB;
173  MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
174  MachineBasicBlock::iterator LatestCFAlu = E;
175  while (I != E) {
176  MachineInstr *MI = I++;
177  if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
178  TII->mustBeLastInClause(MI->getOpcode()))
179  LatestCFAlu = E;
180  if (!isCFAlu(MI))
181  continue;
182  cleanPotentialDisabledCFAlu(MI);
183 
184  if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
185  MI->eraseFromParent();
186  } else {
187  assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
188  LatestCFAlu = MI;
189  }
190  }
191  }
192  return false;
193 }
194 
195 const char *R600ClauseMergePass::getPassName() const {
196  return "R600 Merge Clause Markers Pass";
197 }
198 
199 } // end anonymous namespace
200 
201 
203  return new R600ClauseMergePass(TM);
204 }
Interface definition for R600InstrInfo.
Interface definition for R600RegisterInfo.
const HexagonInstrInfo * TII
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
static cl::opt< bool > Enabled("stats", cl::desc("Enable statistics output from program (available with Asserts)"))
int getOpcode() const
Definition: MachineInstr.h:261
FunctionPass * createR600ClauseMergePass(TargetMachine &tm)
int64_t getImm() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:119
bundle_iterator< MachineInstr, instr_iterator > iterator
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:267
void setImm(int64_t immVal)
virtual const TargetInstrInfo * getInstrInfo() const
void setDesc(const MCInstrDesc &tid)
Definition: MachineInstr.h:984
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
#define I(x, y, z)
Definition: MD5.cpp:54
const TargetMachine & getTarget() const
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:97