LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
R600Packetizer.cpp
Go to the documentation of this file.
1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This pass implements instruction packetization for R600. It unsets isLast
12 /// bit of instructions inside a bundle and substitutes src register with
13 /// PreviousVector when applicable.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #define DEBUG_TYPE "packets"
18 #include "llvm/Support/Debug.h"
19 #include "AMDGPU.h"
20 #include "R600InstrInfo.h"
25 #include "llvm/CodeGen/Passes.h"
28 
29 using namespace llvm;
30 
31 namespace {
32 
/// MachineFunctionPass entry point for R600 packetization. No non-static data
/// members are visible in this class; runOnMachineFunction() is defined out of
/// line further down the file.
33 class R600Packetizer : public MachineFunctionPass {
34 
35 public:
36  static char ID;
// The TargetMachine argument is accepted but not used by this constructor.
37  R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
38 
39  void getAnalysisUsage(AnalysisUsage &AU) const {
40  AU.setPreservesCFG();
// NOTE(review): this rendering jumps from listing line 40 to 46, so the rest
// of the body is not shown. runOnMachineFunction() calls
// getAnalysis<MachineLoopInfo>() and getAnalysis<MachineDominatorTree>(), so
// the hidden lines presumably declare those analyses -- confirm against the
// real source.
46  }
47 
48  const char *getPassName() const {
49  return "R600 Packetizer";
50  }
51 
52  bool runOnMachineFunction(MachineFunction &Fn);
53 };
// Definition of the static pass ID declared inside the class.
54 char R600Packetizer::ID = 0;
55 
/// VLIWPacketizerList subclass that supplies the R600-specific packetization
/// hooks defined in the rest of this class.
56 class R600PacketizerList : public VLIWPacketizerList {
57 
58 private:
// Instruction info, obtained from MF.getTarget() in the constructor.
59  const R600InstrInfo *TII;
// Register info, taken from TII->getRegisterInfo() in the constructor.
60  const R600RegisterInfo &TRI;
// True when the subtarget does not report hasCaymanISA(); set in the
// constructor and consulted by isBundlableWithCurrentPMI().
61  bool VLIW5;
// Reset to false by initPacketizerState(), set to true by
// isLegalToPacketizeTogether() when two instructions share a slot, and read
// by isBundlableWithCurrentPMI().
62  bool ConsideredInstUsesAlreadyWrittenVectorElement;
63 
/// \returns the hardware channel (TRI.getHWRegChan) of the register held in
/// operand 0 of \p MI. The rest of this class compares these values as the
/// instruction's slot.
64  unsigned getSlot(const MachineInstr *MI) const {
65  return TRI.getHWRegChan(MI->getOperand(0).getReg());
66  }
67 
68  /// \returns register to PV chan mapping for the bundle/single instruction
69  /// that immediately precedes I.
///
/// Each destination register written by that instruction (or by the members
/// of that bundle) is mapped to AMDGPU::PS or to one of AMDGPU::PV_X..PV_W.
/// Predicated instructions, instructions whose `write` operand is 0,
/// instructions without a `dst` operand, and writes to AMDGPU::OQAP add no
/// entry.
// NOTE(review): listing lines 70 and 72 are absent from this rendering; they
// presumably hold the function name/parameter list and the declaration of
// `Result` -- confirm against the real source.
71  const {
// Step back to the instruction (or bundle) just before I.
73  I--;
// Anything that is neither an ALU instruction nor a bundle returns Result as
// it stands at this point.
74  if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
75  return Result;
76  MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
// For a bundle, move from the bundle instruction itself to the one after it.
77  if (I->isBundle())
78  BI++;
79  int LastDstChan = -1;
80  do {
// An instruction whose slot does not increase relative to the previously
// visited one is treated as a Trans-slot instruction.
81  bool isTrans = false;
82  int BISlot = getSlot(BI);
83  if (LastDstChan >= BISlot)
84  isTrans = true;
85  LastDstChan = BISlot;
// Every `continue` below jumps to the loop condition, which still advances BI.
86  if (TII->isPredicated(BI))
87  continue;
// Skip instructions that have a `write` operand set to 0.
88  int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
89  if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
90  continue;
91  int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
92  if (DstIdx == -1) {
93  continue;
94  }
95  unsigned Dst = BI->getOperand(DstIdx).getReg();
// Trans-slot results are mapped to PS.
96  if (isTrans || TII->isTransOnly(BI)) {
97  Result[Dst] = AMDGPU::PS;
98  continue;
99  }
// DOT4 results are mapped to PV_X regardless of the destination channel.
100  if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
101  BI->getOpcode() == AMDGPU::DOT4_eg) {
102  Result[Dst] = AMDGPU::PV_X;
103  continue;
104  }
105  if (Dst == AMDGPU::OQAP) {
106  continue;
107  }
// Otherwise pick the PV register matching the destination's hardware channel.
108  unsigned PVReg = 0;
109  switch (TRI.getHWRegChan(Dst)) {
110  case 0:
111  PVReg = AMDGPU::PV_X;
112  break;
113  case 1:
114  PVReg = AMDGPU::PV_Y;
115  break;
116  case 2:
117  PVReg = AMDGPU::PV_Z;
118  break;
119  case 3:
120  PVReg = AMDGPU::PV_W;
121  break;
122  default:
123  llvm_unreachable("Invalid Chan");
124  }
125  Result[Dst] = PVReg;
// Keep going while the next instruction is bundled with its predecessor.
126  } while ((++BI)->isBundledWithPred());
127  return Result;
128  }
129 
/// Rewrites the src0/src1/src2 register operands of \p MI: when a source
/// register has an entry in \p PVs, the operand is set to the mapped register.
/// A source operand the opcode does not have (getOperandIdx < 0) is skipped.
130  void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
131  const {
132  unsigned Ops[] = {
133  AMDGPU::OpName::src0,
134  AMDGPU::OpName::src1,
135  AMDGPU::OpName::src2
136  };
137  for (unsigned i = 0; i < 3; i++) {
138  int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
139  if (OperandIdx < 0)
140  continue;
141  unsigned Src = MI->getOperand(OperandIdx).getReg();
// NOTE(review): listing line 142 is absent from this rendering; it presumably
// declares `It` as the result of looking Src up in PVs -- confirm against the
// real source.
143  if (It != PVs.end())
144  MI->getOperand(OperandIdx).setReg(It->second);
145  }
146  }
147 public:
148  // Ctor.
// Forwards MF, MLI and MDT to VLIWPacketizerList (with a final `true`
// argument), caches the R600 instruction and register info, and sets VLIW5
// to the negation of the subtarget's hasCaymanISA() query.
// NOTE(review): listing line 150 is absent from this rendering; it presumably
// declares the `MDT` parameter used in the initializer list -- confirm.
149  R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
151  : VLIWPacketizerList(MF, MLI, MDT, true),
152  TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
153  TRI(TII->getRegisterInfo()) {
154  VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
155  }
156 
157  // initPacketizerState - initialize some internal flags.
// Clears ConsideredInstUsesAlreadyWrittenVectorElement, which
// isLegalToPacketizeTogether() may set again.
158  void initPacketizerState() {
159  ConsideredInstUsesAlreadyWrittenVectorElement = false;
160  }
161 
162  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
// This implementation always returns false; neither argument is inspected.
163  bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
164  return false;
165  }
166 
167  // isSoloInstruction - return true if instruction MI can not be packetized
168  // with any other instruction, which means that MI itself is a packet.
// Solo cases checked here: vector instructions, non-ALU instructions,
// GROUP_BARRIER, and (for now, see the XXX note) LDS instructions.
169  bool isSoloInstruction(MachineInstr *MI) {
170  if (TII->isVector(*MI))
171  return true;
172  if (!TII->isALUInstr(MI->getOpcode()))
173  return true;
174  if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
175  return true;
176  // XXX: This can be removed once the packetizer properly handles all the
177  // LDS instruction group restrictions.
178  if (TII->isLDSInstr(MI->getOpcode()))
179  return true;
180  return false;
181  }
182 
183  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
184  // together.
// Returns false when the two instructions use different pred_sel registers,
// when SUJ has a blocking dependence edge to SUI, or when either instruction
// defines the address register while either one uses it.
// Side effect: sets ConsideredInstUsesAlreadyWrittenVectorElement when both
// instructions map to the same slot; this happens before any check can
// return.
185  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
186  MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
187  if (getSlot(MII) == getSlot(MIJ))
188  ConsideredInstUsesAlreadyWrittenVectorElement = true;
189  // Do MII and MIJ share the same pred_sel?
// An opcode without a pred_sel operand is treated as predicate register 0.
190  int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
191  OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
192  unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
193  PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
194  if (PredI != PredJ)
195  return false;
196  if (SUJ->isSucc(SUI)) {
// Inspect every SUJ -> SUI edge: anti dependences never block, output
// dependences are skipped when the two instructions' operand-0 registers
// differ, and any other edge blocks packetization.
197  for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
198  const SDep &Dep = SUJ->Succs[i];
199  if (Dep.getSUnit() != SUI)
200  continue;
201  if (Dep.getKind() == SDep::Anti)
202  continue;
203  if (Dep.getKind() == SDep::Output)
204  if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
205  continue;
206  return false;
207  }
208  }
209 
// A definition and a use of the address register may not share a packet.
210  bool ARDef = TII->definesAddressRegister(MII) ||
211  TII->definesAddressRegister(MIJ);
212  bool ARUse = TII->usesAddressRegister(MII) ||
213  TII->usesAddressRegister(MIJ);
214  if (ARDef && ARUse)
215  return false;
216 
217  return true;
218  }
219 
220  // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
221  // and SUJ.
// This implementation always returns false; neither argument is inspected.
222  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;}
223 
/// Sets the `last` operand of \p MI to \p Bit.
// NOTE(review): the getOperandIdx result is used without the "negative means
// absent" check that other call sites in this file apply, so this presumably
// relies on every caller passing an instruction that has a `last` operand --
// confirm.
224  void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
225  unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
226  MI->getOperand(LastOp).setImm(Bit);
227  }
228 
/// Decides whether \p MI can join the instructions already in
/// CurrentPacketMIs.
///
/// \param MI candidate instruction.
/// \param BS passed through to TII->fitsReadPortLimitations(); the caller
///        reads bank-swizzle values from it after a successful return.
/// \param isTransSlot [out] true when MI is Trans-only, or when MI is
///        re-classified as a Trans instruction by the slot-order check.
/// \returns true when the destination-slot order, constant-read and read-port
///          checks all pass and the LDS/Trans restriction is not hit.
// NOTE(review): listing line 230 is absent from this rendering; it presumably
// declares the `PV` parameter used in the read-port check -- confirm against
// the real source.
229  bool isBundlableWithCurrentPMI(MachineInstr *MI,
231  std::vector<R600InstrInfo::BankSwizzle> &BS,
232  bool &isTransSlot) {
233  isTransSlot = TII->isTransOnly(MI);
234  assert (!isTransSlot || VLIW5);
235 
236  // Is the dst reg sequence legal ?
// MI's slot must be strictly greater than that of the last packet member.
// A candidate that breaks the order is rejected unless
// ConsideredInstUsesAlreadyWrittenVectorElement is set, MI is not vector-only
// and VLIW5 is true; in that case MI is considered for the Trans slot.
237  if (!isTransSlot && !CurrentPacketMIs.empty()) {
238  if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
239  if (ConsideredInstUsesAlreadyWrittenVectorElement &&
240  !TII->isVectorOnly(MI) && VLIW5) {
241  isTransSlot = true;
242  DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
243  }
244  else
245  return false;
246  }
247  }
248 
249  // Are the Constants limitations met ?
// MI is appended temporarily so the limitation checks see the whole
// candidate packet. The debug loops below stop at size() - 1 so that MI,
// already dumped, is not printed again.
250  CurrentPacketMIs.push_back(MI);
251  if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
252  DEBUG(
253  dbgs() << "Couldn't pack :\n";
254  MI->dump();
255  dbgs() << "with the following packets :\n";
256  for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
257  CurrentPacketMIs[i]->dump();
258  dbgs() << "\n";
259  }
260  dbgs() << "because of Consts read limitations\n";
261  );
262  CurrentPacketMIs.pop_back();
263  return false;
264  }
265 
266  // Is there a BankSwizzle set that meet Read Port limitations ?
267  if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
268  PV, BS, isTransSlot)) {
269  DEBUG(
270  dbgs() << "Couldn't pack :\n";
271  MI->dump();
272  dbgs() << "with the following packets :\n";
273  for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
274  CurrentPacketMIs[i]->dump();
275  dbgs() << "\n";
276  }
277  dbgs() << "because of Read port limitations\n";
278  );
279  CurrentPacketMIs.pop_back();
280  return false;
281  }
282 
283  // We cannot read LDS source registers from the Trans slot.
// NOTE(review): unlike the two failure paths above, this return does not pop
// MI from CurrentPacketMIs first -- confirm that the caller tolerates the
// extra entry, or pop before returning.
284  if (isTransSlot && TII->readsLDSSrcReg(MI))
285  return false;
286 
// Undo the temporary push before reporting success.
287  CurrentPacketMIs.pop_back();
288  return true;
289  }
290 
/// Tries to add \p MI to the packet being built.
///
/// If isBundlableWithCurrentPMI() accepts MI, the bank_swizzle operands of the
/// current packet members and of MI are set from BS, the `last` operand of the
/// previous final member is cleared, and PV registers are substituted into
/// MI's sources. Otherwise the current packet is ended at MI.
291  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
// The PV mapping is computed relative to the first instruction of the packet
// (or MI itself when the packet is still empty).
292  MachineBasicBlock::iterator FirstInBundle =
293  CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
294  const DenseMap<unsigned, unsigned> &PV =
295  getPreviousVector(FirstInBundle);
296  std::vector<R600InstrInfo::BankSwizzle> BS;
297  bool isTransSlot;
298 
299  if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
// The loop-local MI below shadows the parameter for the duration of the loop.
300  for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
301  MachineInstr *MI = CurrentPacketMIs[i];
302  unsigned Op = TII->getOperandIdx(MI->getOpcode(),
303  AMDGPU::OpName::bank_swizzle);
304  MI->getOperand(Op).setImm(BS[i]);
305  }
// Back to the parameter MI: it receives the last swizzle in BS.
306  unsigned Op = TII->getOperandIdx(MI->getOpcode(),
307  AMDGPU::OpName::bank_swizzle);
308  MI->getOperand(Op).setImm(BS.back());
// The previous final member is no longer last once MI joins the packet.
309  if (!CurrentPacketMIs.empty())
310  setIsLastBit(CurrentPacketMIs.back(), 0);
311  substitutePV(MI, PV);
// NOTE(review): listing line 312 is absent from this rendering; it presumably
// declares `It`, the iterator used and returned below -- confirm against the
// real source.
// When MI went into the Trans slot, the packet is ended at the position
// after It.
313  if (isTransSlot) {
314  endPacket(llvm::next(It)->getParent(), llvm::next(It));
315  }
316  return It;
317  }
// MI could not be bundled: end the current packet at MI.
318  endPacket(MI->getParent(), MI);
319  if (TII->isTransOnly(MI))
320  return MI;
// NOTE(review): listing line 321 is absent from this rendering; it presumably
// holds the return for the case where MI is not Trans-only -- confirm.
322  }
323 };
324 
/// Runs the packetizer over \p Fn in two passes over its basic blocks: the
/// first erases instructions that would disturb dependence analysis, the
/// second hands instruction ranges to R600PacketizerList::PacketizeMIs.
/// \returns true unconditionally.
325 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
326  const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
327  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
328  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
329 
330  // Instantiate the packetizer.
331  R600PacketizerList Packetizer(Fn, MLI, MDT);
332 
333  // DFA state table should not be empty.
334  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
335 
336  //
337  // Loop over all basic blocks and remove KILL pseudo-instructions
338  // These instructions confuse the dependence analysis. Consider:
339  // D0 = ... (Insn 0)
340  // R0 = KILL R0, D0 (Insn 1)
341  // R0 = ... (Insn 2)
342  // Here, Insn 1 will result in the dependence graph not emitting an output
343  // dependence between Insn 0 and Insn 2. This can lead to incorrect
344  // packetization
345  //
// Besides KILL, the loop below also erases IMPLICIT_DEF and any CF_ALU whose
// operand 8 immediate is zero.
// NOTE(review): the meaning of CF_ALU operand 8 is not visible in this file
// -- confirm before relying on it.
346  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
347  MBB != MBBe; ++MBB) {
348  MachineBasicBlock::iterator End = MBB->end();
349  MachineBasicBlock::iterator MI = MBB->begin();
350  while (MI != End) {
351  if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
352  (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
// Advance MI first; DeleteMI is the instruction that gets erased. End is
// refreshed after the erase.
353  MachineBasicBlock::iterator DeleteMI = MI;
354  ++MI;
355  MBB->erase(DeleteMI);
356  End = MBB->end();
357  continue;
358  }
359  ++MI;
360  }
361  }
362 
363  // Loop over all of the basic blocks.
364  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
365  MBB != MBBe; ++MBB) {
366  // Find scheduling regions and schedule / packetize each region.
// RemainingCount is only decremented in this function; it is never read.
367  unsigned RemainingCount = MBB->size();
368  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
369  RegionEnd != MBB->begin();) {
370  // The next region starts above the previous region. Look backward in the
371  // instruction stream until we find the nearest boundary.
372  MachineBasicBlock::iterator I = RegionEnd;
373  for(;I != MBB->begin(); --I, --RemainingCount) {
374  if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
375  break;
376  }
// NOTE(review): this assignment discards the boundary position found by the
// scan above, so the region handed to PacketizeMIs always starts at the top
// of the block -- confirm this is intended.
377  I = MBB->begin();
378 
379  // Skip empty scheduling regions.
380  if (I == RegionEnd) {
381  RegionEnd = llvm::prior(RegionEnd);
382  --RemainingCount;
383  continue;
384  }
385  // Skip regions with one instruction.
386  if (I == llvm::prior(RegionEnd)) {
387  RegionEnd = llvm::prior(RegionEnd);
388  continue;
389  }
390 
391  Packetizer.PacketizeMIs(MBB, I, RegionEnd);
392  RegionEnd = I;
393  }
394  }
395 
396  return true;
397 
398 }
399 
400 } // end anonymous namespace
401 
// NOTE(review): listing line 402 is absent from this rendering; it presumably
// holds the signature of createR600Packetizer(TargetMachine &tm) -- confirm
// against the real source.
// Returns a newly allocated R600Packetizer constructed from tm.
403  return new R600Packetizer(tm);
404 }
AnalysisUsage & addPreserved()
bool isSucc(SUnit *N)
isSucc - Test if node N is a successor of this node.
Definition: ScheduleDAG.h:446
Interface definition for R600InstrInfo.
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
Hexagon Packetizer
MachineInstr * getInstr() const
Definition: ScheduleDAG.h:386
virtual bool isPredicated(const MachineInstr *MI) const
virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Instructions::iterator instr_iterator
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:50
AnalysisUsage & addRequired()
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:51
int getOpcode() const
Definition: MachineInstr.h:261
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:119
bundle_iterator< MachineInstr, instr_iterator > iterator
const MCInstrInfo & MII
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:267
FunctionPass * createR600Packetizer(TargetMachine &tm)
ItTy next(ItTy it, Dist n)
Definition: STLExtras.h:154
void setImm(int64_t immVal)
iterator end()
Definition: DenseMap.h:57
virtual const TargetInstrInfo * getInstrInfo() const
const STC & getSubtarget() const
void setPreservesCFG()
Definition: Pass.cpp:249
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
void dump() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:160
ssize_t write(int fildes, const void *buf, size_t nbyte);
virtual void getAnalysisUsage(AnalysisUsage &AU) const
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
Definition: TargetOpcodes.h:52
void setReg(unsigned Reg)
#define I(x, y, z)
Definition: MD5.cpp:54
const TargetMachine & getTarget() const
Kind getKind() const
getKind - Return an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:170
unsigned getReg() const
getReg - Returns the register number.
virtual const HexagonRegisterInfo & getRegisterInfo() const
SmallVector< SDep, 4 > Succs
Definition: ScheduleDAG.h:264
static const Function * getParent(const Value *V)
BasicBlockListType::iterator iterator
ItTy prior(ItTy it, Dist n)
Definition: STLExtras.h:167
#define DEBUG(X)
Definition: Debug.h:97
iterator find(const KeyT &Val)
Definition: DenseMap.h:108
SUnit - Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI)