LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
R600OptimizeVectorRegisters.cpp
Go to the documentation of this file.
1 //===--------------------- R600MergeVectorRegisters.cpp -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This pass merges the inputs of swizzlable instructions into vectors that
12 /// share common data and/or have enough undef subregs, using swizzle abilities.
13 ///
14 /// For instance let's consider the following pseudo code :
15 /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
16 /// ...
17 /// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
18 /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
19 ///
20 /// is turned into :
21 /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
22 /// ...
23 /// vreg7<def> = INSERT_SUBREG vreg4, sub3
24 /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
25 ///
26 /// This allows regalloc to reduce register pressure for vector registers and
27 /// to reduce the MOV count.
28 //===----------------------------------------------------------------------===//
29 
30 #define DEBUG_TYPE "vec-merger"
31 #include "llvm/Support/Debug.h"
32 #include "AMDGPU.h"
33 #include "R600InstrInfo.h"
38 #include "llvm/CodeGen/Passes.h"
42 
43 using namespace llvm;
44 
45 namespace {
46 
47 static bool
48 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
50  E = MRI.def_end(); It != E; ++It) {
51  return (*It).isImplicitDef();
52  }
53  if (MRI.isReserved(Reg)) {
54  return false;
55  }
56  llvm_unreachable("Reg without a def");
57  return false;
58 }
59 
60 class RegSeqInfo {
61 public:
62  MachineInstr *Instr;
64  std::vector<unsigned> UndefReg;
65  RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
66  assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE);
67  for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
68  MachineOperand &MO = Instr->getOperand(i);
69  unsigned Chan = Instr->getOperand(i + 1).getImm();
70  if (isImplicitlyDef(MRI, MO.getReg()))
71  UndefReg.push_back(Chan);
72  else
73  RegToChan[MO.getReg()] = Chan;
74  }
75  }
76  RegSeqInfo() {}
77 
78  bool operator==(const RegSeqInfo &RSI) const {
79  return RSI.Instr == Instr;
80  }
81 };
82 
/// Machine function pass that looks at REG_SEQUENCE instructions whose result
/// is only consumed by swizzlable instructions and tries to rebuild them on top
/// of a previously seen REG_SEQUENCE of the same basic block.
///
/// NOTE(review): the member functions use `MRI` and `PreviousRegSeq`, but no
/// declaration of either is visible in this listing (numbered lines 85 and 104
/// are absent) - confirm against the original file.
83 class R600VectorRegMerger : public MachineFunctionPass {
84 private:
// Target instruction info; null until runOnMachineFunction() sets it.
86  const R600InstrInfo *TII;
// True for instructions whose vector input can be swizzled.
87  bool canSwizzle(const MachineInstr &) const;
// True when every use of Reg satisfies canSwizzle().
88  bool areAllUsesSwizzeable(unsigned Reg) const;
// Rewrites the swizzle immediates of an instruction according to a remap list.
89  void SwizzleInput(MachineInstr &,
90  const std::vector<std::pair<unsigned, unsigned> > &) const;
// Computes a channel remap that lets the second vector live in the first one.
91  bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *,
92  std::vector<std::pair<unsigned, unsigned> > &Remap) const;
// Candidate search among tracked vectors that share a source register.
93  bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
94  std::vector<std::pair<unsigned, unsigned> > &RemapChan);
// Candidate search among tracked vectors bucketed by undef-channel count.
95  bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
96  std::vector<std::pair<unsigned, unsigned> > &RemapChan);
// Replaces a REG_SEQUENCE by INSERT_SUBREGs on top of BaseVec plus a COPY.
97  MachineInstr *RebuildVector(RegSeqInfo *MI,
98  const RegSeqInfo *BaseVec,
99  const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const;
// Drops an instruction from the two lookup tables below.
100  void RemoveMI(MachineInstr *);
// Records a vector in the lookup tables so later vectors can merge into it.
101  void trackRSI(const RegSeqInfo &RSI);
102 
// Integer key -> list of tracked instructions.
103  typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap;
// Keyed by source register (filled from RegToChan keys in trackRSI()).
105  InstructionSetMap PreviousRegSeqByReg;
// Keyed by the number of undef channels (UndefReg.size() in trackRSI()).
106  InstructionSetMap PreviousRegSeqByUndefCount;
107 public:
// Pass identifier handed to the MachineFunctionPass constructor.
108  static char ID;
// `tm` is not used by the constructor; TII starts out null.
109  R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID),
110  TII(0) { }
111 
// Declares that the pass preserves the CFG.
// NOTE(review): numbered lines 114-118 of this function are absent from the
// listing, so further analysis requirements may exist - confirm.
112  void getAnalysisUsage(AnalysisUsage &AU) const {
113  AU.setPreservesCFG();
119  }
120 
// Human-readable pass name.
121  const char *getPassName() const {
122  return "R600 Vector Registers Merge Pass";
123  }
124 
// Entry point; defined out of line.
125  bool runOnMachineFunction(MachineFunction &Fn);
126 };
127 
// Out-of-class definition of the static pass identifier declared in
// R600VectorRegMerger; the constructor passes it to MachineFunctionPass.
128 char R600VectorRegMerger::ID = 0;
129 
130 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
131  const {
132  if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
133  return true;
134  switch (MI.getOpcode()) {
135  case AMDGPU::R600_ExportSwz:
136  case AMDGPU::EG_ExportSwz:
137  return true;
138  default:
139  return false;
140  }
141 }
142 
143 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
144  RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap)
145  const {
146  unsigned CurrentUndexIdx = 0;
147  for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
148  E = ToMerge->RegToChan.end(); It != E; ++It) {
150  Untouched->RegToChan.find((*It).first);
151  if (PosInUntouched != Untouched->RegToChan.end()) {
152  Remap.push_back(std::pair<unsigned, unsigned>
153  ((*It).second, (*PosInUntouched).second));
154  continue;
155  }
156  if (CurrentUndexIdx >= Untouched->UndefReg.size())
157  return false;
158  Remap.push_back(std::pair<unsigned, unsigned>
159  ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
160  }
161 
162  return true;
163 }
164 
165 static
166 unsigned getReassignedChan(
167  const std::vector<std::pair<unsigned, unsigned> > &RemapChan,
168  unsigned Chan) {
169  for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
170  if (RemapChan[j].first == Chan)
171  return RemapChan[j].second;
172  }
173  llvm_unreachable("Chan wasn't reassigned");
174 }
175 
176 MachineInstr *R600VectorRegMerger::RebuildVector(
177  RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
178  const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
179  unsigned Reg = RSI->Instr->getOperand(0).getReg();
180  MachineBasicBlock::iterator Pos = RSI->Instr;
181  MachineBasicBlock &MBB = *Pos->getParent();
182  DebugLoc DL = Pos->getDebugLoc();
183 
184  unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
185  DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
186  std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
187  for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
188  E = RSI->RegToChan.end(); It != E; ++It) {
189  unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
190  unsigned SubReg = (*It).first;
191  unsigned Swizzle = (*It).second;
192  unsigned Chan = getReassignedChan(RemapChan, Swizzle);
193 
194  MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG),
195  DstReg)
196  .addReg(SrcVec)
197  .addReg(SubReg)
198  .addImm(Chan);
199  UpdatedRegToChan[SubReg] = Chan;
200  std::vector<unsigned>::iterator ChanPos =
201  std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan);
202  if (ChanPos != UpdatedUndef.end())
203  UpdatedUndef.erase(ChanPos);
204  assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) ==
205  UpdatedUndef.end() &&
206  "UpdatedUndef shouldn't contain Chan more than once!");
207  DEBUG(dbgs() << " ->"; Tmp->dump(););
208  (void)Tmp;
209  SrcVec = DstReg;
210  }
211  Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg)
212  .addReg(SrcVec);
213  DEBUG(dbgs() << " ->"; Pos->dump(););
214 
215  DEBUG(dbgs() << " Updating Swizzle:\n");
216  for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg),
217  E = MRI->use_end(); It != E; ++It) {
218  DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->");
219  SwizzleInput(*It, RemapChan);
220  DEBUG((*It).dump());
221  }
222  RSI->Instr->eraseFromParent();
223 
224  // Update RSI
225  RSI->Instr = Pos;
226  RSI->RegToChan = UpdatedRegToChan;
227  RSI->UndefReg = UpdatedUndef;
228 
229  return Pos;
230 }
231 
232 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
233  for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
234  E = PreviousRegSeqByReg.end(); It != E; ++It) {
235  std::vector<MachineInstr *> &MIs = (*It).second;
236  MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
237  }
238  for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
239  E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
240  std::vector<MachineInstr *> &MIs = (*It).second;
241  MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
242  }
243 }
244 
245 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
246  const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
247  unsigned Offset;
248  if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
249  Offset = 2;
250  else
251  Offset = 3;
252  for (unsigned i = 0; i < 4; i++) {
253  unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
254  for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
255  if (RemapChan[j].first == Swizzle) {
256  MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
257  break;
258  }
259  }
260  }
261 }
262 
263 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
264  for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg),
265  E = MRI->use_end(); It != E; ++It) {
266  if (!canSwizzle(*It))
267  return false;
268  }
269  return true;
270 }
271 
272 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
273  RegSeqInfo &CompatibleRSI,
274  std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
275  for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
276  MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
277  if (!MOp->isReg())
278  continue;
279  if (PreviousRegSeqByReg[MOp->getReg()].empty())
280  continue;
281  std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()];
282  for (unsigned i = 0, e = MIs.size(); i < e; i++) {
283  CompatibleRSI = PreviousRegSeq[MIs[i]];
284  if (RSI == CompatibleRSI)
285  continue;
286  if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
287  return true;
288  }
289  }
290  return false;
291 }
292 
293 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
294  RegSeqInfo &CompatibleRSI,
295  std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
296  unsigned NeededUndefs = 4 - RSI.UndefReg.size();
297  if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
298  return false;
299  std::vector<MachineInstr *> &MIs =
300  PreviousRegSeqByUndefCount[NeededUndefs];
301  CompatibleRSI = PreviousRegSeq[MIs.back()];
302  tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
303  return true;
304 }
305 
306 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
308  It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
309  PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
310  }
311  PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
312  PreviousRegSeq[RSI.Instr] = RSI;
313 }
314 
315 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
316  TII = static_cast<const R600InstrInfo *>(Fn.getTarget().getInstrInfo());
317  MRI = &(Fn.getRegInfo());
318  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
319  MBB != MBBe; ++MBB) {
320  MachineBasicBlock *MB = MBB;
321  PreviousRegSeq.clear();
322  PreviousRegSeqByReg.clear();
323  PreviousRegSeqByUndefCount.clear();
324 
325  for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
326  MII != MIIE; ++MII) {
327  MachineInstr *MI = MII;
328  if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) {
329  if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
330  unsigned Reg = MI->getOperand(1).getReg();
331  for (MachineRegisterInfo::def_iterator It = MRI->def_begin(Reg),
332  E = MRI->def_end(); It != E; ++It) {
333  RemoveMI(&(*It));
334  }
335  }
336  continue;
337  }
338 
339 
340  RegSeqInfo RSI(*MRI, MI);
341 
342  // All uses of MI are swizzeable ?
343  unsigned Reg = MI->getOperand(0).getReg();
344  if (!areAllUsesSwizzeable(Reg))
345  continue;
346 
347  DEBUG (dbgs() << "Trying to optimize ";
348  MI->dump();
349  );
350 
351  RegSeqInfo CandidateRSI;
352  std::vector<std::pair<unsigned, unsigned> > RemapChan;
353  DEBUG(dbgs() << "Using common slots...\n";);
354  if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
355  // Remove CandidateRSI mapping
356  RemoveMI(CandidateRSI.Instr);
357  MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
358  trackRSI(RSI);
359  continue;
360  }
361  DEBUG(dbgs() << "Using free slots...\n";);
362  RemapChan.clear();
363  if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
364  RemoveMI(CandidateRSI.Instr);
365  MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
366  trackRSI(RSI);
367  continue;
368  }
369  //Failed to merge
370  trackRSI(RSI);
371  }
372  }
373  return false;
374 }
375 
376 }
377 
// Factory body: heap-allocates a new R600VectorRegMerger built from `tm` and
// returns it to the caller.
// NOTE(review): the line carrying this function's signature (numbered line 378)
// is absent from this listing; the cross-reference text further down suggests
// `FunctionPass *createR600VectorRegMerger(TargetMachine &tm)` - confirm.
379  return new R600VectorRegMerger(tm);
380 }
const MachineFunction * getParent() const
AnalysisUsage & addPreserved()
Interface definition for R600InstrInfo.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
AnalysisUsage & addRequired()
static use_iterator use_end()
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
FunctionPass * createR600VectorRegMerger(TargetMachine &tm)
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
const MachineInstrBuilder & addImm(int64_t Val) const
int getOpcode() const
Definition: MachineInstr.h:261
int64_t getImm() const
bundle_iterator< MachineInstr, instr_iterator > iterator
bool isReserved(unsigned PhysReg) const
const MCInstrInfo & MII
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:267
void setImm(int64_t immVal)
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
virtual const TargetInstrInfo * getInstrInfo() const
void setPreservesCFG()
Definition: Pass.cpp:249
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
void dump() const
def_iterator def_begin(unsigned RegNo) const
MachineRegisterInfo & getRegInfo()
virtual void getAnalysisUsage(AnalysisUsage &AU) const
use_iterator use_begin(unsigned RegNo) const
const TargetMachine & getTarget() const
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
unsigned getReg() const
getReg - Returns the register number.
static def_iterator def_end()
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:97
const MCRegisterInfo & MRI
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1684
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const