LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SIFixSGPRCopies.cpp
Go to the documentation of this file.
1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
12 /// will sometimes generate these illegal copies in situations like this:
13 ///
14 /// Register Class <vsrc> is the union of <vgpr> and <sgpr>
15 ///
16 /// BB0:
17 /// %vreg0 <sgpr> = SCALAR_INST
18 /// %vreg1 <vsrc> = COPY %vreg0 <sgpr>
19 /// ...
20 /// BRANCH %cond BB1, BB2
21 /// BB1:
22 /// %vreg2 <vgpr> = VECTOR_INST
23 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
24 /// BB2:
25 /// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
26 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
27 ///
28 ///
29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
30 /// code will look like this:
31 ///
32 /// BB0:
33 /// %vreg0 <sgpr> = SCALAR_INST
34 /// ...
35 /// BRANCH %cond BB1, BB2
36 /// BB1:
37 /// %vreg2 <vgpr> = VECTOR_INST
38 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
39 /// BB2:
40 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
41 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
42 ///
43 /// Now that the result of the PHI instruction is an SGPR, the register
44 /// allocator is now forced to constrain the register class of %vreg3 to
45 /// <sgpr> so we end up with final code like this:
46 ///
47 /// BB0:
48 /// %vreg0 <sgpr> = SCALAR_INST
49 /// ...
50 /// BRANCH %cond BB1, BB2
51 /// BB1:
52 /// %vreg2 <vgpr> = VECTOR_INST
53 /// %vreg3 <sgpr> = COPY %vreg2 <vgpr>
54 /// BB2:
55 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
56 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
57 ///
58 /// Now this code contains an illegal copy from a VGPR to an SGPR.
59 ///
60 /// In order to avoid this problem, this pass searches for PHI instructions
61 /// which define a <vsrc> register and constrains its definition class to
62 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
64 /// will be unable to perform the COPY removal from the above example which
65 /// ultimately led to the creation of an illegal COPY.
66 //===----------------------------------------------------------------------===//
67 
68 #define DEBUG_TYPE "sgpr-copies"
69 #include "AMDGPU.h"
70 #include "SIInstrInfo.h"
74 #include "llvm/Support/Debug.h"
77 
78 using namespace llvm;
79 
80 namespace {
81 
82 class SIFixSGPRCopies : public MachineFunctionPass {
83 
84 private:
85  static char ID;
86  const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
87  const MachineRegisterInfo &MRI,
88  unsigned Reg,
89  unsigned SubReg) const;
90  const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
91  const MachineRegisterInfo &MRI,
92  unsigned Reg,
93  unsigned SubReg) const;
94  bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
95  const MachineRegisterInfo &MRI) const;
96 
97 public:
98  SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
99 
100  virtual bool runOnMachineFunction(MachineFunction &MF);
101 
102  const char *getPassName() const {
103  return "SI Fix SGPR copies";
104  }
105 
106 };
107 
108 } // End anonymous namespace
109 
// Out-of-class definition of the static ID member declared in SIFixSGPRCopies;
// the class constructor forwards ID to the MachineFunctionPass base.
110 char SIFixSGPRCopies::ID = 0;
111 
// NOTE(review): the line that opens this function (listing line 112) is
// missing from this text. The cross-reference index at the end of the page
// lists `FunctionPass * createSIFixSGPRCopiesPass(TargetMachine &tm)`, which
// is presumably the signature this body belongs to -- confirm against the
// upstream file before compiling.
// The body returns a newly allocated SIFixSGPRCopies constructed from tm.
113  return new SIFixSGPRCopies(tm);
114 }
115 
// Scans the operands of MI and returns true as soon as one register operand
// that survives the filter below has a register class for which
// TRI->hasVGPRs() is true; returns false if no such operand is found.
//
// NOTE(review): listing lines 117 and 120 are missing from this text (the
// embedded numbering jumps 116 -> 118 and 119 -> 121). `MRI` is used below
// without a visible declaration, and the `if` condition is cut off after
// `||`. Presumably line 117 declared MRI and line 120 held the second half of
// the condition -- restore both from the upstream file before compiling.
116 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
118  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
// Skip operands that are not registers (the rest of this condition is
// missing from the listing).
119  if (!MI.getOperand(i).isReg() ||
121  continue;
122 
// One operand whose register class contains VGPRs is enough.
123  if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
124  return true;
125  }
126  return false;
127 }
128 
129 /// Infers a register class for \p Reg by following COPY instructions.
130 /// The starting class is MRI.getRegClass(Reg) narrowed with getSubRegClass().
131 /// For every visited COPY it is intersected (getCommonSubClass) with the class
/// inferred recursively for that COPY's operand 0. Instructions with any
/// other opcode leave the class unchanged.
/// \return The register class computed this way.
///
/// NOTE(review): listing lines 139 and 144 are missing from this text (the
/// embedded numbering jumps 138 -> 140 and 143 -> 145). Line 139 presumably
/// opened the assertion whose message appears on line 140, and line 144
/// presumably opened the loop that declares `I` and `E` (the visible tail
/// ends at MRI.use_end()) -- restore both from the upstream file.
132 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
133  const SIRegisterInfo *TRI,
134  const MachineRegisterInfo &MRI,
135  unsigned Reg,
136  unsigned SubReg) const {
137  // The Reg parameter to the function must always be defined by either a PHI
138  // or a COPY, therefore it cannot be a physical register.
140  "Reg cannot be a physical register");
141 
// Start from the class of Reg restricted to the requested sub-register.
142  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
143  RC = TRI->getSubRegClass(RC, SubReg);
145  E = MRI.use_end(); I != E; ++I) {
146  switch (I->getOpcode()) {
147  case AMDGPU::COPY:
// Recurse into the COPY's operand 0 and keep only the common subclass.
148  RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
149  I->getOperand(0).getReg(),
150  I->getOperand(0).getSubReg()));
151  break;
152  }
153  }
154 
155  return RC;
156 }
157 
/// Infers a register class for \p Reg by following its defining instructions.
/// As long as the definition found with MRI.getVRegDef() is a COPY, the
/// function recurses into that COPY's operand 1 (register and sub-register
/// index). At the first definition that is not a COPY it returns
/// getSubRegClass(MRI.getRegClass(Reg), SubReg).
///
/// NOTE(review): listing line 163 is missing from this text (the embedded
/// numbering jumps 162 -> 164). It presumably opened the `if` block closed on
/// line 166; since that block uses getPhysRegClass(), the condition is
/// presumably a check for a non-virtual register -- confirm against upstream.
158 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
159  const SIRegisterInfo *TRI,
160  const MachineRegisterInfo &MRI,
161  unsigned Reg,
162  unsigned SubReg) const {
164  const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
165  return TRI->getSubRegClass(RC, SubReg);
166  }
167  MachineInstr *Def = MRI.getVRegDef(Reg);
// A definition that is not a COPY ends the walk: use Reg's own class.
168  if (Def->getOpcode() != AMDGPU::COPY) {
169  return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
170  }
171 
// Otherwise continue with the source operand of the COPY.
172  return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
173  Def->getOperand(1).getSubReg());
174 }
175 
/// Decides whether \p Copy copies a VGPR value into an SGPR.
/// Operand 0 is read as the destination and operand 1 as the source.
/// \return true when the destination's register class is an SGPR class and
/// the class inferred for the source via inferRegClassFromDef() contains
/// VGPRs; false when the early-out condition below holds.
///
/// NOTE(review): listing line 186 is missing from this text (the embedded
/// numbering jumps 185 -> 187). It presumably opened the `if` whose condition
/// ends on line 187; only the `DstRC == &AMDGPU::M0RegRegClass` half of that
/// condition is visible here -- restore the line from the upstream file.
176 bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
177  const SIRegisterInfo *TRI,
178  const MachineRegisterInfo &MRI) const {
179 
180  unsigned DstReg = Copy.getOperand(0).getReg();
181  unsigned SrcReg = Copy.getOperand(1).getReg();
182  unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
183  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
184  const TargetRegisterClass *SrcRC;
185 
187  DstRC == &AMDGPU::M0RegRegClass)
188  return false;
189 
// The source class is taken from the source's definition chain, not from the
// class currently recorded for SrcReg.
190  SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
191  return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
192 }
193 
194 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
195  MachineRegisterInfo &MRI = MF.getRegInfo();
196  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
197  MF.getTarget().getRegisterInfo());
198  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
199  MF.getTarget().getInstrInfo());
200  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
201  BI != BE; ++BI) {
202 
203  MachineBasicBlock &MBB = *BI;
204  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
205  I != E; ++I) {
206  MachineInstr &MI = *I;
207  if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
208  DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
209  DEBUG(MI.print(dbgs()));
210  TII->moveToVALU(MI);
211 
212  }
213 
214  switch (MI.getOpcode()) {
215  default: continue;
216  case AMDGPU::PHI: {
217  DEBUG(dbgs() << " Fixing PHI:\n");
218  DEBUG(MI.print(dbgs()));
219 
220  for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
221  unsigned Reg = MI.getOperand(i).getReg();
222  const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
223  MI.getOperand(0).getSubReg());
224  MRI.constrainRegClass(Reg, RC);
225  }
226  unsigned Reg = MI.getOperand(0).getReg();
227  const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
228  MI.getOperand(0).getSubReg());
229  if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
230  MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
231  }
232 
233  if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
234  break;
235 
236  // If a PHI node defines an SGPR and any of its operands are VGPRs,
237  // then we need to move it to the VALU.
238  for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
239  unsigned Reg = MI.getOperand(i).getReg();
240  if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
241  TII->moveToVALU(MI);
242  break;
243  }
244  }
245 
246  break;
247  }
248  case AMDGPU::REG_SEQUENCE: {
249  if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
250  !hasVGPROperands(MI, TRI))
251  continue;
252 
253  DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
254  DEBUG(MI.print(dbgs()));
255 
256  TII->moveToVALU(MI);
257  break;
258  }
259  }
260  }
261  }
262  return false;
263 }
const MachineFunction * getParent() const
static bool isVirtualRegister(unsigned Reg)
void moveToVALU(MachineInstr &MI) const
Replace this instruction's opcode with the equivalent VALU opcode. This function will also move the u...
bool hasVGPRs(const TargetRegisterClass *RC) const
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI)
static use_iterator use_end()
const HexagonInstrInfo * TII
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const TargetRegisterClass * getRegClass(unsigned Reg) const
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
unsigned getNumOperands() const
Definition: MachineInstr.h:265
int getOpcode() const
Definition: MachineInstr.h:261
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
FunctionPass * createSIFixSGPRCopiesPass(TargetMachine &tm)
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:119
bundle_iterator< MachineInstr, instr_iterator > iterator
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:267
unsigned getSubReg() const
virtual const TargetInstrInfo * getInstrInfo() const
bool isSGPRClass(const TargetRegisterClass *RC) const
void print(raw_ostream &OS, const TargetMachine *TM=0, bool SkipOpers=false) const
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register. e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR...
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
use_iterator use_begin(unsigned RegNo) const
#define I(x, y, z)
Definition: MD5.cpp:54
const TargetMachine & getTarget() const
virtual const TargetRegisterInfo * getRegisterInfo() const
MachineInstr * getVRegDef(unsigned Reg) const
unsigned getReg() const
getReg - Returns the register number.
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:97
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo. For target-specific instructions, this will return the re...
const MCRegisterInfo & MRI