// NOTE(review): extraction fragment — the numeric prefix on each line looks
// like an original source line number from a lossy scrape; most interior
// lines of the file are missing. Do not treat any span here as compilable.
15 #define DEBUG_TYPE "r600cf"
// ClauseFile pairs a clause-head MachineInstr with the instructions that
// form the clause body.
34 typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
36 enum ControlFlowInstruction {
// Maximum number of fetch instructions allowed in a single fetch clause
// (used as the cutoff in MakeFetchClause below).
53 unsigned MaxFetchInst;
// getHWInstrDesc — fragment. Maps an abstract ControlFlowInstruction to the
// generation-specific hardware opcode, choosing between the EG and R600
// variants via `isEg` (and a Cayman-specific variant for CF_END), then
// returns the MCInstrDesc from TII. The enclosing switch/case/break lines
// appear elided by extraction — verify against the full file before editing.
66 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction
CFI)
const {
71 Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
74 Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
77 Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
80 Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
83 Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
86 Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
88 case CF_LOOP_CONTINUE:
89 Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
92 Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
95 Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
98 Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
// Cayman hardware uses its own CF_END encoding, regardless of isEg.
101 if (
ST.hasCaymanISA()) {
102 Opcode = AMDGPU::CF_END_CM;
105 Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
// Opcode must have been set by exactly one of the cases above.
108 assert (Opcode &&
"No opcode selected");
109 return TII->get(Opcode);
// isCompatibleWithClause — fragment (the signature head is elided; the name
// is grounded by the call site `isCompatibleWithClause(I, DstRegs)` later
// in this file). For registers in the 128-bit R600_Reg128 class, both the
// def and the use are normalized to their matching super-register; the
// instruction is accepted (and its def recorded) only when its source was
// not already produced inside this clause.
113 std::set<unsigned> &DstRegs)
const {
114 unsigned DstMI, SrcMI;
122 if (AMDGPU::R600_Reg128RegClass.contains(Reg))
125 DstMI = TRI->getMatchingSuperReg(Reg,
126 TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
127 &AMDGPU::R600_Reg128RegClass);
131 if (AMDGPU::R600_Reg128RegClass.contains(Reg))
134 SrcMI = TRI->getMatchingSuperReg(Reg,
135 TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
136 &AMDGPU::R600_Reg128RegClass);
// Compatible only if the source was not defined earlier in this clause.
139 if ((DstRegs.find(SrcMI) == DstRegs.end())) {
140 DstRegs.insert(DstMI);
// MakeFetchClause — fragment (name grounded by the
// `FetchClauses.push_back(MakeFetchClause(MBB, I))` call site below).
// Greedily collects up to MaxFetchInst homogeneous (all-texture or
// all-vertex) cache instructions that pass isCompatibleWithClause, then
// builds a CF_TC/CF_VC clause head whose count operand is AluInstCount - 1.
150 std::vector<MachineInstr *> ClauseContent;
151 unsigned AluInstCount = 0;
// The first instruction decides whether this is a texture or vertex clause.
152 bool IsTex =
TII->usesTextureCache(ClauseHead);
153 std::set<unsigned> DstRegs;
155 if (IsTrivialInst(I))
// Stop once the clause reaches the hardware limit.
157 if (AluInstCount >= MaxFetchInst)
// A clause must stay homogeneous: reject mixed texture/vertex use.
159 if ((IsTex && !
TII->usesTextureCache(I)) ||
160 (!IsTex && !
TII->usesVertexCache(I)))
162 if (!isCompatibleWithClause(I, DstRegs))
165 ClauseContent.push_back(I);
168 getHWInstrDesc(IsTex?CF_TC:CF_VC))
170 .
addImm(AluInstCount - 1);
171 return ClauseFile(MIb, ClauseContent);
// getLiteral — fragment. Collects the literal immediates used by MI into
// `Lits` (deduplicating via std::find) and rewrites each ALU_LITERAL_X
// source operand to the literal-slot register (X/Y/Z/W) matching the
// immediate's index in `Lits`.
174 void getLiteral(
MachineInstr *MI, std::vector<int64_t> &Lits)
const {
// The four per-group hardware literal slots, in channel order.
175 static const unsigned LiteralRegs[] = {
176 AMDGPU::ALU_LITERAL_X,
177 AMDGPU::ALU_LITERAL_Y,
178 AMDGPU::ALU_LITERAL_Z,
179 AMDGPU::ALU_LITERAL_W
183 for (
unsigned i = 0, e = Srcs.
size(); i < e; ++i) {
// Only operands currently reading ALU_LITERAL_X carry a literal.
184 if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
186 int64_t Imm = Srcs[i].second;
// Reuse an existing slot if this immediate was already recorded.
187 std::vector<int64_t>::iterator It =
188 std::find(Lits.begin(), Lits.end(), Imm);
189 if (It != Lits.end()) {
190 unsigned Index = It - Lits.
begin();
191 Srcs[i].first->setReg(LiteralRegs[Index]);
// Hardware provides only four literal slots per instruction group.
193 assert(Lits.size() < 4 &&
"Too many literals in Instruction Group");
194 Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
// Fragment of a literal-emission helper (signature head elided — name not
// visible in this chunk; verify against the full file). Emits one LITERALS
// pseudo-instruction per pair of 32-bit literals, zero-padding an odd
// trailing literal, and advances InsertPos past each emitted instruction.
202 const std::vector<unsigned> &Literals)
const {
204 for (
unsigned i = 0, e = Literals.size(); i < e; i+=2) {
205 unsigned LiteralPair0 = Literals[i];
// Odd count: the second slot of the final pair is padded with 0.
206 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
207 InsertPos =
BuildMI(MBB, InsertPos->getDebugLoc(),
208 TII->get(AMDGPU::LITERALS))
209 .addImm(LiteralPair0)
// MakeALUClause — fragment (name grounded by the
// `AluClauses.push_back(MakeALUClause(MBB, I))` call site below). Gathers
// ALU instructions — unbundling instruction groups — folds their literals
// via getLiteral, appends LITERALS pseudo-instructions for each literal
// pair, and asserts the clause stays under 128 entries.
219 std::vector<MachineInstr *> ClauseContent;
222 if (IsTrivialInst(I)) {
// Skip anything that is neither a bundle nor a plain ALU instruction.
226 if (!I->isBundle() && !
TII->isALUInstr(I->getOpcode()))
228 std::vector<int64_t> Literals;
// Unbundle an instruction group and process each bundled member.
232 while (++BI != E && BI->isBundledWithPred()) {
233 BI->unbundleFromPred();
234 for (
unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
239 getLiteral(BI, Literals);
240 ClauseContent.push_back(BI);
245 getLiteral(I, Literals);
246 ClauseContent.push_back(I);
// Literals are packed two per LITERALS instruction, 0-padded when odd.
249 for (
unsigned i = 0, e = Literals.size(); i < e; i+=2) {
250 unsigned literal0 = Literals[i];
251 unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
253 TII->get(AMDGPU::LITERALS))
256 ClauseContent.push_back(MILit);
// Hardware clause size limit.
259 assert(ClauseContent.size() < 128 &&
"ALU clause is too big");
261 return ClauseFile(ClauseHead, ClauseContent);
// EmitFetchClause — fragment (name grounded by the call site below).
// Back-patches the clause head's address operand with the current CfCount,
// emits a FETCH_CLAUSE marker, splices the clause body instructions before
// InsertPos, then advances CfCount by two words per fetch instruction.
267 CounterPropagateAddr(Clause.first, CfCount);
269 BuildMI(BB, InsertPos->getDebugLoc(),
TII->get(AMDGPU::FETCH_CLAUSE))
271 for (
unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
272 BB->
splice(InsertPos, BB, Clause.second[i]);
// Each fetch instruction occupies two control-flow words.
274 CfCount += 2 * Clause.second.size();
// EmitALUClause — fragment (name grounded by the call site below). Zeroes
// the clause head's first operand, back-patches its address with CfCount,
// emits an ALU_CLAUSE marker, splices the clause body before InsertPos,
// and advances CfCount by one word per instruction.
280 Clause.first->getOperand(0).setImm(0);
281 CounterPropagateAddr(Clause.first, CfCount);
283 BuildMI(BB, InsertPos->getDebugLoc(),
TII->get(AMDGPU::ALU_CLAUSE))
285 for (
unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
286 BB->
splice(InsertPos, BB, Clause.second[i]);
288 CfCount += Clause.second.size();
// CounterPropagateAddr — two overloads, both fragments. The single-MI form
// (body elided — presumably stores Addr into the instruction; verify
// against the full file) and a set form that forwards each member to the
// single-MI overload.
291 void CounterPropagateAddr(
MachineInstr *MI,
unsigned Addr)
const {
294 void CounterPropagateAddr(std::set<MachineInstr *> MIs,
unsigned Addr)
296 for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
299 CounterPropagateAddr(MI, Addr);
// getHWStackSize — fragment. Converts abstract stack sub-entries into a
// hardware stack-entry count, dispatching on the subtarget generation; the
// one visible path rounds up by four ((StackSubEntry + 3) / 4). The
// generation cases and any hasPush adjustment are elided by extraction.
303 unsigned getHWStackSize(
unsigned StackSubEntry,
bool hasPush)
const {
304 switch (
ST.getGeneration()) {
318 return (StackSubEntry + 3)/4;
// Fragment of the pass's main body (the signature is elided — presumably
// runOnMachineFunction; verify against the full file). Walks the function,
// lowering structured control-flow pseudo-ops (WHILELOOP/ENDLOOP/
// IF_PREDICATE_SET/ELSE/ENDIF/BREAK/CONTINUE) into hardware control-flow
// instructions, forming fetch and ALU clauses, tracking control-flow word
// addresses in CfCount and the maximum stack depth in MaxStack.
333 unsigned MaxStack = 0;
334 unsigned CurrentStack = 0;
335 bool HasPush =
false;
// CfCount is the control-flow word address of the next emitted instruction.
339 unsigned CfCount = 0;
// LoopStack: per open loop, its start address plus the instructions whose
// address operands must be back-patched when the loop closes.
340 std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
341 std::vector<MachineInstr * > IfThenElseStack;
345 getHWInstrDesc(CF_CALL_FS));
349 std::vector<ClauseFile> FetchClauses, AluClauses;
350 std::vector<MachineInstr *> LastAlu(1);
351 std::vector<MachineInstr *> ToPopAfter;
// Texture/vertex cache instructions start a new fetch clause.
355 if (
TII->usesTextureCache(I) ||
TII->usesVertexCache(I)) {
356 DEBUG(
dbgs() << CfCount <<
":"; I->dump(););
357 FetchClauses.push_back(MakeFetchClause(MBB, I));
363 if (MI->getOpcode() != AMDGPU::ENDIF)
365 if (MI->getOpcode() == AMDGPU::CF_ALU)
368 switch (MI->getOpcode()) {
369 case AMDGPU::CF_ALU_PUSH_BEFORE:
371 MaxStack = std::max(MaxStack, CurrentStack);
375 AluClauses.push_back(MakeALUClause(MBB, I));
376 DEBUG(
dbgs() << CfCount <<
":"; MI->dump(););
// Loop entry: remember the loop address and register the WHILE_LOOP
// instruction for back-patching at ENDLOOP.
379 case AMDGPU::WHILELOOP: {
381 MaxStack = std::max(MaxStack, CurrentStack);
383 getHWInstrDesc(CF_WHILE_LOOP))
385 std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
386 std::set<MachineInstr *>());
387 Pair.second.insert(MIb);
388 LoopStack.push_back(Pair);
389 MI->eraseFromParent();
// Loop exit: back-patch every recorded instruction with CfCount.
393 case AMDGPU::ENDLOOP: {
395 std::pair<unsigned, std::set<MachineInstr *> > Pair =
397 LoopStack.pop_back();
398 CounterPropagateAddr(Pair.second, CfCount);
400 .addImm(Pair.first + 1);
401 MI->eraseFromParent();
// If start: emit a CF_JUMP whose target is patched at ELSE/ENDIF.
405 case AMDGPU::IF_PREDICATE_SET: {
406 LastAlu.push_back(0);
408 getHWInstrDesc(CF_JUMP))
411 IfThenElseStack.push_back(MIb);
413 MI->eraseFromParent();
// ELSE: patch the pending jump, then push the CF_ELSE for ENDIF.
419 IfThenElseStack.pop_back();
420 CounterPropagateAddr(JumpInst, CfCount);
422 getHWInstrDesc(CF_ELSE))
426 IfThenElseStack.push_back(MIb);
427 MI->eraseFromParent();
431 case AMDGPU::ENDIF: {
// If a trailing ALU clause exists, fold the stack pop into it later
// (see the ToPopAfter loop below) instead of emitting a CF_POP here.
433 if (LastAlu.back()) {
434 ToPopAfter.push_back(LastAlu.back());
437 getHWInstrDesc(CF_POP))
446 IfThenElseStack.pop_back();
447 CounterPropagateAddr(IfOrElseInst, CfCount);
450 MI->eraseFromParent();
// BREAK/CONTINUE target the loop end, so they join the loop's
// back-patch set rather than getting an address now.
453 case AMDGPU::BREAK: {
456 getHWInstrDesc(CF_LOOP_BREAK))
458 LoopStack.back().second.insert(MIb);
459 MI->eraseFromParent();
462 case AMDGPU::CONTINUE: {
464 getHWInstrDesc(CF_LOOP_CONTINUE))
466 LoopStack.back().second.insert(MIb);
467 MI->eraseFromParent();
// Emit the collected clauses now that their addresses are known.
479 for (
unsigned i = 0, e = FetchClauses.size(); i < e; i++)
480 EmitFetchClause(I, FetchClauses[i], CfCount);
481 for (
unsigned i = 0, e = AluClauses.size(); i < e; i++)
482 EmitALUClause(I, AluClauses[i], CfCount);
485 if (
TII->isExport(MI->getOpcode())) {
486 DEBUG(
dbgs() << CfCount <<
":"; MI->dump(););
// Rewrite each recorded ALU clause head as CF_ALU_POP_AFTER, copying
// all nine immediate operands from the original instruction.
492 for (
unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
495 TII->get(AMDGPU::CF_ALU_POP_AFTER))
496 .addImm(Alu->getOperand(0).getImm())
497 .addImm(Alu->getOperand(1).getImm())
498 .addImm(Alu->getOperand(2).getImm())
499 .addImm(Alu->getOperand(3).getImm())
500 .addImm(Alu->getOperand(4).getImm())
501 .addImm(Alu->getOperand(5).getImm())
502 .addImm(Alu->getOperand(6).getImm())
503 .addImm(Alu->getOperand(7).getImm())
504 .addImm(Alu->getOperand(8).getImm());
// Record the final hardware stack requirement on the function info.
507 MFI->
StackSize = getHWStackSize(MaxStack, HasPush);
// getPassName — fragment; returns the human-readable pass name.
513 const char *getPassName()
const {
514 return "R600 Control Flow Finalizer Pass";
// Fragment of the factory function (presumably createR600ControlFlowFinalizer,
// per the declaration echoed further down); returns a new pass instance.
524 return new R600ControlFlowFinalizer(TM);
void push_back(const T &Elt)
const MachineFunction * getParent() const
mop_iterator operands_end()
instr_iterator instr_end()
Interface definition for R600InstrInfo.
Interface definition for R600RegisterInfo.
Instructions::iterator instr_iterator
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ID
LLVM Calling Convention Representation.
const MachineInstrBuilder & addImm(int64_t Val) const
FunctionPass * createR600ControlFlowFinalizer(TargetMachine &tm)
bundle_iterator< MachineInstr, instr_iterator > iterator
const MachineOperand & getOperand(unsigned i) const
void setImm(int64_t immVal)
void setIsInternalRead(bool Val=true)
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
DebugLoc findDebugLoc(instr_iterator MBBI)
virtual const TargetInstrInfo * getInstrInfo() const
const STC & getSubtarget() const
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
const TargetMachine & getTarget() const
virtual const TargetRegisterInfo * getRegisterInfo() const
unsigned getReg() const
getReg - Returns the register number.
short getTexVTXClauseSize() const
mop_iterator operands_begin()
BasicBlockListType::iterator iterator
bool isInternalRead() const