11 #define DEBUG_TYPE "structcfg"
38 #define DEFAULT_VEC_SLOTS 8
48 STATISTIC(numSerialPatternMatch,
"CFGStructurizer number of serial pattern "
50 STATISTIC(numIfPatternMatch,
"CFGStructurizer number of if pattern "
52 STATISTIC(numLoopcontPatternMatch,
"CFGStructurizer number of loop-continue "
54 STATISTIC(numClonedBlock,
"CFGStructurizer cloned blocks");
55 STATISTIC(numClonedInstr,
"CFGStructurizer cloned instructions");
63 #define SHOWNEWINSTR(i) \
64 DEBUG(dbgs() << "New instr: " << *i << "\n");
66 #define SHOWNEWBLK(b, msg) \
68 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
72 #define SHOWBLK_DETAIL(b, msg) \
75 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
81 #define INVALIDSCCNUM -1
85 size_t sz = Src.
size();
86 for (
size_t i = 0; i < sz/2; ++i) {
88 Src[i] = Src[sz - i - 1];
104 class BlockInformation {
123 typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
124 typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
128 SinglePath_InPath = 1,
129 SinglePath_NotInPath = 2
137 TRI(&
TII->getRegisterInfo()) { }
139 const char *getPassName()
const {
140 return "AMD IL Control Flow Graph structurizer Pass";
163 MLI = &getAnalysis<MachineLoopInfo>();
164 DEBUG(
dbgs() <<
"LoopInfo:\n"; PrintLoopinfo(*MLI););
165 MDT = &getAnalysis<MachineDominatorTree>();
167 PDT = &getAnalysis<MachinePostDominatorTree>();
185 void printOrderedBlocks()
const {
187 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
188 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
189 dbgs() <<
"BB" << (*iterBlk)->getNumber();
190 dbgs() <<
"(" << getSCCNum(*iterBlk) <<
"," << (*iterBlk)->size() <<
")";
191 if (i != 0 && i % 10 == 0) {
200 iterEnd = LoopInfo.
end(); iter != iterEnd; ++iter) {
201 (*iter)->print(
dbgs(), 0);
209 static unsigned getLoopDepth(
MachineLoop *LoopRep);
213 bool AllowSideEntry =
true)
const;
214 int countActiveBlock(MBBVector::const_iterator It,
215 MBBVector::const_iterator E)
const;
235 static int getBranchNzeroOpcode(
int OldOpcode);
236 static int getBranchZeroOpcode(
int OldOpcode);
237 static int getContinueNzeroOpcode(
int OldOpcode);
238 static int getContinueZeroOpcode(
int OldOpcode);
270 int loopendPatternMatch();
342 MBBInfoMap BlockInfoMap;
343 LoopLandInfoMap LLInfoMap;
344 std::map<MachineLoop *, bool> Visited;
350 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
351 if (It == BlockInfoMap.end())
353 return (*It).second->SccNum;
358 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
359 if (It == LLInfoMap.end())
372 unsigned AMDGPUCFGStructurizer::getLoopDepth(
MachineLoop *LoopRep) {
377 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
378 if (It == BlockInfoMap.end())
380 return (*It).second->IsRetired;
385 while (LoopRep && LoopRep->
getHeader() == MBB) {
389 if (!isRetiredBlock(LoopLand))
395 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
397 bool AllowSideEntry)
const {
399 if (SrcMBB == DstMBB)
400 return SinglePath_InPath;
401 while (SrcMBB && SrcMBB->
succ_size() == 1) {
403 if (SrcMBB == DstMBB)
404 return SinglePath_InPath;
405 if (!AllowSideEntry && SrcMBB->
pred_size() > 1)
406 return Not_SinglePath;
409 return SinglePath_NotInPath;
410 return Not_SinglePath;
413 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
414 MBBVector::const_iterator E)
const {
417 if (!isRetiredBlock(*It))
425 unsigned BlockSizeThreshold = 30;
426 unsigned CloneInstrThreshold = 100;
427 bool MultiplePreds = MBB && (MBB->
pred_size() > 1);
431 unsigned BlkSize = MBB->
size();
432 return ((BlkSize > BlockSizeThreshold) &&
433 (BlkSize * (MBB->
pred_size() - 1) > CloneInstrThreshold));
436 void AMDGPUCFGStructurizer::reversePredicateSetter(
439 if (I->getOpcode() == AMDGPU::PRED_X) {
440 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
497 void AMDGPUCFGStructurizer::insertCondBranchBefore(
522 int NewOpcode,
int RegNum) {
531 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(
int OldOpcode) {
533 case AMDGPU::JUMP_COND:
534 case AMDGPU::JUMP:
return AMDGPU::IF_PREDICATE_SET;
535 case AMDGPU::BRANCH_COND_i32:
536 case AMDGPU::BRANCH_COND_f32:
return AMDGPU::IF_LOGICALNZ_f32;
542 int AMDGPUCFGStructurizer::getBranchZeroOpcode(
int OldOpcode) {
544 case AMDGPU::JUMP_COND:
545 case AMDGPU::JUMP:
return AMDGPU::IF_PREDICATE_SET;
546 case AMDGPU::BRANCH_COND_i32:
547 case AMDGPU::BRANCH_COND_f32:
return AMDGPU::IF_LOGICALZ_f32;
553 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(
int OldOpcode) {
555 case AMDGPU::JUMP_COND:
556 case AMDGPU::JUMP:
return AMDGPU::CONTINUE_LOGICALNZ_i32;
562 int AMDGPUCFGStructurizer::getContinueZeroOpcode(
int OldOpcode) {
564 case AMDGPU::JUMP_COND:
565 case AMDGPU::JUMP:
return AMDGPU::CONTINUE_LOGICALZ_i32;
575 void AMDGPUCFGStructurizer::setTrueBranch(
MachineInstr *MI,
588 return (*It == TrueBranch) ? *Next : *It;
593 case AMDGPU::JUMP_COND:
594 case AMDGPU::BRANCH_COND_i32:
595 case AMDGPU::BRANCH_COND_f32:
return true;
602 bool AMDGPUCFGStructurizer::isUncondBranch(
MachineInstr *MI) {
625 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
634 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
652 if (It != MBB->
rend()) {
662 if (It != MBB->
rend()) {
678 <<
" is return block without RETURN instr\n";);
685 iterEnd = SrcMBB->
succ_end(); It != iterEnd; ++It)
701 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
704 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
706 getTrueBranch(BranchMI) == OldMBB)
707 setTrueBranch(BranchMI, NewBlk);
713 &&
"found a jump table");
721 if (Pre->getOpcode() == AMDGPU::CONTINUE
722 && It->getOpcode() == AMDGPU::ENDLOOP)
729 for (
unsigned i = 0; i < ContInstr.
size(); ++i)
730 ContInstr[i]->eraseFromParent();
740 bool AMDGPUCFGStructurizer::prepare() {
741 bool Changed =
false;
745 DEBUG(
dbgs() <<
"AMDGPUCFGStructurizer::prepare\n";);
747 orderBlocks(FuncRep);
753 E = MLI->end(); It != E; ++It) {
755 MBBVector ExitingMBBs;
758 if (ExitingMBBs.size() == 0) {
768 It = OrderedBlks.
begin(), E = OrderedBlks.end(); It != E; ++It) {
770 removeUnconditionalBranch(MBB);
771 removeRedundantConditionalBranch(MBB);
772 if (isReturnBlock(MBB)) {
778 if (RetBlks.
size() >= 2) {
779 addDummyExitBlock(RetBlks);
786 bool AMDGPUCFGStructurizer::run() {
789 DEBUG(
dbgs() <<
"AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG(););
793 ReverseVector(orderedBlks);
796 DEBUG(
dbgs() <<
"Ordered blocks:\n"; printOrderedBlocks(););
800 bool MakeProgress =
false;
801 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
807 dbgs() <<
"numIter = " << NumIter
808 <<
", numRemaintedBlk = " << NumRemainedBlk <<
"\n";
830 SccNumBlk = NumRemainedBlk;
832 dbgs() <<
"start processing SCC" << getSCCNum(SccBeginMBB);
837 if (!isRetiredBlock(MBB))
842 bool ContNextScc =
true;
844 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
847 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
848 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
850 dbgs() <<
"Can't reduce SCC " << getSCCNum(MBB)
851 <<
", sccNumIter = " << SccNumIter;
852 dbgs() <<
"doesn't make any progress\n";
855 }
else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
856 SccNumBlk = sccRemainedNumBlk;
860 dbgs() <<
"repeat processing SCC" << getSCCNum(MBB)
861 <<
"sccNumIter = " << SccNumIter <<
"\n";
882 dbgs() <<
"Reduce to one block\n";
885 int NewnumRemainedBlk
886 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
888 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
890 NumRemainedBlk = NewnumRemainedBlk;
892 MakeProgress =
false;
894 dbgs() <<
"No progress\n";
898 }
while (!Finish && MakeProgress);
904 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.
end();
906 if ((*It).second && (*It).second->IsRetired) {
907 assert(((*It).first)->getNumber() != -1);
909 dbgs() <<
"Erase BB" << ((*It).first)->getNumber() <<
"\n";
911 (*It).first->eraseFromParent();
915 BlockInfoMap.clear();
934 It != E; ++It, ++SccNum) {
935 std::vector<MachineBasicBlock *> &SccNext = *It;
936 for (std::vector<MachineBasicBlock *>::const_iterator
937 blockIter = SccNext.begin(), blockEnd = SccNext.end();
938 blockIter != blockEnd; ++blockIter) {
941 recordSccnum(MBB, SccNum);
948 for (; It != E; ++It) {
950 SccNum = getSCCNum(MBB);
952 dbgs() <<
"unreachable block BB" << MBB->
getNumber() <<
"\n";
961 dbgs() <<
"Begin patternMatch BB" << MBB->
getNumber() <<
"\n";
964 while ((CurMatch = patternMatchGroup(MBB)) > 0)
965 NumMatch += CurMatch;
969 <<
", numMatch = " << NumMatch <<
"\n";
977 NumMatch += loopendPatternMatch();
978 NumMatch += serialPatternMatch(MBB);
979 NumMatch += ifPatternMatch(MBB);
989 if (childBlk->
pred_size() != 1 || isActiveLoophead(childBlk))
992 mergeSerialBlock(MBB, childBlk);
993 ++numSerialPatternMatch;
1001 if (hasBackEdge(MBB))
1003 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1011 NumMatch += serialPatternMatch(TrueMBB);
1012 NumMatch += ifPatternMatch(TrueMBB);
1014 NumMatch += serialPatternMatch(FalseMBB);
1015 NumMatch += ifPatternMatch(FalseMBB);
1034 reversePredicateSetter(MBB->
end());
1038 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
1041 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
1044 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1052 || (FalseMBB && FalseMBB->
pred_size() > 1))) {
1053 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1056 if (TrueMBB && TrueMBB->
pred_size() > 1) {
1057 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1061 if (FalseMBB && FalseMBB->
pred_size() > 1) {
1062 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1066 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1068 ++numIfPatternMatch;
1070 numClonedBlock += Cloned;
1072 return 1 + Cloned + NumMatch;
1075 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1076 std::vector<MachineLoop *> NestedLoops;
1081 for (; LpIt != LpE; ++LpIt)
1082 NestedLoops.push_back(*LpIt);
1084 if (NestedLoops.size() == 0)
1091 E = NestedLoops.
rend(); It != E; ++It) {
1093 if (ExaminedLoop->
getNumBlocks() == 0 || Visited[ExaminedLoop])
1096 int NumBreak = mergeLoop(ExaminedLoop);
1104 int AMDGPUCFGStructurizer::mergeLoop(
MachineLoop *LoopRep) {
1106 MBBVector ExitingMBBs;
1108 assert(!ExitingMBBs.empty() &&
"Infinite Loop not supported");
1109 DEBUG(
dbgs() <<
"Loop has " << ExitingMBBs.size() <<
" exiting blocks\n";);
1114 for (
unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1115 ExitBlkSet.
insert(ExitBlks[i]);
1116 assert(ExitBlkSet.
size() == 1);
1118 assert(ExitBlk &&
"Loop has several exit block");
1119 MBBVector LatchBlks;
1121 InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
1122 PE = InvMBBTraits::child_end(LoopHeader);
1123 for (; PI != PE; PI++) {
1125 LatchBlks.push_back(*PI);
1128 for (
unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1129 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1130 for (
unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1131 settleLoopcontBlock(LatchBlks[i], LoopHeader);
1135 Match += serialPatternMatch(LoopHeader);
1136 Match += ifPatternMatch(LoopHeader);
1137 }
while (Match > 0);
1138 mergeLooplandBlock(LoopHeader, ExitBlk);
1141 MLI->changeLoopFor(LoopHeader, ParentLoop);
1143 MLI->removeBlock(LoopHeader);
1144 Visited[LoopRep] =
true;
1148 int AMDGPUCFGStructurizer::loopcontPatternMatch(
MachineLoop *LoopRep,
1153 GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
1154 E = GTIM::child_end(LoopHeader);
1155 for (; It != E; ++It) {
1158 handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
1159 LoopHeader, LoopRep);
1166 E = ContMBB.
end(); It != E; ++It) {
1167 (*It)->removeSuccessor(LoopHeader);
1170 numLoopcontPatternMatch += NumCont;
1176 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1180 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1184 dbgs() <<
"isLoopContBreakBlock yes src1 = BB"
1186 <<
" src2 = BB" << Src2MBB->
getNumber() <<
"\n";
1197 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1200 dbgs() <<
"handleJumpintoIf swap trueBlk and FalseBlk" <<
"\n";
1202 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1216 dbgs() <<
"handleJumpintoIfImp head = BB" << HeadMBB->
getNumber()
1219 <<
" false = BB" << FalseMBB->
getNumber() <<
"\n";
1227 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1229 dbgs() <<
" working\n";
1232 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1233 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1235 numClonedBlock += Num;
1236 Num += serialPatternMatch(*HeadMBB->
succ_begin());
1238 Num += ifPatternMatch(HeadMBB);
1244 dbgs() <<
" not working\n";
1246 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
1252 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1256 <<
" size = " << HeadMBB->
size();
1264 dbgs() <<
", true = BB" << TrueMBB->
getNumber() <<
" size = "
1265 << TrueMBB->
size() <<
" numPred = " << TrueMBB->
pred_size();
1273 dbgs() <<
", false = BB" << FalseMBB->
getNumber() <<
" size = "
1274 << FalseMBB->
size() <<
" numPred = " << FalseMBB->
pred_size();
1282 dbgs() <<
", land = BB" << LandMBB->
getNumber() <<
" size = "
1283 << LandMBB->
size() <<
" numPred = " << LandMBB->
pred_size();
1297 bool MigrateTrue =
false;
1298 bool MigrateFalse =
false;
1302 assert((!TrueMBB || TrueMBB->
succ_size() <= 1)
1303 && (!FalseMBB || FalseMBB->
succ_size() <= 1));
1305 if (TrueMBB == FalseMBB)
1308 MigrateTrue = needMigrateBlock(TrueMBB);
1309 MigrateFalse = needMigrateBlock(FalseMBB);
1311 if (!MigrateTrue && !MigrateFalse)
1317 if (!MigrateTrue && TrueMBB && TrueMBB->
pred_size() > 1)
1319 if (!MigrateFalse && FalseMBB && FalseMBB->
pred_size() > 1)
1320 MigrateFalse =
true;
1323 dbgs() <<
"before improveSimpleJumpintoIf: ";
1324 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1339 if (!MigrateTrue || !MigrateFalse) {
1410 bool LandBlkHasOtherPred = (LandBlk->
pred_size() > 2);
1415 if (LandBlkHasOtherPred) {
1417 unsigned CmpResReg =
1420 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1428 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1432 migrateInstruction(TrueMBB, LandBlk, I);
1438 insertInstrBefore(I, AMDGPU::ELSE);
1441 migrateInstruction(FalseMBB, LandBlk, I);
1448 if (LandBlkHasOtherPred) {
1450 insertInstrBefore(I, AMDGPU::ENDIF);
1454 PE = LandBlk->
pred_end(); PI != PE; ++PI) {
1456 if (MBB != TrueMBB && MBB != FalseMBB)
1461 dbgs() <<
"result from improveSimpleJumpintoIf: ";
1462 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1466 *LandMBBPtr = LandBlk;
1475 <<
" header = BB" << ContMBB->
getNumber() <<
"\n";
1476 dbgs() <<
"Trying to continue loop-depth = "
1477 << getLoopDepth(ContLoop)
1478 <<
" from loop-depth = " << getLoopDepth(ContingLoop) <<
"\n";);
1479 settleLoopcontBlock(ContingMBB, ContMBB);
1486 <<
" <= BB" << SrcMBB->
getNumber() <<
"\n";
1491 cloneSuccessorList(DstMBB, SrcMBB);
1493 removeSuccessor(SrcMBB);
1494 MLI->removeBlock(SrcMBB);
1495 retireBlock(SrcMBB);
1498 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(
MachineInstr *BranchMI,
1508 dbgs() <<
" } else ";
1514 dbgs() <<
"landBlock: ";
1535 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1543 retireBlock(TrueMBB);
1544 MLI->removeBlock(TrueMBB);
1548 insertInstrBefore(I, AMDGPU::ELSE);
1552 if (LandMBB && FalseMBB->
succ_size() != 0)
1554 retireBlock(FalseMBB);
1555 MLI->removeBlock(FalseMBB);
1557 insertInstrBefore(I, AMDGPU::ENDIF);
1561 if (LandMBB && TrueMBB && FalseMBB)
1569 <<
" land = BB" << LandMBB->
getNumber() <<
"\n";);
1571 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP,
DebugLoc());
1572 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP,
DebugLoc());
1581 <<
" land = BB" << LandMBB->
getNumber() <<
"\n";);
1582 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1584 DebugLoc DL = BranchMI->getDebugLoc();
1587 if (TrueBranch != LandMBB)
1588 reversePredicateSetter(I);
1589 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1590 insertInstrBefore(I, AMDGPU::BREAK);
1591 insertInstrBefore(I, AMDGPU::ENDIF);
1593 BranchMI->eraseFromParent();
1600 DEBUG(
dbgs() <<
"settleLoopcontBlock conting = BB"
1602 <<
", cont = BB" << ContMBB->
getNumber() <<
"\n";);
1604 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1609 int OldOpcode = MI->getOpcode();
1612 bool UseContinueLogical = ((&*ContingMBB->
rbegin()) == MI);
1614 if (UseContinueLogical ==
false) {
1616 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1617 getBranchZeroOpcode(OldOpcode);
1618 insertCondBranchBefore(I, BranchOpcode, DL);
1620 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1621 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1624 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1625 getContinueZeroOpcode(OldOpcode);
1626 insertCondBranchBefore(I, BranchOpcode, DL);
1629 MI->eraseFromParent();
1636 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1637 getLastDebugLocInBB(ContingMBB));
1645 while (SrcMBB && SrcMBB != DstMBB) {
1648 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1663 "succBlk is not a prececessor of curBlk");
1666 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1673 cloneSuccessorList(CloneMBB, MBB);
1675 numClonedInstr += MBB->
size();
1678 dbgs() <<
"Cloned block: " <<
"BB"
1682 SHOWNEWBLK(CloneMBB,
"result of Cloned block: ");
1691 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1694 dbgs() <<
"migrateInstruction don't see branch instr\n" ;
1696 SpliceEnd = SrcMBB->
end();
1699 dbgs() <<
"migrateInstruction see branch instr\n" ;
1702 SpliceEnd = BranchMI;
1705 dbgs() <<
"migrateInstruction before splice dstSize = " << DstMBB->
size()
1706 <<
"srcSize = " << SrcMBB->
size() <<
"\n";
1710 DstMBB->
splice(I, SrcMBB, SrcMBB->
begin(), SpliceEnd);
1713 dbgs() <<
"migrateInstruction after splice dstSize = " << DstMBB->
size()
1714 <<
"srcSize = " << SrcMBB->
size() <<
"\n";
1719 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(
MachineLoop* LoopRep) {
1724 if (!LoopHeader || !LoopLatch)
1726 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1728 if (!BranchMI || !isUncondBranch(BranchMI))
1733 SHOWNEWBLK(DummyExitBlk,
"DummyExitBlock to normalize infiniteLoop: ");
1734 DEBUG(
dbgs() <<
"Old branch instr: " << *BranchMI <<
"\n";);
1738 MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
1740 MIB.addMBB(LoopHeader);
1741 MIB.addReg(ImmReg,
false);
1746 return DummyExitBlk;
1754 while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1755 && isUncondBranch(BranchMI)) {
1756 DEBUG(
dbgs() <<
"Removing uncond branch instr"; BranchMI->
dump(););
1761 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1770 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1772 DEBUG(
dbgs() <<
"Removing unneeded cond branch instr"; BranchMI->
dump(););
1774 SHOWNEWBLK(MBB1,
"Removing redundant successor");
1778 void AMDGPUCFGStructurizer::addDummyExitBlock(
1785 E = RetMBB.
end(); It != E; ++It) {
1796 SHOWNEWBLK(DummyExitBlk,
"DummyExitBlock: ");
1806 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1808 srcBlkInfo =
new BlockInformation();
1809 srcBlkInfo->SccNum = SccNum;
1817 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1820 SrcBlkInfo =
new BlockInformation();
1822 SrcBlkInfo->IsRetired =
true;
1824 &&
"can't retire block yet");
1827 void AMDGPUCFGStructurizer::setLoopLandBlock(
MachineLoop *loopRep,
1831 MBB = FuncRep->CreateMachineBasicBlock();
1833 SHOWNEWBLK(MBB,
"DummyLandingBlock for loop without break: ");
1837 dbgs() <<
"setLoopLandBlock loop-header = BB"
1839 <<
" landing-block = BB" << MBB->
getNumber() <<
"\n";
1847 if (PDT->dominates(MBB1, MBB2))
1849 if (PDT->dominates(MBB2, MBB1))
1857 return findNearestCommonPostDom(*MBB1->
succ_begin(), MBB2);
1859 return findNearestCommonPostDom(MBB1, *MBB2->
succ_begin());
1861 if (!Node1 || !Node2)
1866 if (PDT->dominates(Node1, Node2))
1875 AMDGPUCFGStructurizer::findNearestCommonPostDom(
1876 std::set<MachineBasicBlock *> &MBBs) {
1878 std::set<MachineBasicBlock *>::const_iterator It = MBBs.
begin();
1879 std::set<MachineBasicBlock *>::const_iterator E = MBBs.
end();
1880 for (CommonDom = *It; It != E && CommonDom; ++It) {
1882 if (MBB != CommonDom)
1883 CommonDom = findNearestCommonPostDom(MBB, CommonDom);
1887 dbgs() <<
"Common post dominator for exit blocks is ";
1889 dbgs() <<
"BB" << CommonDom->getNumber() <<
"\n";
1903 return new AMDGPUCFGStructurizer(tm);
unsigned succ_size() const
void push_back(const T &Elt)
const MachineFunction * getParent() const
AnalysisUsage & addPreserved()
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp=false)
Interface definition for R600InstrInfo.
MachineBasicBlock * getMBB() const
The main container class for the LLVM Intermediate Representation.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
FunctionPass * createAMDGPUCFGStructurizerPass(TargetMachine &tm)
LoopT * getParentLoop() const
#define OPCODE_IS_NOT_ZERO_INT
BlockT * getHeader() const
BlockT * getLoopLatch() const
STATISTIC(numSerialPatternMatch,"CFGStructurizer number of serial pattern ""matched")
DomTreeNodeBase< NodeT > * getIDom() const
AnalysisUsage & addRequired()
bool isUnknown() const
isUnknown - Return true if this is an unknown location.
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
std::vector< MachineBasicBlock * >::iterator succ_iterator
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
void print(raw_ostream &OS, SlotIndexes *=0) const
ID
LLVM Calling Convention Representation.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
scc_iterator< T > scc_begin(const T &G)
#define OPCODE_IS_NOT_ZERO
const MachineJumpTableInfo * getJumpTableInfo() const
std::vector< MachineBasicBlock * >::iterator pred_iterator
reverse_iterator rbegin()
const MachineBasicBlock * getParent() const
bundle_iterator< MachineInstr, instr_iterator > iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=0)
scc_iterator< T > scc_end(const T &G)
df_iterator< T > df_end(const T &G)
const MachineOperand & getOperand(unsigned i) const
#define SHOWNEWBLK(b, msg)
void setMBB(MachineBasicBlock *MBB)
ItTy next(ItTy it, Dist n)
bool contains(const LoopT *L) const
succ_iterator succ_begin()
void removeSuccessor(MachineBasicBlock *succ)
pred_iterator pred_begin()
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
bool isSuccessor(const MachineBasicBlock *MBB) const
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
df_iterator< T > df_begin(const T &G)
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
MachineRegisterInfo & getRegInfo()
unsigned getNumBlocks() const
getNumBlocks - Get the number of blocks in this loop in constant time.
std::reverse_iterator< const_iterator > reverse_iterator
void push_back(MachineInstr *MI)
instr_iterator insert(instr_iterator I, MachineInstr *M)
unsigned getReg() const
getReg - Returns the register number.
std::reverse_iterator< iterator > reverse_iterator
void push_back(MachineBasicBlock *MBB)
static bool isCondBranch(unsigned Opc)
BasicBlockListType::iterator iterator
std::vector< LoopT * >::const_iterator iterator
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
unsigned getLoopDepth() const
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
unsigned pred_size() const
DebugLoc getDebugLoc() const
#define OPCODE_IS_ZERO_INT