29 #define DEBUG_TYPE "hwloops"
55 STATISTIC(NumHWLoops,
"Number of loops converted to hardware loops");
83 const char *getPassName()
const {
return "Hexagon Hardware Loops"; }
110 static Kind getSwappedComparison(
Kind Cmp) {
111 assert ((!((Cmp & L) && (Cmp &
G))) &&
"Malformed comparison operator");
112 if ((Cmp & L) || (Cmp &
G))
113 return (
Kind)(Cmp ^ (L|
G));
159 bool containsInvalidInstruction(
MachineLoop *L)
const;
220 int HexagonHardwareLoops::Counter = 0;
228 enum CountValueType {
243 explicit CountValue(CountValueType t,
unsigned v,
unsigned u = 0) {
245 if (
Kind == CV_Register) {
252 bool isReg()
const {
return Kind == CV_Register; }
253 bool isImm()
const {
return Kind == CV_Immediate; }
256 assert(
isReg() &&
"Wrong CountValue accessor");
257 return Contents.R.Reg;
259 unsigned getSubReg()
const {
260 assert(
isReg() &&
"Wrong CountValue accessor");
261 return Contents.R.Sub;
263 unsigned getImm()
const {
264 assert(isImm() &&
"Wrong CountValue accessor");
265 return Contents.ImmVal;
270 if (
isReg()) { OS <<
PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
271 if (isImm()) { OS << Contents.ImmVal; }
278 "Hexagon Hardware Loops",
false,
false)
287 return MI->getOpcode() == Hexagon::LOOP0_r ||
288 MI->getOpcode() == Hexagon::LOOP0_i;
292 return new HexagonHardwareLoops();
297 DEBUG(
dbgs() <<
"********* Hexagon Hardware Loops *********\n");
299 bool Changed =
false;
301 MLI = &getAnalysis<MachineLoopInfo>();
303 MDT = &getAnalysis<MachineDominatorTree>();
312 Changed |= convertToHardwareLoop(L);
319 bool HexagonHardwareLoops::findInductionRegister(
MachineLoop *L,
327 if (!Header || !Preheader || !Latch)
332 typedef std::pair<unsigned,int64_t> RegisterBump;
338 typedef std::map<unsigned,RegisterBump> InductionMap;
344 I != E &&
I->isPHI(); ++
I) {
357 bool isAdd = (UpdOpc == Hexagon::ADD_ri);
363 if (MRI->getVRegDef(IndReg) == Phi) {
366 IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
378 unsigned CSz = Cond.
size();
379 assert (CSz == 1 || CSz == 2);
380 unsigned PredR = Cond[CSz-1].getReg();
386 unsigned CmpReg1 = 0, CmpReg2 = 0;
387 int CmpImm = 0, CmpMask = 0;
398 InductionMap::iterator IndMapEnd = IndMap.end();
399 InductionMap::iterator
F = IndMapEnd;
401 InductionMap::iterator F1 = IndMap.find(CmpReg1);
406 InductionMap::iterator F2 = IndMap.find(CmpReg2);
407 if (F2 != IndMapEnd) {
416 Reg = F->second.first;
417 IVBump = F->second.second;
418 IVOp = MRI->getVRegDef(F->first);
430 CountValue *HexagonHardwareLoops::getLoopTripCount(
MachineLoop *L,
435 "Loop must have more than one incoming edge!");
462 bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
470 for (
unsigned i = 1, n = IV_Phi->
getNumOperands(); i < n; i += 2) {
472 if (MBB == Preheader)
474 else if (MBB == Latch)
490 assert (TB &&
"Latch block without a branch?");
491 assert ((!FB || TB == Header || FB == Header) &&
"Branches not to header?");
492 if (!TB || (FB && TB != Header && FB != Header))
499 bool Negated = (Cond.
size() > 1) ^ (TB != Header);
500 unsigned PredReg = Cond[Cond.
size()-1].getReg();
504 unsigned CmpReg1 = 0, CmpReg2 = 0;
505 int Mask = 0, ImmValue = 0;
522 bool isSwapped =
false;
540 case Hexagon::CMPEQri:
541 case Hexagon::CMPEQrr:
544 case Hexagon::CMPGTUri:
545 case Hexagon::CMPGTUrr:
546 Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
548 case Hexagon::CMPGTri:
549 case Hexagon::CMPGTrr:
550 Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
553 case Hexagon::CMPbEQri_V4:
554 case Hexagon::CMPhEQri_V4: {
561 assert(EndValue->
isImm() &&
"Unrecognized latch comparison");
562 EndV = EndValue->
getImm();
564 if (InitialValue->
isReg()) {
565 if (!defWithImmediate(InitialValue->
getReg()))
567 InitV = getImmediate(*InitialValue);
569 assert(InitialValue->
isImm());
570 InitV = InitialValue->
getImm();
574 if (CondOpc == Hexagon::CMPbEQri_V4) {
589 Cmp = Comparison::getSwappedComparison(Cmp);
591 if (InitialValue->
isReg()) {
592 unsigned R = InitialValue->
getReg();
594 if (!MDT->properlyDominates(DefBB, Header))
598 if (EndValue->
isReg()) {
599 unsigned R = EndValue->
getReg();
601 if (!MDT->properlyDominates(DefBB, Header))
605 return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
624 if (Start->
isReg()) {
626 if (StartValInstr && StartValInstr->
getOpcode() == Hexagon::TFRI)
631 if (EndValInstr && EndValInstr->
getOpcode() == Hexagon::TFRI)
638 bool CmpLess = Cmp & Comparison::L;
645 if (CmpLess && IVBump < 0)
649 if (CmpGreater && IVBump > 0)
654 int64_t StartV = Start->
getImm();
655 int64_t EndV = End->
getImm();
656 int64_t Dist = EndV - StartV;
660 bool Exact = (Dist % IVBump) == 0;
665 if ((Dist < 0) ^ (IVBump < 0))
672 Dist = Dist > 0 ? Dist+1 : Dist-1;
675 assert ((!CmpLess || Dist > 0) &&
"Loop should never iterate!");
677 assert ((!CmpGreater || Dist < 0) &&
"Loop should never iterate!");
680 int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
681 : (-Dist + (-IVBump-1)) / (-IVBump);
682 assert (Dist1 > 0 &&
"Fishy thing. Both operands have the same sign.");
684 uint64_t Count = Dist1;
686 if (Count > 0xFFFFFFFFULL)
689 return new CountValue(CountValue::CV_Immediate, Count);
702 assert (PH &&
"Should have a preheader by now");
704 DebugLoc DL = (InsertPos != PH->
end()) ? InsertPos->getDebugLoc()
721 bool RegToImm = Start->
isReg() && End->
isImm();
722 bool RegToReg = Start->
isReg() && End->
isReg();
724 int64_t StartV = 0, EndV = 0;
743 else if (End->
isImm())
751 StartV -= (IVBump-1);
752 else if (End->
isImm())
758 unsigned R = 0, SR = 0;
759 if (Start->
isReg()) {
769 if (!SR && RC == &Hexagon::DoubleRegsRegClass)
774 unsigned DistR, DistSR;
777 if (Start->
isImm() && StartV == 0) {
782 (RegToImm ?
TII->get(Hexagon::SUB_ri) :
783 TII->get(Hexagon::ADD_ri));
784 unsigned SubR = MRI->createVirtualRegister(IntRC);
786 BuildMI(*PH, InsertPos, DL, SubD, SubR);
791 }
else if (RegToImm) {
803 unsigned AdjR, AdjSR;
810 unsigned AddR = MRI->createVirtualRegister(IntRC);
812 BuildMI(*PH, InsertPos, DL, AddD, AddR)
821 unsigned CountR, CountSR;
828 unsigned Shift =
Log2_32(IVBump);
831 unsigned LsrR = MRI->createVirtualRegister(IntRC);
833 BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
841 return new CountValue(CountValue::CV_Register, CountR, CountSR);
846 bool HexagonHardwareLoops::isInvalidLoopOperation(
863 if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
864 R == Hexagon::SA0 || R == Hexagon::SA1)
873 bool HexagonHardwareLoops::containsInvalidInstruction(
MachineLoop *L)
const {
874 const std::vector<MachineBasicBlock *> &Blocks = L->
getBlocks();
875 for (
unsigned i = 0, e = Blocks.size(); i != e; ++i) {
880 if (isInvalidLoopOperation(MI))
892 bool HexagonHardwareLoops::isDead(
const MachineInstr *MI,
900 unsigned Reg = MO.
getReg();
901 if (MRI->use_nodbg_empty(Reg))
909 use_nodbg_iterator
I = MRI->use_nodbg_begin(Reg);
910 use_nodbg_iterator End = MRI->use_nodbg_end();
911 if (
llvm::next(I) != End || !I.getOperand().getParent()->isPHI())
920 unsigned OPReg = OPO.
getReg();
921 use_nodbg_iterator nextJ;
922 for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
923 J != End; J = nextJ) {
940 void HexagonHardwareLoops::removeIfDead(
MachineInstr *MI) {
944 if (isDead(MI, DeadPhis)) {
945 DEBUG(
dbgs() <<
"HW looping will remove: " << *MI);
954 unsigned Reg = MO.
getReg();
957 E = MRI->use_end(); I != E; I = nextI) {
971 for (
unsigned i = 0; i < DeadPhis.
size(); ++i)
972 DeadPhis[i]->eraseFromParent();
984 bool HexagonHardwareLoops::convertToHardwareLoop(
MachineLoop *L) {
986 assert(L->
getHeader() &&
"Loop without a header?");
988 bool Changed =
false;
991 Changed |= convertToHardwareLoop(*I);
1008 if (containsInvalidInstruction(L))
1012 if (!fixupInductionVariable(L))
1021 if (LastI == LastMBB->
end())
1026 bool NewPreheader =
false;
1029 Preheader = createPreheaderForLoop(L);
1032 NewPreheader =
true;
1038 CountValue *TripCount = getLoopTripCount(L, OldInsts);
1043 if (TripCount->isReg()) {
1046 MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1048 if (!NewPreheader) {
1049 if (!MDT->dominates(BBDef, Preheader))
1055 if (!MDT->properlyDominates(BBDef, L->
getHeader()))
1077 if (InsertPos != Preheader->
end())
1078 DL = InsertPos->getDebugLoc();
1080 if (TripCount->isReg()) {
1082 unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1084 .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1086 BuildMI(*Preheader, InsertPos, DL,
TII->get(Hexagon::LOOP0_r))
1090 assert(TripCount->isImm() &&
"Expecting immediate value for trip count");
1094 int64_t CountImm = TripCount->getImm();
1096 unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1097 BuildMI(*Preheader, InsertPos, DL,
TII->get(Hexagon::TFRI), CountReg)
1099 BuildMI(*Preheader, InsertPos, DL,
TII->get(Hexagon::LOOP0_r))
1100 .addMBB(LoopStart).
addReg(CountReg);
1102 BuildMI(*Preheader, InsertPos, DL,
TII->get(Hexagon::LOOP0_i))
1103 .addMBB(LoopStart).
addImm(CountImm);
1115 DebugLoc LastIDL = LastI->getDebugLoc();
1116 BuildMI(*LastMBB, LastI, LastIDL,
1117 TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
1122 if (LastI->getOpcode() == Hexagon::JMP_t ||
1123 LastI->getOpcode() == Hexagon::JMP_f) {
1126 LastI = LastMBB->
erase(LastI);
1128 if (LastI != LastMBB->
end())
1129 LastI = LastMBB->
erase(LastI);
1135 LastMBB->
erase(LastI);
1141 for (
unsigned i = 0; i < OldInsts.
size(); ++i)
1142 removeIfDead(OldInsts[i]);
1149 bool HexagonHardwareLoops::orderBumpCompare(
MachineInstr *BumpI,
1151 assert (BumpI != CmpI &&
"Bump and compare in the same instruction?");
1159 for (instr_iterator I = BumpI, E = BB->
instr_end(); I != E; ++
I)
1165 bool FoundBump =
false;
1166 instr_iterator CmpIt = CmpI, NextIt =
llvm::next(CmpIt);
1167 for (instr_iterator I = NextIt, E = BB->
instr_end(); I != E; ++
I) {
1172 if (MO.
getReg() == PredR)
1178 instr_iterator After = BumpI;
1179 instr_iterator From = CmpI;
1185 assert (FoundBump &&
"Cannot determine instruction order");
1190 MachineInstr *HexagonHardwareLoops::defWithImmediate(
unsigned R) {
1195 case Hexagon::TFRI64:
1197 case Hexagon::CONST64_Int_Real:
1208 unsigned R = MO.
getReg();
1210 assert(DI &&
"Need an immediate operand");
1218 void HexagonHardwareLoops::setImmediate(
MachineOperand &MO, int64_t Val) {
1225 unsigned R = MO.
getReg();
1227 if (MRI->hasOneNonDBGUse(R)) {
1235 unsigned NewR = MRI->createVirtualRegister(RC);
1244 bool HexagonHardwareLoops::fixupInductionVariable(
MachineLoop *L) {
1249 if (!Header || !Preheader || !Latch)
1254 typedef std::pair<unsigned,int64_t> RegisterBump;
1255 typedef std::pair<unsigned,RegisterBump> RegisterInduction;
1256 typedef std::set<RegisterInduction> RegisterInductionSet;
1259 RegisterInductionSet IndRegs;
1266 I != E && I->isPHI(); ++
I) {
1277 bool isAdd = (UpdOpc == Hexagon::ADD_ri);
1283 if (MRI->getVRegDef(IndReg) == Phi) {
1286 IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1292 if (IndRegs.empty())
1306 if (TB != Header && FB != Header)
1315 unsigned CSz = Cond.
size();
1316 if (CSz != 1 && CSz != 2)
1319 unsigned P = Cond[CSz-1].getReg();
1332 for (
unsigned i = 0, n = PredDef->
getNumOperands(); i < n; ++i) {
1340 unsigned R = MO.
getReg();
1341 if (!defWithImmediate(R)) {
1350 }
else if (MO.
isImm()) {
1357 if (CmpRegs.
empty())
1361 for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1366 if (CmpRegs.
count(I->first))
1372 const RegisterBump &RB = I->second;
1373 if (CmpRegs.
count(RB.first)) {
1377 int64_t CmpImm = getImmediate(*CmpImmOp);
1378 int64_t V = RB.second;
1379 if (V > 0 && CmpImm+V < CmpImm)
1381 if (V < 0 && CmpImm+V > CmpImm)
1392 bool Order = orderBumpCompare(BumpI, PredDef);
1397 setImmediate(*CmpImmOp, CmpImm);
1398 for (
unsigned i = 0, n = PredDef->
getNumOperands(); i < n; ++i) {
1430 typedef std::vector<MachineBasicBlock*> MBBVector;
1438 for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++
I) {
1448 MF->
insert(Header, NewPH);
1457 I != E && I->isPHI(); ++
I) {
1466 unsigned NewPR = MRI->createVirtualRegister(RC);
1485 if (PredB != Latch) {
1503 I != E && I->isPHI(); ++
I) {
1507 if (MO.
getMBB() != Latch)
1521 for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++
I) {
1527 assert (!NotAnalyzed &&
"Should be analyzable!");
1528 if (TB != Header && (Tmp2.
empty() || FB != Header))
1538 (void)LatchNotAnalyzed;
1539 assert (!LatchNotAnalyzed &&
"Should be analyzable!");
static bool isReg(const MCInst &MI, unsigned OpNo)
void push_back(const T &Elt)
const MachineFunction * getParent() const
instr_iterator erase(instr_iterator I)
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp=false)
MachineInstr * getParent()
static PassRegistry * getPassRegistry()
instr_iterator instr_begin()
instr_iterator instr_end()
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
MachineBasicBlock * getMBB() const
void initializeHexagonHardwareLoopsPass(PassRegistry &)
iterator getFirstTerminator()
const MCInstrDesc & getDesc() const
STATISTIC(NumHWLoops,"Number of loops converted to hardware loops")
LoopT * getParentLoop() const
FunctionPass * createHexagonHardwareLoops()
Instructions::iterator instr_iterator
const std::vector< BlockT * > & getBlocks() const
BlockT * getHeader() const
BlockT * getLoopLatch() const
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops,"hwloops","Hexagon Hardware Loops", false, false) INITIALIZE_PASS_END(HexagonHardwareLoops
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
MachineBasicBlock * getTopBlock()
const HexagonInstrInfo * TII
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isCall() const
Return true if the instruction is a call.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isInt< 8 >(int64_t x)
ID
LLVM Calling Convention Representation.
const MachineInstrBuilder & addImm(int64_t Val) const
unsigned getNumOperands() const
void RemoveOperand(unsigned i)
bool isValidOffset(const int Opcode, const int Offset) const
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
std::vector< MachineBasicBlock * >::iterator pred_iterator
static cl::opt< int > HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1))
const BasicBlock * getBasicBlock() const
const MachineBasicBlock * getParent() const
bundle_iterator< MachineInstr, instr_iterator > iterator
initializer< Ty > init(const Ty &Val)
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=0)
BlockT * getLoopPreheader() const
static BlockAddress * get(Function *F, BasicBlock *BB)
get - Return a BlockAddress for the specified function and basic block.
const MachineOperand & getOperand(unsigned i) const
void setMBB(MachineBasicBlock *MBB)
ItTy next(ItTy it, Dist n)
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl< MachineOperand > &Cond, DebugLoc DL) const
bool contains(const LoopT *L) const
void setImm(int64_t immVal)
BlockT * getExitingBlock() const
virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
unsigned getSubReg() const
pred_iterator pred_begin()
Hexagon Hardware static false bool isHardwareLoop(const MachineInstr *MI)
Returns true if the instruction is a hardware loop instruction.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
bool isSuccessor(const MachineBasicBlock *MBB) const
bool isCompare(QueryType Type=IgnoreBundle) const
isCompare - Return true if this instruction is a comparison.
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
unsigned Log2_32(uint32_t Value)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
void setHasAddressTaken()
bool isPowerOf2_64(uint64_t Value)
bool count(const T &V) const
count - Return true if the element is in the set.
bool hasAddressTaken() const
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
MachineRegisterInfo & getRegInfo()
virtual void getAnalysisUsage(AnalysisUsage &AU) const
void setReg(unsigned Reg)
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
const TargetMachine & getTarget() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
bool isInt< 16 >(int64_t x)
unsigned getReg() const
getReg - Returns the register number.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MCRegisterInfo & MRI
std::vector< LoopT * >::const_iterator iterator
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
unsigned pred_size() const
DebugLoc getDebugLoc() const