14 #define DEBUG_TYPE "loop-reroll"
38 STATISTIC(NumRerolledLoops,
"Number of rerolled loops");
42 cl::desc(
"The maximum increment for loop rerolling"));
120 class LoopReroll :
public LoopPass {
153 struct SimpleLoopReduction {
155 : Valid(
false), Instructions(1, P) {
156 assert(isa<PHINode>(P) &&
"First reduction instruction must be a PHI");
165 assert(Valid &&
"Using invalid reduction");
166 return Instructions.front();
170 assert(Valid &&
"Using invalid reduction");
171 return Instructions.back();
175 assert(Valid &&
"Using invalid reduction");
176 return Instructions[i+1];
179 Instruction *operator [] (
size_t i)
const {
return get(i); }
182 size_t size()
const {
183 assert(Valid &&
"Using invalid reduction");
184 return Instructions.size()-1;
187 typedef SmallInstructionVector::iterator iterator;
188 typedef SmallInstructionVector::const_iterator const_iterator;
191 assert(Valid &&
"Using invalid reduction");
195 const_iterator
begin()
const {
196 assert(Valid &&
"Using invalid reduction");
200 iterator
end() {
return Instructions.
end(); }
201 const_iterator
end()
const {
return Instructions.
end(); }
205 SmallInstructionVector Instructions;
212 struct ReductionTracker {
216 void addSLR(SimpleLoopReduction &SLR) {
227 void restrictToScale(uint64_t Scale,
228 SmallInstructionSet &PossibleRedSet,
229 SmallInstructionSet &PossibleRedPHISet,
230 SmallInstructionSet &PossibleRedLastSet) {
231 PossibleRedIdx.clear();
232 PossibleRedIter.clear();
235 for (
unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
236 if (PossibleReds[i].size() % Scale == 0) {
237 PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
238 PossibleRedPHISet.insert(PossibleReds[i].getPHI());
240 PossibleRedSet.insert(PossibleReds[i].getPHI());
241 PossibleRedIdx[PossibleReds[i].getPHI()] = i;
242 for (SimpleLoopReduction::iterator J = PossibleReds[i].
begin(),
243 JE = PossibleReds[i].
end(); J != JE; ++J) {
244 PossibleRedSet.insert(*J);
245 PossibleRedIdx[*J] = i;
256 if (J1I != PossibleRedIdx.
end()) {
258 if (J2I != PossibleRedIdx.
end() && J1I->second == J2I->second)
269 if (PossibleRedIdx.count(J1)) {
270 assert(PossibleRedIdx.count(J2) &&
271 "Recording reduction vs. non-reduction instruction?");
273 PossibleRedIter[J1] = 0;
274 PossibleRedIter[J2] = i;
276 int Idx = PossibleRedIdx[J1];
277 assert(Idx == PossibleRedIdx[J2] &&
278 "Recording pair from different reductions?");
289 if (!isa<PHINode>(J))
295 if (cast<Instruction>(J) == PossibleReds[i].getPHI())
302 bool validateSelected();
303 void replaceSelected();
307 SmallReductionVector PossibleReds;
314 void collectPossibleIVs(
Loop *L, SmallInstructionVector &PossibleIVs);
315 void collectPossibleReductions(
Loop *L,
316 ReductionTracker &Reductions);
317 void collectInLoopUserSet(
Loop *L,
318 const SmallInstructionVector &Roots,
319 const SmallInstructionSet &Exclude,
320 const SmallInstructionSet &Final,
322 void collectInLoopUserSet(
Loop *L,
324 const SmallInstructionSet &Exclude,
325 const SmallInstructionSet &Final,
327 bool findScaleFromMul(
Instruction *RealIV, uint64_t &Scale,
329 SmallInstructionVector &LoopIncs);
330 bool collectAllRoots(
Loop *L, uint64_t Inc, uint64_t Scale,
Instruction *IV,
332 SmallInstructionSet &AllRoots,
333 SmallInstructionVector &LoopIncs);
335 ReductionTracker &Reductions);
349 return new LoopReroll;
357 UIE = I->
use_end(); UI != UIE; ++UI) {
368 void LoopReroll::collectPossibleIVs(
Loop *L,
369 SmallInstructionVector &PossibleIVs) {
373 if (!isa<PHINode>(
I))
375 if (!
I->getType()->isIntegerTy())
379 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(
I))) {
380 if (PHISCEV->getLoop() != L)
382 if (!PHISCEV->isAffine())
385 dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
386 if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
388 if (IncSCEV->getValue()->uge(
MaxInc))
391 DEBUG(
dbgs() <<
"LRR: Possible IV: " << *
I <<
" = " <<
393 PossibleIVs.push_back(
I);
403 assert(!Valid &&
"Cannot add to an already-valid chain");
416 if (!(isa<PHINode>(Instructions.back()) ||
420 Instructions.push_back(C);
424 if (Instructions.size() < 2 ||
433 if (L->
contains(cast<Instruction>(*UI)))
434 if (cast<Instruction>(*UI ) != Instructions.front())
438 Instructions.push_back(C);
443 void LoopReroll::collectPossibleReductions(
Loop *L,
444 ReductionTracker &Reductions) {
448 if (!isa<PHINode>(
I))
450 if (!
I->getType()->isSingleValueType())
453 SimpleLoopReduction SLR(
I, L);
457 DEBUG(
dbgs() <<
"LRR: Possible reduction: " << *
I <<
" (with " <<
458 SLR.size() <<
" chained instructions)\n");
459 Reductions.addSLR(SLR);
475 void LoopReroll::collectInLoopUserSet(
Loop *L,
476 Instruction *Root,
const SmallInstructionSet &Exclude,
477 const SmallInstructionSet &Final,
479 SmallInstructionVector Queue(1, Root);
480 while (!Queue.empty()) {
482 if (!Users.
insert(I).second)
487 UIE = I->
use_end(); UI != UIE; ++UI) {
489 if (
PHINode *PN = dyn_cast<PHINode>(User)) {
491 if (PN->getIncomingBlock(UI) == L->
getHeader())
495 if (L->
contains(User) && !Exclude.count(User)) {
496 Queue.push_back(User);
502 OIE = I->
op_end(); OI != OIE; ++OI) {
504 if (Op->hasOneUse() && L->
contains(Op) && !Exclude.count(Op) &&
513 void LoopReroll::collectInLoopUserSet(
Loop *L,
514 const SmallInstructionVector &Roots,
515 const SmallInstructionSet &Exclude,
516 const SmallInstructionSet &Final,
518 for (SmallInstructionVector::const_iterator I = Roots.begin(),
519 IE = Roots.end(); I !=
IE; ++
I)
520 collectInLoopUserSet(L, *I, Exclude, Final, Users);
525 return LI->isSimple();
526 if (
StoreInst *SI = dyn_cast<StoreInst>(I))
527 return SI->isSimple();
529 return !
MI->isVolatile();
550 bool LoopReroll::findScaleFromMul(
Instruction *RealIV, uint64_t &Scale,
552 SmallInstructionVector &LoopIncs) {
561 const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
564 if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
569 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
570 if (!User1SCEV || !User1SCEV->
isAffine() ||
571 !User2SCEV || !User2SCEV->isAffine())
583 assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
584 "Invalid non-unit step for multiplicative scaling");
585 LoopIncs.push_back(User2);
592 if (SE->getMulExpr(RealIVSCEV->
getStart(), MulScale) !=
604 DEBUG(
dbgs() <<
"LRR: Found possible scaling " << *User1 <<
"\n");
614 bool LoopReroll::collectAllRoots(
Loop *L, uint64_t Inc, uint64_t Scale,
617 SmallInstructionSet &AllRoots,
618 SmallInstructionVector &LoopIncs) {
620 UIE = IV->
use_end(); UI != UIE; ++UI) {
622 if (!SE->isSCEVable(User->
getType()))
631 if (
const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
632 SE->getSCEV(User), SE->getSCEV(IV)))) {
633 uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
634 if (Idx > 0 && Idx < Scale) {
636 AllRoots.insert(User);
637 }
else if (Idx == Scale && Inc > 1) {
638 LoopIncs.push_back(User);
643 if (Roots[0].empty())
646 for (
unsigned i = 1; i < Scale-1; ++i)
647 if (Roots[i].size() != Roots[0].size()) {
661 bool LoopReroll::ReductionTracker::validateSelected() {
666 int PrevIter = 0, BaseCount = 0, Count = 0;
667 for (SimpleLoopReduction::iterator J = PossibleReds[i].
begin(),
668 JE = PossibleReds[i].
end(); J != JE; ++J) {
672 int Iter = PossibleRedIter[*J];
673 if (Iter != PrevIter && Iter != PrevIter + 1 &&
674 !PossibleReds[i].getReducedValue()->isAssociative()) {
675 DEBUG(
dbgs() <<
"LRR: Out-of-order non-associative reduction: " <<
680 if (Iter != PrevIter) {
681 if (Count != BaseCount) {
682 DEBUG(
dbgs() <<
"LRR: Iteration " << PrevIter <<
683 " reduction use count " << Count <<
684 " is not equal to the base use count " <<
707 void LoopReroll::ReductionTracker::replaceSelected() {
714 for (
int e = PossibleReds[i].size(); j != e; ++j)
715 if (PossibleRedIter[PossibleReds[i][j]] != 0) {
721 SmallInstructionVector
Users;
723 PossibleReds[i].getReducedValue()->use_begin(),
724 UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
725 Users.push_back(cast<Instruction>(*UI));
727 for (SmallInstructionVector::iterator J = Users.begin(),
728 JE = Users.end(); J != JE; ++J)
729 (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
778 const SCEV *IterCount,
779 ReductionTracker &Reductions) {
780 const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
781 uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->
getOperand(1))->
782 getValue()->getZExtValue();
784 SmallInstructionVector LoopIncs;
785 uint64_t Scale = Inc;
795 if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
798 assert(Scale <=
MaxInc &&
"Scale is too large");
799 assert(Scale > 1 &&
"Scale must be at least 2");
803 SmallInstructionSet AllRoots;
804 if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
807 DEBUG(
dbgs() <<
"LRR: Found all root induction increments for: " <<
815 SmallInstructionSet PossibleRedSet;
816 SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
817 Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
828 SmallInstructionSet Exclude(AllRoots);
829 Exclude.insert(LoopIncs.begin(), LoopIncs.end());
832 collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
835 std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
837 bool MatchFailed =
false;
838 for (
unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
840 collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
841 PossibleRedSet, RootUseSet);
843 DEBUG(
dbgs() <<
"LRR: base use set size: " << BaseUseSet.
size() <<
844 " vs. iteration increment " << (i+1) <<
845 " use set size: " << RootUseSet.
size() <<
"\n");
847 if (BaseUseSet.
size() != RootUseSet.
size()) {
856 bool FutureSideEffects =
false;
862 assert(L->
getNumBlocks() == 1 &&
"Cannot handle multi-block loops");
864 JE = Header->
end(); J1 != JE && !MatchFailed; ++J1) {
865 if (cast<Instruction>(J1) == RealIV)
867 if (cast<Instruction>(J1) == IV)
869 if (!BaseUseSet.
count(J1))
871 if (PossibleRedPHISet.count(J1))
874 while (J2 != JE && (!RootUseSet.
count(J2) ||
875 std::find(Roots[i].
begin(), Roots[i].
end(), J2) !=
881 if (!isa<PHINode>(J2) && !BaseUseSet.
count(J2) &&
882 !AllRootUses.
count(J2)) {
891 FutureSideEffects =
true;
898 DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *J1 <<
899 " vs. " << *J2 <<
"\n");
907 if (BaseUseSet.
count(J2) || AllRootUses.
count(J2)) {
908 DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *J1 <<
909 " vs. " << *J2 <<
" (prev. case overlap)\n");
919 K != KE && !MatchFailed; ++K) {
920 if (K->aliasesUnknownInst(J2, *AA)) {
921 DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *J1 <<
922 " vs. " << *J2 <<
" (depends on future store)\n");
933 if (FutureSideEffects &&
936 DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *J1 <<
938 " (side effects prevent reordering)\n");
953 bool InReduction = Reductions.isPairInSame(J1, J2);
956 bool Swapped =
false, SomeOpMatched =
false;;
957 for (
unsigned j = 0; j < J1->
getNumOperands() && !MatchFailed; ++j) {
964 if (
Instruction *Op2I = dyn_cast<Instruction>(Op2))
965 if (Reductions.isPairInSame(J2, Op2I))
969 if (BMI != BaseMap.
end())
971 else if (std::find(Roots[i].
begin(), Roots[i].
end(),
975 if (J1->
getOperand(Swapped ?
unsigned(!j) : j) != Op2) {
985 DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *J1 <<
986 " vs. " << *J2 <<
" (operand " << j <<
")\n");
992 SomeOpMatched =
true;
998 DEBUG(
dbgs() <<
"LRR: iteration root match failed at " << *J1 <<
999 " vs. " << *J2 <<
" (uses outside loop)\n");
1005 BaseMap.
insert(std::pair<Value *, Value *>(J2, J1));
1008 Reductions.recordPair(J1, J2, i+1);
1017 DEBUG(
dbgs() <<
"LRR: Matched all iteration increments for " <<
1021 collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
1022 SmallInstructionSet(), LoopIncUseSet);
1023 DEBUG(
dbgs() <<
"LRR: Loop increment set size: " <<
1024 LoopIncUseSet.
size() <<
"\n");
1030 if (isa<DbgInfoIntrinsic>(J))
1032 if (cast<Instruction>(J) == RealIV)
1034 if (cast<Instruction>(J) == IV)
1036 if (BaseUseSet.
count(J) || AllRootUses.
count(J) ||
1041 if (AllRoots.count(J))
1044 if (Reductions.isSelectedPHI(J))
1047 DEBUG(
dbgs() <<
"LRR: aborting reroll based on " << *RealIV <<
1048 " unprocessed instruction found: " << *J <<
"\n");
1056 DEBUG(
dbgs() <<
"LRR: all instructions processed from " <<
1059 if (!Reductions.validateSelected())
1065 Reductions.replaceSelected();
1069 J != Header->
rend();) {
1070 if (AllRootUses.
count(&*J)) {
1072 DEBUG(
dbgs() <<
"LRR: removing: " << *D <<
"\n");
1083 Start = SE->getMulExpr(Start,
1084 SE->getConstant(Start->
getType(), Scale));
1086 cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
1087 SE->getConstant(RealIVSCEV->
getType(), 1),
1092 cast<PHINode>(Expander.expandCodeFor(H, IV->
getType(),
1095 JE = BaseUseSet.
end(); J != JE; ++J)
1096 (*J)->replaceUsesOfWith(IV, NewIV);
1099 if (LoopIncUseSet.
count(BI)) {
1103 SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->
getType(), Scale));
1105 if (isa<SCEVConstant>(ICSCEV)) {
1106 IC = Expander.expandCodeFor(ICSCEV, NewIV->
getType(), BI);
1112 IC = Expander.expandCodeFor(ICSCEV, NewIV->
getType(),
1119 BI->setCondition(Cond);
1121 if (BI->getSuccessor(1) != Header)
1122 BI->swapSuccessors();
1134 AA = &getAnalysis<AliasAnalysis>();
1135 LI = &getAnalysis<LoopInfo>();
1136 SE = &getAnalysis<ScalarEvolution>();
1137 TLI = &getAnalysis<TargetLibraryInfo>();
1138 DL = getAnalysisIfAvailable<DataLayout>();
1139 DT = &getAnalysis<DominatorTree>();
1142 DEBUG(
dbgs() <<
"LRR: F[" << Header->getParent()->getName() <<
1143 "] Loop %" << Header->getName() <<
" (" <<
1146 bool Changed =
false;
1152 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1155 const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
1156 const SCEV *IterCount =
1157 SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->
getType(), 1));
1158 DEBUG(
dbgs() <<
"LRR: iteration count = " << *IterCount <<
"\n");
1162 SmallInstructionVector PossibleIVs;
1163 collectPossibleIVs(L, PossibleIVs);
1165 if (PossibleIVs.empty()) {
1166 DEBUG(
dbgs() <<
"LRR: No possible IVs found\n");
1170 ReductionTracker Reductions;
1171 collectPossibleReductions(L, Reductions);
1175 for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
1176 IE = PossibleIVs.end(); I !=
IE; ++
I)
1177 if (reroll(*I, L, Header, IterCount, Reductions)) {
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
void push_back(const T &Elt)
const_iterator end(StringRef path)
Get end iterator over path.
Pass * createLoopRerollPass()
static bool isSimpleLoadStore(Instruction *I)
AnalysisUsage & addPreserved()
static PassRegistry * getPassRegistry()
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getNumOperands() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
const_iterator begin(StringRef path)
Get begin iterator over path.
void initializeLoopRerollPass(PassRegistry &)
reverse_iterator rbegin()
iv Induction Variable Users
BlockT * getHeader() const
LoopInfoBase< BlockT, LoopT > * LI
const SCEV * getStart() const
bool uge(uint64_t Num) const
Determine if the value is greater or equal to the given number.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
static cl::opt< unsigned > MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, cl::desc("The maximum increment for loop rerolling"))
BasicBlock * InsertPreheaderForLoop(Loop *L, Pass *P)
const APInt & getValue() const
Return the constant's value.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
static bool hasUsesOutsideLoop(Instruction *I, Loop *L)
ID
LLVM Calling Convention Representation.
uint64_t getZExtValue() const
Return the zero extended value.
bool mayReadFromMemory() const
bool isAssociative() const
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
General addition of 64-bit integer arrays.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD=0, const TargetLibraryInfo *TLI=0)
InstListType::reverse_iterator reverse_iterator
initializer< Ty > init(const Ty &Val)
friend const_iterator end(StringRef path)
Get end iterator over path.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
BlockT * getLoopPreheader() const
LLVM Basic Block Representation.
const SCEV * getOperand(unsigned i) const
ItTy next(ItTy it, Dist n)
bool contains(const LoopT *L) const
Represent an integer comparison operator.
const SCEVAddRecExpr * getPostIncExpr(ScalarEvolution &SE) const
Value * getOperand(unsigned i) const
bool isCommutative() const
#define INITIALIZE_AG_DEPENDENCY(depName)
bool mayWriteToMemory() const
bool isTerminator() const
bool isSafeToSpeculativelyExecute(const Value *V, const DataLayout *TD=0)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool count(const ValueT &V) const
std::pair< iterator, bool > insert(const ValueT &V)
Class for constant integers.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumBlocks() const
getNumBlocks - Get the number of blocks in this loop in constant time.
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM Value Representation.
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=0)
STATISTIC(NumRerolledLoops,"Number of rerolled loops")
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
Determine if one instruction is the same operation as another.
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
unsigned getNumUses() const
iterator find(const KeyT &Val)