LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LoopUnrollRuntime.cpp
Go to the documentation of this file.
1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements some loop unrolling utilities for loops with run-time
11 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
12 // trip counts.
13 //
14 // The functions in this file are used to generate extra code when the
15 // run-time trip count modulo the unroll factor is not 0. When this is the
16 // case, we need to generate code to execute these 'left over' iterations.
17 //
18 // The current strategy generates an if-then-else sequence prior to the
19 // unrolled loop to execute the 'left over' iterations. Other strategies
20 // include generate a loop before or after the unrolled loop.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #define DEBUG_TYPE "loop-unroll"
26 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/LoopPass.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/Support/Debug.h"
36 #include <algorithm>
37 
38 using namespace llvm;
39 
40 STATISTIC(NumRuntimeUnrolled,
41  "Number of loops unrolled with run-time trip counts");
42 
43 /// Connect the unrolling prolog code to the original loop.
44 /// The unrolling prolog code contains code to execute the
45 /// 'extra' iterations if the run-time trip count modulo the
46 /// unroll count is non-zero.
47 ///
48 /// This function performs the following:
49 /// - Create PHI nodes at prolog end block to combine values
50 /// that exit the prolog code and jump around the prolog.
51 /// - Add a PHI operand to a PHI node at the loop exit block
52 /// for values that exit the prolog and go around the loop.
53 /// - Branch around the original loop if the trip count is less
54 /// than the unroll factor.
55 ///
56 static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
57  BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
58  BasicBlock *OrigPH, BasicBlock *NewPH,
59  ValueToValueMapTy &LVMap, Pass *P) {
60  BasicBlock *Latch = L->getLoopLatch();
61  assert(Latch != 0 && "Loop must have a latch");
62 
63  // Create a PHI node for each outgoing value from the original loop
64  // (which means it is an outgoing value from the prolog code too).
65  // The new PHI node is inserted in the prolog end basic block.
66  // The new PHI name is added as an operand of a PHI node in either
67  // the loop header or the loop exit block.
68  for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
69  SBI != SBE; ++SBI) {
70  for (BasicBlock::iterator BBI = (*SBI)->begin();
71  PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
72 
73  // Add a new PHI node to the prolog end block and add the
74  // appropriate incoming values.
75  PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
76  PrologEnd->getTerminator());
77  // Adding a value to the new PHI node from the original loop preheader.
78  // This is the value that skips all the prolog code.
79  if (L->contains(PN)) {
80  NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
81  } else {
82  NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
83  }
84 
85  Value *V = PN->getIncomingValueForBlock(Latch);
86  if (Instruction *I = dyn_cast<Instruction>(V)) {
87  if (L->contains(I)) {
88  V = LVMap[I];
89  }
90  }
91  // Adding a value to the new PHI node from the last prolog block
92  // that was created.
93  NewPN->addIncoming(V, LastPrologBB);
94 
95  // Update the existing PHI node operand with the value from the
96  // new PHI node. How this is done depends on if the existing
97  // PHI node is in the original loop block, or the exit block.
98  if (L->contains(PN)) {
99  PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
100  } else {
101  PN->addIncoming(NewPN, PrologEnd);
102  }
103  }
104  }
105 
106  // Create a branch around the orignal loop, which is taken if the
107  // trip count is less than the unroll factor.
108  Instruction *InsertPt = PrologEnd->getTerminator();
109  Instruction *BrLoopExit =
110  new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
111  ConstantInt::get(TripCount->getType(), Count));
112  BasicBlock *Exit = L->getUniqueExitBlock();
113  assert(Exit != 0 && "Loop must have a single exit block only");
114  // Split the exit to maintain loop canonicalization guarantees
115  SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
116  if (!Exit->isLandingPad()) {
117  SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
118  } else {
120  SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
121  P, NewBBs);
122  }
123  // Add the branch to the exit block (around the unrolled loop)
124  BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
125  InsertPt->eraseFromParent();
126 }
127 
128 /// Create a clone of the blocks in a loop and connect them together.
129 /// This function doesn't create a clone of the loop structure.
130 ///
131 /// There are two value maps that are defined and used. VMap is
132 /// for the values in the current loop instance. LVMap contains
133 /// the values from the last loop instance. We need the LVMap values
134 /// to update the initial values for the current loop instance.
135 ///
136 static void CloneLoopBlocks(Loop *L,
137  bool FirstCopy,
138  BasicBlock *InsertTop,
139  BasicBlock *InsertBot,
140  std::vector<BasicBlock *> &NewBlocks,
141  LoopBlocksDFS &LoopBlocks,
142  ValueToValueMapTy &VMap,
143  ValueToValueMapTy &LVMap,
144  LoopInfo *LI) {
145 
146  BasicBlock *Preheader = L->getLoopPreheader();
147  BasicBlock *Header = L->getHeader();
148  BasicBlock *Latch = L->getLoopLatch();
149  Function *F = Header->getParent();
150  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
151  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
152  // For each block in the original loop, create a new copy,
153  // and update the value map with the newly created values.
154  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
155  BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
156  NewBlocks.push_back(NewBB);
157 
158  if (Loop *ParentLoop = L->getParentLoop())
159  ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
160 
161  VMap[*BB] = NewBB;
162  if (Header == *BB) {
163  // For the first block, add a CFG connection to this newly
164  // created block
165  InsertTop->getTerminator()->setSuccessor(0, NewBB);
166 
167  // Change the incoming values to the ones defined in the
168  // previously cloned loop.
169  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
170  PHINode *NewPHI = cast<PHINode>(VMap[I]);
171  if (FirstCopy) {
172  // We replace the first phi node with the value from the preheader
173  VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
174  NewBB->getInstList().erase(NewPHI);
175  } else {
176  // Update VMap with values from the previous block
177  unsigned idx = NewPHI->getBasicBlockIndex(Latch);
178  Value *InVal = NewPHI->getIncomingValue(idx);
179  if (Instruction *I = dyn_cast<Instruction>(InVal))
180  if (L->contains(I))
181  InVal = LVMap[InVal];
182  NewPHI->setIncomingValue(idx, InVal);
183  NewPHI->setIncomingBlock(idx, InsertTop);
184  }
185  }
186  }
187 
188  if (Latch == *BB) {
189  VMap.erase((*BB)->getTerminator());
190  NewBB->getTerminator()->eraseFromParent();
191  BranchInst::Create(InsertBot, NewBB);
192  }
193  }
194  // LastValueMap is updated with the values for the current loop
195  // which are used the next time this function is called.
196  for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
197  VI != VE; ++VI) {
198  LVMap[VI->first] = VI->second;
199  }
200 }
201 
202 /// Insert code in the prolog code when unrolling a loop with a
203 /// run-time trip-count.
204 ///
205 /// This method assumes that the loop unroll factor is total number
206 /// of loop bodes in the loop after unrolling. (Some folks refer
207 /// to the unroll factor as the number of *extra* copies added).
208 /// We assume also that the loop unroll factor is a power-of-two. So, after
209 /// unrolling the loop, the number of loop bodies executed is 2,
210 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
211 /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
212 /// the switch instruction is generated.
213 ///
214 /// extraiters = tripcount % loopfactor
215 /// if (extraiters == 0) jump Loop:
216 /// if (extraiters == loopfactor) jump L1
217 /// if (extraiters == loopfactor-1) jump L2
218 /// ...
219 /// L1: LoopBody;
220 /// L2: LoopBody;
221 /// ...
222 /// if tripcount < loopfactor jump End
223 /// Loop:
224 /// ...
225 /// End:
226 ///
227 bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
228  LPPassManager *LPM) {
229  // for now, only unroll loops that contain a single exit
230  if (!L->getExitingBlock())
231  return false;
232 
233  // Make sure the loop is in canonical form, and there is a single
234  // exit block only.
235  if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
236  return false;
237 
238  // Use Scalar Evolution to compute the trip count. This allows more
239  // loops to be unrolled than relying on induction var simplification
240  if (!LPM)
241  return false;
243  if (SE == 0)
244  return false;
245 
246  // Only unroll loops with a computable trip count and the trip count needs
247  // to be an int value (allowing a pointer type is a TODO item)
248  const SCEV *BECount = SE->getBackedgeTakenCount(L);
249  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
250  return false;
251 
252  // Add 1 since the backedge count doesn't include the first loop iteration
253  const SCEV *TripCountSC =
254  SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
255  if (isa<SCEVCouldNotCompute>(TripCountSC))
256  return false;
257 
258  // We only handle cases when the unroll factor is a power of 2.
259  // Count is the loop unroll factor, the number of extra copies added + 1.
260  if ((Count & (Count-1)) != 0)
261  return false;
262 
263  // If this loop is nested, then the loop unroller changes the code in
264  // parent loop, so the Scalar Evolution pass needs to be run again
265  if (Loop *ParentLoop = L->getParentLoop())
266  SE->forgetLoop(ParentLoop);
267 
268  BasicBlock *PH = L->getLoopPreheader();
269  BasicBlock *Header = L->getHeader();
270  BasicBlock *Latch = L->getLoopLatch();
271  // It helps to splits the original preheader twice, one for the end of the
272  // prolog code and one for a new loop preheader
273  BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
274  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
275  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
276 
277  // Compute the number of extra iterations required, which is:
278  // extra iterations = run-time trip count % (loop unroll factor + 1)
279  SCEVExpander Expander(*SE, "loop-unroll");
280  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
281  PreHeaderBR);
282  Type *CountTy = TripCount->getType();
283  BinaryOperator *ModVal =
284  BinaryOperator::CreateURem(TripCount,
285  ConstantInt::get(CountTy, Count),
286  "xtraiter");
287  ModVal->insertBefore(PreHeaderBR);
288 
289  // Check if for no extra iterations, then jump to unrolled loop
290  Value *BranchVal = new ICmpInst(PreHeaderBR,
291  ICmpInst::ICMP_NE, ModVal,
292  ConstantInt::get(CountTy, 0), "lcmp");
293  // Branch to either the extra iterations or the unrolled loop
294  // We will fix up the true branch label when adding loop body copies
295  BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
296  assert(PreHeaderBR->isUnconditional() &&
297  PreHeaderBR->getSuccessor(0) == PEnd &&
298  "CFG edges in Preheader are not correct");
299  PreHeaderBR->eraseFromParent();
300 
301  ValueToValueMapTy LVMap;
302  Function *F = Header->getParent();
303  // These variables are used to update the CFG links in each iteration
304  BasicBlock *CompareBB = 0;
305  BasicBlock *LastLoopBB = PH;
306  // Get an ordered list of blocks in the loop to help with the ordering of the
307  // cloned blocks in the prolog code
308  LoopBlocksDFS LoopBlocks(L);
309  LoopBlocks.perform(LI);
310 
311  //
312  // For each extra loop iteration, create a copy of the loop's basic blocks
313  // and generate a condition that branches to the copy depending on the
314  // number of 'left over' iterations.
315  //
316  for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
317  std::vector<BasicBlock*> NewBlocks;
318  ValueToValueMapTy VMap;
319 
320  // Clone all the basic blocks in the loop, but we don't clone the loop
321  // This function adds the appropriate CFG connections.
322  CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
323  LoopBlocks, VMap, LVMap, LI);
324  LastLoopBB = cast<BasicBlock>(VMap[Latch]);
325 
326  // Insert the cloned blocks into function just before the original loop
328  NewBlocks[0], F->end());
329 
330  // Generate the code for the comparison which determines if the loop
331  // prolog code needs to be executed.
332  if (leftOverIters == Count-1) {
333  // There is no compare block for the fall-thru case when for the last
334  // left over iteration
335  CompareBB = NewBlocks[0];
336  } else {
337  // Create a new block for the comparison
338  BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
339  F, CompareBB);
340  if (Loop *ParentLoop = L->getParentLoop()) {
341  // Add the new block to the parent loop, if needed
342  ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
343  }
344 
345  // The comparison w/ the extra iteration value and branch
346  Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
347  ConstantInt::get(CountTy, leftOverIters),
348  "un.tmp");
349  // Branch to either the extra iterations or the unrolled loop
350  BranchInst::Create(NewBlocks[0], CompareBB,
351  BranchVal, NewBB);
352  CompareBB = NewBB;
353  PH->getTerminator()->setSuccessor(0, NewBB);
354  VMap[NewPH] = CompareBB;
355  }
356 
357  // Rewrite the cloned instruction operands to use the values
358  // created when the clone is created.
359  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
360  for (BasicBlock::iterator I = NewBlocks[i]->begin(),
361  E = NewBlocks[i]->end(); I != E; ++I) {
362  RemapInstruction(I, VMap,
364  }
365  }
366  }
367 
368  // Connect the prolog code to the original loop and update the
369  // PHI functions.
370  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
371  LPM->getAsPass());
372  NumRuntimeUnrolled++;
373  return true;
374 }
BasicBlock * getUniqueExitBlock() const
Definition: LoopInfo.cpp:402
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, Pass *P)
void addIncoming(Value *V, BasicBlock *BB)
const SCEV * getConstant(ConstantInt *V)
iterator end()
Definition: Function.h:397
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:266
unsigned less than
Definition: InstrTypes.h:676
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:173
LoopT * getParentLoop() const
Definition: LoopInfo.h:96
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:116
F(f)
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=0, ValueMaterializer *Materializer=0)
BlockT * getHeader() const
Definition: LoopInfo.h:95
LoopInfoBase< BlockT, LoopT > * LI
Definition: LoopInfoImpl.h:411
static Constant * getNullValue(Type *Ty)
Definition: Constants.cpp:111
StringRef getName() const
Definition: Value.cpp:167
BlockT * getLoopLatch() const
Definition: LoopInfoImpl.h:154
iterator begin()
Definition: BasicBlock.h:193
AnalysisType * getAnalysisIfAvailable() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=0)
bool isLoopSimplifyForm() const
Definition: LoopInfo.cpp:207
Interval::succ_iterator succ_begin(Interval *I)
Definition: Interval.h:107
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, Pass *P=0)
void setSuccessor(unsigned idx, BasicBlock *B)
Definition: InstrTypes.h:71
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:722
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:110
#define P(N)
BlockT * getLoopPreheader() const
Definition: LoopInfoImpl.h:106
void insertBefore(Instruction *InsertPos)
Definition: Instruction.cpp:78
LLVM Basic Block Representation.
Definition: BasicBlock.h:72
Type * getType() const
static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, ValueToValueMapTy &LVMap, Pass *P)
Interval::pred_iterator pred_begin(Interval *I)
Definition: Interval.h:117
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=0)
bool contains(const LoopT *L) const
Definition: LoopInfo.h:104
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:214
Represent an integer comparison operator.
Definition: Instructions.h:911
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:42
BlockT * getExitingBlock() const
Definition: LoopInfoImpl.h:49
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:120
static void CloneLoopBlocks(Loop *L, bool FirstCopy, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, LoopInfo *LI)
iterator erase(iterator where)
Definition: ilist.h:465
iterator end()
Definition: ValueMap.h:99
const BasicBlockListType & getBasicBlockList() const
Definition: Function.h:374
See the file comment.
Definition: ValueMap.h:75
void setIncomingBlock(unsigned i, BasicBlock *BB)
Value * getIncomingValue(unsigned i) const
STATISTIC(NumRuntimeUnrolled,"Number of loops unrolled with run-time trip counts")
Type * getType() const
Definition: Value.h:111
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
Definition: Constants.cpp:492
virtual Pass * getAsPass()
Definition: LoopPass.h:104
void splice(iterator where, iplist &L2)
Definition: ilist.h:570
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool isIntegerTy() const
Definition: Type.h:196
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, LPPassManager *LPM)
void forgetLoop(const Loop *L)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=0, ClonedCodeInfo *CodeInfo=0)
#define I(x, y, z)
Definition: MD5.cpp:54
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:360
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=0, BasicBlock *InsertBefore=0)
Creates a new BasicBlock.
Definition: BasicBlock.h:109
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:77
const SCEV * getBackedgeTakenCount(const Loop *L)
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
LLVM Value Representation.
Definition: Value.h:66
iterator end() const
Definition: StringRef.h:99
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, Pass *P, SmallVectorImpl< BasicBlock * > &NewBBs)
void setIncomingValue(unsigned i, Value *V)
int getBasicBlockIndex(const BasicBlock *BB) const
LoopInfoBase< BasicBlock, Loop > & getBase()
Definition: LoopInfo.h:602
iterator begin()
Definition: ValueMap.h:98
RPOIterator endRPO() const
Definition: LoopIterator.h:81
bool erase(const KeyT &Val)
Definition: ValueMap.h:147