CodeGenPrepare.cpp

1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass munges the code in the input function to better prepare it for
11 // SelectionDAG-based code generation. This works around limitations in its
12 // basic-block-at-a-time approach. It should eventually be removed.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "codegenprepare"
17 #include "llvm/Transforms/Scalar.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/ADT/ValueMap.h"
25 #include "llvm/Assembly/Writer.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InlineAsm.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/CallSite.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
48 using namespace llvm;
49 using namespace llvm::PatternMatch;
50 
51 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
52 STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
53 STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
54 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
55  "sunken Cmps");
56 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
57  "of sunken Casts");
58 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
59  "computations were sunk");
60 STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
61 STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
62 STATISTIC(NumRetsDup, "Number of return instructions duplicated");
63 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
64 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
65 
67  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
68  cl::desc("Disable branch optimizations in CodeGenPrepare"));
69 
71  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
72  cl::desc("Disable select to branch conversion."));
73 
74 namespace {
75  class CodeGenPrepare : public FunctionPass {
76  /// TLI - Keep a pointer to a TargetLowering to consult for determining
77  /// transformation profitability.
78  const TargetMachine *TM;
79  const TargetLowering *TLI;
80  const TargetLibraryInfo *TLInfo;
81  DominatorTree *DT;
82 
83  /// CurInstIterator - As we scan instructions optimizing them, this is the
84  /// next instruction to optimize. Xforms that can invalidate this should
85  /// update it.
86  BasicBlock::iterator CurInstIterator;
87 
88  /// Keeps track of non-local addresses that have been sunk into a block.
89  /// This allows us to avoid inserting duplicate code for blocks with
90  /// multiple load/stores of the same address.
91  ValueMap<Value*, Value*> SunkAddrs;
92 
93  /// ModifiedDT - If the CFG is modified in any way, the dominator tree may need to
94  /// be updated.
95  bool ModifiedDT;
96 
97  /// OptSize - True if optimizing for size.
98  bool OptSize;
99 
100  public:
101  static char ID; // Pass identification, replacement for typeid
102  explicit CodeGenPrepare(const TargetMachine *TM = 0)
103  : FunctionPass(ID), TM(TM), TLI(0) {
104  initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
105  }
106  bool runOnFunction(Function &F);
107 
108  const char *getPassName() const { return "CodeGen Prepare"; }
109 
110  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
111  AU.addPreserved<DominatorTree>();
112  AU.addRequired<TargetLibraryInfo>();
113  }
114 
115  private:
116  bool EliminateFallThrough(Function &F);
117  bool EliminateMostlyEmptyBlocks(Function &F);
118  bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
119  void EliminateMostlyEmptyBlock(BasicBlock *BB);
120  bool OptimizeBlock(BasicBlock &BB);
121  bool OptimizeInst(Instruction *I);
122  bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
123  bool OptimizeInlineAsmInst(CallInst *CS);
124  bool OptimizeCallInst(CallInst *CI);
125  bool MoveExtToFormExtLoad(Instruction *I);
126  bool OptimizeExtUses(Instruction *I);
127  bool OptimizeSelectInst(SelectInst *SI);
128  bool DupRetToEnableTailCallOpts(BasicBlock *BB);
129  bool PlaceDbgValues(Function &F);
130  };
131 }
132 
133 char CodeGenPrepare::ID = 0;
134 INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
135  "Optimize for code generation", false, false)
136 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
137 INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
138  "Optimize for code generation", false, false)
139 
140 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
141  return new CodeGenPrepare(TM);
142 }
143 
144 bool CodeGenPrepare::runOnFunction(Function &F) {
145  bool EverMadeChange = false;
146 
147  ModifiedDT = false;
148  if (TM) TLI = TM->getTargetLowering();
149  TLInfo = &getAnalysis<TargetLibraryInfo>();
150  DT = getAnalysisIfAvailable<DominatorTree>();
151  OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
152  Attribute::OptimizeForSize);
153 
154  /// This optimization identifies DIV instructions that can be
155  /// profitably bypassed and carried out with a shorter, faster divide.
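 /// For example (illustrative, target-dependent, not from the original source):
 /// on a target that reports 64-bit division as slow with a 32-bit bypass
 /// width, a udiv whose operands happen to fit in 32 bits can be routed
 /// through the narrower divide at run time behind a cheap range check.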
156  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
157  const DenseMap<unsigned int, unsigned int> &BypassWidths =
158  TLI->getBypassSlowDivWidths();
159  for (Function::iterator I = F.begin(); I != F.end(); I++)
160  EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
161  }
162 
163  // Eliminate blocks that contain only PHI nodes and an
164  // unconditional branch.
165  EverMadeChange |= EliminateMostlyEmptyBlocks(F);
166 
167  // If llvm.dbg.value is far away from the value, then isel may not be able
168  // to handle it properly. isel will drop llvm.dbg.value if it cannot
169  // find a node corresponding to the value.
170  EverMadeChange |= PlaceDbgValues(F);
171 
172  bool MadeChange = true;
173  while (MadeChange) {
174  MadeChange = false;
175  for (Function::iterator I = F.begin(); I != F.end(); ) {
176  BasicBlock *BB = I++;
177  MadeChange |= OptimizeBlock(*BB);
178  }
179  EverMadeChange |= MadeChange;
180  }
181 
182  SunkAddrs.clear();
183 
184  if (!DisableBranchOpts) {
185  MadeChange = false;
186  SmallPtrSet<BasicBlock*, 8> WorkList;
187  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
188  SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
189  MadeChange |= ConstantFoldTerminator(BB, true);
190  if (!MadeChange) continue;
191 
192  for (SmallVectorImpl<BasicBlock*>::iterator
193  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
194  if (pred_begin(*II) == pred_end(*II))
195  WorkList.insert(*II);
196  }
197 
198  // Delete the dead blocks and any of their dead successors.
199  MadeChange |= !WorkList.empty();
200  while (!WorkList.empty()) {
201  BasicBlock *BB = *WorkList.begin();
202  WorkList.erase(BB);
203  SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
204 
205  DeleteDeadBlock(BB);
206 
207  for (SmallVectorImpl<BasicBlock*>::iterator
208  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
209  if (pred_begin(*II) == pred_end(*II))
210  WorkList.insert(*II);
211  }
212 
213  // Merge pairs of basic blocks with unconditional branches, connected by
214  // a single edge.
215  if (EverMadeChange || MadeChange)
216  MadeChange |= EliminateFallThrough(F);
217 
218  if (MadeChange)
219  ModifiedDT = true;
220  EverMadeChange |= MadeChange;
221  }
222 
223  if (ModifiedDT && DT)
224  DT->DT->recalculate(F);
225 
226  return EverMadeChange;
227 }
228 
229 /// EliminateFallThrough - Merge basic blocks which are connected
230 /// by a single edge, where one of the basic blocks has a single successor
231 /// pointing to the other basic block, which has a single predecessor.
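/// For illustration only (hypothetical IR, not from the original source):
/// @code
///   bb1:
///     br label %bb2
///   bb2:                                ; preds = %bb1
///     %v = add i32 %a, 1
///     ret i32 %v
/// @endcode
/// The two blocks collapse into one that contains the add and the ret.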
232 bool CodeGenPrepare::EliminateFallThrough(Function &F) {
233  bool Changed = false;
234  // Scan all of the blocks in the function, except for the entry block.
235  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
236  BasicBlock *BB = I++;
237  // If the destination block has a single pred, then this is a trivial
238  // edge, just collapse it.
239  BasicBlock *SinglePred = BB->getSinglePredecessor();
240 
241  // Don't merge if BB's address is taken.
242  if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
243 
244  BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
245  if (Term && !Term->isConditional()) {
246  Changed = true;
247  DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
248  // Remember if SinglePred was the entry block of the function.
249  // If so, we will need to move BB back to the entry position.
250  bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
251  MergeBasicBlockIntoOnlyPred(BB, this);
252 
253  if (isEntry && BB != &BB->getParent()->getEntryBlock())
254  BB->moveBefore(&BB->getParent()->getEntryBlock());
255 
256  // We have erased a block. Update the iterator.
257  I = BB;
258  }
259  }
260  return Changed;
261 }
262 
263 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
264 /// debug info directives, and an unconditional branch. Passes before isel
265 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
266 /// isel. Start by eliminating these blocks so we can split them the way we
267 /// want them.
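/// A hypothetical example of the kind of block this removes:
/// @code
///   split:                              ; preds = %entry
///     %p = phi i32 [ %x, %entry ]
///     br label %merge
/// @endcode
/// The block holds only a phi and an unconditional branch, so (subject to the
/// CanMergeBlocks checks below) it is folded into %merge.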
268 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
269  bool MadeChange = false;
270  // Note that this intentionally skips the entry block.
271  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
272  BasicBlock *BB = I++;
273 
274  // If this block doesn't end with an uncond branch, ignore it.
275  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
276  if (!BI || !BI->isUnconditional())
277  continue;
278 
279  // If the instruction before the branch (skipping debug info) isn't a phi
280  // node, then other stuff is happening here.
281  BasicBlock::iterator BBI = BI;
282  if (BBI != BB->begin()) {
283  --BBI;
284  while (isa<DbgInfoIntrinsic>(BBI)) {
285  if (BBI == BB->begin())
286  break;
287  --BBI;
288  }
289  if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
290  continue;
291  }
292 
293  // Do not break infinite loops.
294  BasicBlock *DestBB = BI->getSuccessor(0);
295  if (DestBB == BB)
296  continue;
297 
298  if (!CanMergeBlocks(BB, DestBB))
299  continue;
300 
301  EliminateMostlyEmptyBlock(BB);
302  MadeChange = true;
303  }
304  return MadeChange;
305 }
306 
307 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
308 /// single uncond branch between them, and BB contains no other non-phi
309 /// instructions.
310 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
311  const BasicBlock *DestBB) const {
312  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
313  // the successor. If there are more complex conditions (e.g. preheaders),
314  // don't mess around with them.
315  BasicBlock::const_iterator BBI = BB->begin();
316  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
317  for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
318  UI != E; ++UI) {
319  const Instruction *User = cast<Instruction>(*UI);
320  if (User->getParent() != DestBB || !isa<PHINode>(User))
321  return false;
322  // If User is inside DestBB block and it is a PHINode then check
323  // incoming value. If incoming value is not from BB then this is
324  // a complex condition (e.g. preheaders) we want to avoid here.
325  if (User->getParent() == DestBB) {
326  if (const PHINode *UPN = dyn_cast<PHINode>(User))
327  for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
328  Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
329  if (Insn && Insn->getParent() == BB &&
330  Insn->getParent() != UPN->getIncomingBlock(I))
331  return false;
332  }
333  }
334  }
335  }
336 
337  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
338  // and DestBB may have conflicting incoming values for the block. If so, we
339  // can't merge the block.
340  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
341  if (!DestBBPN) return true; // no conflict.
342 
343  // Collect the preds of BB.
344  SmallPtrSet<const BasicBlock*, 16> BBPreds;
345  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
346  // It is faster to get preds from a PHI than with pred_iterator.
347  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
348  BBPreds.insert(BBPN->getIncomingBlock(i));
349  } else {
350  BBPreds.insert(pred_begin(BB), pred_end(BB));
351  }
352 
353  // Walk the preds of DestBB.
354  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
355  BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
356  if (BBPreds.count(Pred)) { // Common predecessor?
357  BBI = DestBB->begin();
358  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
359  const Value *V1 = PN->getIncomingValueForBlock(Pred);
360  const Value *V2 = PN->getIncomingValueForBlock(BB);
361 
362  // If V2 is a phi node in BB, look up what the mapped value will be.
363  if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
364  if (V2PN->getParent() == BB)
365  V2 = V2PN->getIncomingValueForBlock(Pred);
366 
367  // If there is a conflict, bail out.
368  if (V1 != V2) return false;
369  }
370  }
371  }
372 
373  return true;
374 }
375 
376 
377 /// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes and
378 /// an unconditional branch in it.
379 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
380  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
381  BasicBlock *DestBB = BI->getSuccessor(0);
382 
383  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
384 
385  // If the destination block has a single pred, then this is a trivial edge,
386  // just collapse it.
387  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
388  if (SinglePred != DestBB) {
389  // Remember if SinglePred was the entry block of the function. If so, we
390  // will need to move BB back to the entry position.
391  bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
392  MergeBasicBlockIntoOnlyPred(DestBB, this);
393 
394  if (isEntry && BB != &BB->getParent()->getEntryBlock())
395  BB->moveBefore(&BB->getParent()->getEntryBlock());
396 
397  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
398  return;
399  }
400  }
401 
402  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
403  // to handle the new incoming edges it is about to have.
404  PHINode *PN;
405  for (BasicBlock::iterator BBI = DestBB->begin();
406  (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
407  // Remove the incoming value for BB, and remember it.
408  Value *InVal = PN->removeIncomingValue(BB, false);
409 
410  // Two options: either the InVal is a phi node defined in BB or it is some
411  // value that dominates BB.
412  PHINode *InValPhi = dyn_cast<PHINode>(InVal);
413  if (InValPhi && InValPhi->getParent() == BB) {
414  // Add all of the input values of the input PHI as inputs of this phi.
415  for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
416  PN->addIncoming(InValPhi->getIncomingValue(i),
417  InValPhi->getIncomingBlock(i));
418  } else {
419  // Otherwise, add one instance of the dominating value for each edge that
420  // we will be adding.
421  if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
422  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
423  PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
424  } else {
425  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
426  PN->addIncoming(InVal, *PI);
427  }
428  }
429  }
430 
431  // The PHIs are now updated, change everything that refers to BB to use
432  // DestBB and remove BB.
433  BB->replaceAllUsesWith(DestBB);
434  if (DT && !ModifiedDT) {
435  BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
436  BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
437  BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
438  DT->changeImmediateDominator(DestBB, NewIDom);
439  DT->eraseNode(BB);
440  }
441  BB->eraseFromParent();
442  ++NumBlocksElim;
443 
444  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
445 }
446 
447 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
448 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
449 /// sink it into user blocks to reduce the number of virtual
450 /// registers that must be created and coalesced.
451 ///
452 /// Return true if any changes are made.
453 ///
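/// A minimal sketch of the transformation (hypothetical IR):
/// @code
///   bb0:
///     %c = bitcast i8* %p to i32*
///     br i1 %cond, label %bb1, label %bb2
///   bb1:
///     %v = load i32* %c
/// @endcode
/// After the transformation a copy of the bitcast sits in %bb1 next to the
/// load, so the cast no longer has to live across the block boundary.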
454 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI) {
455  // If this is a noop copy,
456  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
457  EVT DstVT = TLI.getValueType(CI->getType());
458 
459  // This is an fp<->int conversion?
460  if (SrcVT.isInteger() != DstVT.isInteger())
461  return false;
462 
463  // If this is an extension, it will be a zero or sign extension, which
464  // isn't a noop.
465  if (SrcVT.bitsLT(DstVT)) return false;
466 
467  // If these values will be promoted, find out what they will be promoted
468  // to. This helps us consider truncates on PPC as noop copies when they
469  // are.
470  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
471  TargetLowering::TypePromoteInteger)
472  SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
473  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
474  TargetLowering::TypePromoteInteger)
475  DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
476 
477  // If, after promotion, these are the same types, this is a noop copy.
478  if (SrcVT != DstVT)
479  return false;
480 
481  BasicBlock *DefBB = CI->getParent();
482 
483  /// InsertedCasts - Only insert a cast in each block once.
484  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
485 
486  bool MadeChange = false;
487  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
488  UI != E; ) {
489  Use &TheUse = UI.getUse();
490  Instruction *User = cast<Instruction>(*UI);
491 
492  // Figure out which BB this cast is used in. For PHI's this is the
493  // appropriate predecessor block.
494  BasicBlock *UserBB = User->getParent();
495  if (PHINode *PN = dyn_cast<PHINode>(User)) {
496  UserBB = PN->getIncomingBlock(UI);
497  }
498 
499  // Preincrement use iterator so we don't invalidate it.
500  ++UI;
501 
502  // If this user is in the same block as the cast, don't change the cast.
503  if (UserBB == DefBB) continue;
504 
505  // If we have already inserted a cast into this block, use it.
506  CastInst *&InsertedCast = InsertedCasts[UserBB];
507 
508  if (!InsertedCast) {
509  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
510  InsertedCast =
511  CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
512  InsertPt);
513  MadeChange = true;
514  }
515 
516  // Replace a use of the cast with a use of the new cast.
517  TheUse = InsertedCast;
518  ++NumCastUses;
519  }
520 
521  // If we removed all uses, nuke the cast.
522  if (CI->use_empty()) {
523  CI->eraseFromParent();
524  MadeChange = true;
525  }
526 
527  return MadeChange;
528 }
529 
530 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
531 /// the number of virtual registers that must be created and coalesced. This is
532 /// a clear win except on targets with multiple condition code registers
533 /// (PowerPC), where it might lose; some adjustment may be wanted there.
534 ///
535 /// Return true if any changes are made.
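/// For illustration (hypothetical IR):
/// @code
///   bb0:
///     %c = icmp eq i32 %x, 0
///     br label %bb1
///   bb1:
///     %r = select i1 %c, i32 %a, i32 %b
/// @endcode
/// A clone of the icmp is inserted into %bb1 ahead of the select and the use
/// is rewritten to it; the original icmp is erased once no uses remain.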
536 static bool OptimizeCmpExpression(CmpInst *CI) {
537  BasicBlock *DefBB = CI->getParent();
538 
539  /// InsertedCmp - Only insert a cmp in each block once.
540  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
541 
542  bool MadeChange = false;
543  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
544  UI != E; ) {
545  Use &TheUse = UI.getUse();
546  Instruction *User = cast<Instruction>(*UI);
547 
548  // Preincrement use iterator so we don't invalidate it.
549  ++UI;
550 
551  // Don't bother for PHI nodes.
552  if (isa<PHINode>(User))
553  continue;
554 
555  // Figure out which BB this cmp is used in.
556  BasicBlock *UserBB = User->getParent();
557 
558  // If this user is in the same block as the cmp, don't change the cmp.
559  if (UserBB == DefBB) continue;
560 
561  // If we have already inserted a cmp into this block, use it.
562  CmpInst *&InsertedCmp = InsertedCmps[UserBB];
563 
564  if (!InsertedCmp) {
565  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
566  InsertedCmp =
567  CmpInst::Create(CI->getOpcode(),
568  CI->getPredicate(), CI->getOperand(0),
569  CI->getOperand(1), "", InsertPt);
570  MadeChange = true;
571  }
572 
573  // Replace a use of the cmp with a use of the new cmp.
574  TheUse = InsertedCmp;
575  ++NumCmpUses;
576  }
577 
578  // If we removed all uses, nuke the cmp.
579  if (CI->use_empty())
580  CI->eraseFromParent();
581 
582  return MadeChange;
583 }
584 
585 namespace {
586 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
587 protected:
588  void replaceCall(Value *With) {
589  CI->replaceAllUsesWith(With);
590  CI->eraseFromParent();
591  }
592  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
593  if (ConstantInt *SizeCI =
594  dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
595  return SizeCI->isAllOnesValue();
596  return false;
597  }
598 };
599 } // end anonymous namespace
600 
601 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
602  BasicBlock *BB = CI->getParent();
603 
604  // Lower inline assembly if we can.
605  // If we found an inline asm expression, and if the target knows how to
606  // lower it to normal LLVM code, do so now.
607  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
608  if (TLI->ExpandInlineAsm(CI)) {
609  // Avoid invalidating the iterator.
610  CurInstIterator = BB->begin();
611  // Avoid processing instructions out of order, which could cause
612  // reuse before a value is defined.
613  SunkAddrs.clear();
614  return true;
615  }
616  // Sink address computing for memory operands into the block.
617  if (OptimizeInlineAsmInst(CI))
618  return true;
619  }
620 
621  // Lower all uses of llvm.objectsize.*
622  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
623  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
624  bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
625  Type *ReturnTy = CI->getType();
626  Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
627 
628  // Substituting this can cause recursive simplifications, which can
629  // invalidate our iterator. Use a WeakVH to hold onto it in case this
630  // happens.
631  WeakVH IterHandle(CurInstIterator);
632 
633  replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
634  TLInfo, ModifiedDT ? 0 : DT);
635 
636  // If the iterator instruction was recursively deleted, start over at the
637  // start of the block.
638  if (IterHandle != CurInstIterator) {
639  CurInstIterator = BB->begin();
640  SunkAddrs.clear();
641  }
642  return true;
643  }
644 
645  if (II && TLI) {
646  SmallVector<Value*, 2> PtrOps;
647  Type *AccessTy;
648  if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
649  while (!PtrOps.empty())
650  if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
651  return true;
652  }
653 
654  // From here on out we're working with named functions.
655  if (CI->getCalledFunction() == 0) return false;
656 
657  // We'll need DataLayout from here on out.
658  const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
659  if (!TD) return false;
660 
661  // Lower all default uses of _chk calls. This is very similar
662  // to what InstCombineCalls does, but here we are only lowering calls
663  // that have the default "don't know" as the objectsize. Anything else
664  // should be left alone.
665  CodeGenPrepareFortifiedLibCalls Simplifier;
666  return Simplifier.fold(CI, TD, TLInfo);
667 }
668 
669 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
670 /// instructions to the predecessor to enable tail call optimizations. The
671 /// case it is currently looking for is:
672 /// @code
673 /// bb0:
674 /// %tmp0 = tail call i32 @f0()
675 /// br label %return
676 /// bb1:
677 /// %tmp1 = tail call i32 @f1()
678 /// br label %return
679 /// bb2:
680 /// %tmp2 = tail call i32 @f2()
681 /// br label %return
682 /// return:
683 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
684 /// ret i32 %retval
685 /// @endcode
686 ///
687 /// =>
688 ///
689 /// @code
690 /// bb0:
691 /// %tmp0 = tail call i32 @f0()
692 /// ret i32 %tmp0
693 /// bb1:
694 /// %tmp1 = tail call i32 @f1()
695 /// ret i32 %tmp1
696 /// bb2:
697 /// %tmp2 = tail call i32 @f2()
698 /// ret i32 %tmp2
699 /// @endcode
700 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
701  if (!TLI)
702  return false;
703 
704  ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
705  if (!RI)
706  return false;
707 
708  PHINode *PN = 0;
709  BitCastInst *BCI = 0;
710  Value *V = RI->getReturnValue();
711  if (V) {
712  BCI = dyn_cast<BitCastInst>(V);
713  if (BCI)
714  V = BCI->getOperand(0);
715 
716  PN = dyn_cast<PHINode>(V);
717  if (!PN)
718  return false;
719  }
720 
721  if (PN && PN->getParent() != BB)
722  return false;
723 
724  // It's not safe to eliminate the sign / zero extension of the return value.
725  // See llvm::isInTailCallPosition().
726  const Function *F = BB->getParent();
727  AttributeSet CallerAttrs = F->getAttributes();
728  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
729  CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
730  return false;
731 
732  // Make sure there are no instructions between the PHI and return, or that the
733  // return is the first instruction in the block.
734  if (PN) {
735  BasicBlock::iterator BI = BB->begin();
736  do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
737  if (&*BI == BCI)
738  // Also skip over the bitcast.
739  ++BI;
740  if (&*BI != RI)
741  return false;
742  } else {
743  BasicBlock::iterator BI = BB->begin();
744  while (isa<DbgInfoIntrinsic>(BI)) ++BI;
745  if (&*BI != RI)
746  return false;
747  }
748 
749  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
750  /// call.
751  SmallVector<CallInst*, 4> TailCalls;
752  if (PN) {
753  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
754  CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
755  // Make sure the phi value is indeed produced by the tail call.
756  if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
757  TLI->mayBeEmittedAsTailCall(CI))
758  TailCalls.push_back(CI);
759  }
760  } else {
761  SmallPtrSet<BasicBlock*, 4> VisitedBBs;
762  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
763  if (!VisitedBBs.insert(*PI))
764  continue;
765 
766  BasicBlock::InstListType &InstList = (*PI)->getInstList();
767  BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
768  BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
769  do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
770  if (RI == RE)
771  continue;
772 
773  CallInst *CI = dyn_cast<CallInst>(&*RI);
774  if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
775  TailCalls.push_back(CI);
776  }
777  }
778 
779  bool Changed = false;
780  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
781  CallInst *CI = TailCalls[i];
782  CallSite CS(CI);
783 
784  // Conservatively require the attributes of the call to match those of the
785  // return. Ignore noalias because it doesn't affect the call sequence.
786  AttributeSet CalleeAttrs = CS.getAttributes();
787  if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
788  removeAttribute(Attribute::NoAlias) !=
789  AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
790  removeAttribute(Attribute::NoAlias))
791  continue;
792 
793  // Make sure the call instruction is followed by an unconditional branch to
794  // the return block.
795  BasicBlock *CallBB = CI->getParent();
796  BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
797  if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
798  continue;
799 
800  // Duplicate the return into CallBB.
801  (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
802  ModifiedDT = Changed = true;
803  ++NumRetsDup;
804  }
805 
806  // If we eliminated all predecessors of the block, delete the block now.
807  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
808  BB->eraseFromParent();
809 
810  return Changed;
811 }
812 
813 //===----------------------------------------------------------------------===//
814 // Memory Optimization
815 //===----------------------------------------------------------------------===//
816 
817 namespace {
818 
819 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
820 /// which holds actual Value*'s for register values.
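/// Conceptually this describes an address of the form
///   BaseGV + BaseOffs + BaseReg + Scale*ScaledReg
/// e.g. (hypothetical, assuming 4-byte i32) "getelementptr i32* %p, i64 %i"
/// could be matched as { BaseReg = %p, Scale = 4, ScaledReg = %i }.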
821 struct ExtAddrMode : public TargetLowering::AddrMode {
822  Value *BaseReg;
823  Value *ScaledReg;
824  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
825  void print(raw_ostream &OS) const;
826  void dump() const;
827 
828  bool operator==(const ExtAddrMode& O) const {
829  return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
830  (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
831  (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
832  }
833 };
834 
835 #ifndef NDEBUG
836 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
837  AM.print(OS);
838  return OS;
839 }
840 #endif
841 
842 void ExtAddrMode::print(raw_ostream &OS) const {
843  bool NeedPlus = false;
844  OS << "[";
845  if (BaseGV) {
846  OS << (NeedPlus ? " + " : "")
847  << "GV:";
848  WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
849  NeedPlus = true;
850  }
851 
852  if (BaseOffs)
853  OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
854 
855  if (BaseReg) {
856  OS << (NeedPlus ? " + " : "")
857  << "Base:";
858  WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
859  NeedPlus = true;
860  }
861  if (Scale) {
862  OS << (NeedPlus ? " + " : "")
863  << Scale << "*";
864  WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
865  }
866 
867  OS << ']';
868 }
869 
870 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
871 void ExtAddrMode::dump() const {
872  print(dbgs());
873  dbgs() << '\n';
874 }
875 #endif
876 
877 
878 /// \brief A helper class for matching addressing modes.
879 ///
880 /// This encapsulates the logic for matching the target-legal addressing modes.
881 class AddressingModeMatcher {
882  SmallVectorImpl<Instruction*> &AddrModeInsts;
883  const TargetLowering &TLI;
884 
885  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
886  /// the memory instruction that we're computing this address for.
887  Type *AccessTy;
888  Instruction *MemoryInst;
889 
890  /// AddrMode - This is the addressing mode that we're building up. This is
891  /// part of the return value of this addressing mode matching stuff.
892  ExtAddrMode &AddrMode;
893 
894  /// IgnoreProfitability - This is set to true when we should not do
895  /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
896  /// always returns true.
897  bool IgnoreProfitability;
898 
899  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
900  const TargetLowering &T, Type *AT,
901  Instruction *MI, ExtAddrMode &AM)
902  : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
903  IgnoreProfitability = false;
904  }
905 public:
906 
907  /// Match - Find the maximal addressing mode that a load/store of V can fold,
908  /// given an access type of AccessTy. This returns a list of involved
909  /// instructions in AddrModeInsts.
910  static ExtAddrMode Match(Value *V, Type *AccessTy,
911  Instruction *MemoryInst,
912  SmallVectorImpl<Instruction*> &AddrModeInsts,
913  const TargetLowering &TLI) {
914  ExtAddrMode Result;
915 
916  bool Success =
917  AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
918  MemoryInst, Result).MatchAddr(V, 0);
919  (void)Success; assert(Success && "Couldn't select *anything*?");
920  return Result;
921  }
922 private:
923  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
924  bool MatchAddr(Value *V, unsigned Depth);
925  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
926  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
927  ExtAddrMode &AMBefore,
928  ExtAddrMode &AMAfter);
929  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
930 };
931 
932 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
933 /// Return true and update AddrMode if this addr mode is legal for the target,
934 /// false if not.
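/// A worked example (hypothetical values): starting from an empty mode,
/// matching ScaleReg = %i with Scale = 4 gives { Scale = 4, ScaledReg = %i };
/// if %i is itself "add i64 %j, 3", the mode may be refined to
/// { Scale = 4, ScaledReg = %j, BaseOffs = 12 } when the target accepts it.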
935 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
936  unsigned Depth) {
937  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
938  // mode. Just process that directly.
939  if (Scale == 1)
940  return MatchAddr(ScaleReg, Depth);
941 
942  // If the scale is 0, it takes nothing to add this.
943  if (Scale == 0)
944  return true;
945 
946  // If we already have a scale of this value, we can add to it, otherwise, we
947  // need an available scale field.
948  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
949  return false;
950 
951  ExtAddrMode TestAddrMode = AddrMode;
952 
953  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
954  // [A+B + A*7] -> [B+A*8].
955  TestAddrMode.Scale += Scale;
956  TestAddrMode.ScaledReg = ScaleReg;
957 
958  // If the new address isn't legal, bail out.
959  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
960  return false;
961 
962  // It was legal, so commit it.
963  AddrMode = TestAddrMode;
964 
965  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
966  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
967  // X*Scale + C*Scale to addr mode.
968  ConstantInt *CI = 0; Value *AddLHS = 0;
969  if (isa<Instruction>(ScaleReg) && // not a constant expr.
970  match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
971  TestAddrMode.ScaledReg = AddLHS;
972  TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
973 
974  // If this addressing mode is legal, commit it and remember that we folded
975  // this instruction.
976  if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
977  AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
978  AddrMode = TestAddrMode;
979  return true;
980  }
981  }
982 
983  // Otherwise, not (x+c)*scale, just return what we have.
984  return true;
985 }
986 
987 /// MightBeFoldableInst - This is a little filter, which returns true if an
988 /// addressing computation involving I might be folded into a load/store
989 /// accessing it. This doesn't need to be perfect, but needs to accept at least
990 /// the set of instructions that MatchOperationAddr can.
991 static bool MightBeFoldableInst(Instruction *I) {
992  switch (I->getOpcode()) {
993  case Instruction::BitCast:
994  // Don't touch identity bitcasts.
995  if (I->getType() == I->getOperand(0)->getType())
996  return false;
997  return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
998  case Instruction::PtrToInt:
999  // PtrToInt is always a noop, as we know that the int type is pointer sized.
1000  return true;
1001  case Instruction::IntToPtr:
1002  // We know the input is intptr_t, so this is foldable.
1003  return true;
1004  case Instruction::Add:
1005  return true;
1006  case Instruction::Mul:
1007  case Instruction::Shl:
1008  // Can only handle X*C and X << C.
1009  return isa<ConstantInt>(I->getOperand(1));
1010  case Instruction::GetElementPtr:
1011  return true;
1012  default:
1013  return false;
1014  }
1015 }
1016 
1017 /// MatchOperationAddr - Given an instruction or constant expr, see if we can
1018 /// fold the operation into the addressing mode. If so, update the addressing
1019 /// mode and return true, otherwise return false without modifying AddrMode.
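/// For illustration (hypothetical IR, assuming 4-byte i32): for
/// "getelementptr [16 x i32]* %A, i64 0, i64 %i" the constant indices are
/// accumulated into BaseOffs while %i becomes ScaledReg with Scale = 4, and
/// %A is matched as the base.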
1020 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
1021  unsigned Depth) {
1022  // Avoid exponential behavior on extremely deep expression trees.
1023  if (Depth >= 5) return false;
1024 
1025  switch (Opcode) {
1026  case Instruction::PtrToInt:
1027  // PtrToInt is always a noop, as we know that the int type is pointer sized.
1028  return MatchAddr(AddrInst->getOperand(0), Depth);
1029  case Instruction::IntToPtr:
1030  // This inttoptr is a no-op if the integer type is pointer sized.
1031  if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
1032  TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
1033  return MatchAddr(AddrInst->getOperand(0), Depth);
1034  return false;
1035  case Instruction::BitCast:
1036  // BitCast is always a noop, and we can handle it as long as it is
1037  // int->int or pointer->pointer (we don't want int<->fp or something).
1038  if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
1039  AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
1040  // Don't touch identity bitcasts. These were probably put here by LSR,
1041  // and we don't want to mess around with them. Assume it knows what it
1042  // is doing.
1043  AddrInst->getOperand(0)->getType() != AddrInst->getType())
1044  return MatchAddr(AddrInst->getOperand(0), Depth);
1045  return false;
1046  case Instruction::Add: {
1047  // Check to see if we can merge in the RHS then the LHS. If so, we win.
1048  ExtAddrMode BackupAddrMode = AddrMode;
1049  unsigned OldSize = AddrModeInsts.size();
1050  if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
1051  MatchAddr(AddrInst->getOperand(0), Depth+1))
1052  return true;
1053 
1054  // Restore the old addr mode info.
1055  AddrMode = BackupAddrMode;
1056  AddrModeInsts.resize(OldSize);
1057 
1058  // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
1059  if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
1060  MatchAddr(AddrInst->getOperand(1), Depth+1))
1061  return true;
1062 
1063  // Otherwise we definitely can't merge the ADD in.
1064  AddrMode = BackupAddrMode;
1065  AddrModeInsts.resize(OldSize);
1066  break;
1067  }
1068  //case Instruction::Or:
1069  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
1070  //break;
1071  case Instruction::Mul:
1072  case Instruction::Shl: {
1073  // Can only handle X*C and X << C.
1074  ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
1075  if (!RHS) return false;
1076  int64_t Scale = RHS->getSExtValue();
1077  if (Opcode == Instruction::Shl)
1078  Scale = 1LL << Scale;
1079 
1080  return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
1081  }
1082  case Instruction::GetElementPtr: {
1083  // Scan the GEP. We check whether it contains constant offsets and at most
1084  // one variable offset.
1085  int VariableOperand = -1;
1086  unsigned VariableScale = 0;
1087 
1088  int64_t ConstantOffset = 0;
1089  const DataLayout *TD = TLI.getDataLayout();
1090  gep_type_iterator GTI = gep_type_begin(AddrInst);
1091  for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
1092  if (StructType *STy = dyn_cast<StructType>(*GTI)) {
1093  const StructLayout *SL = TD->getStructLayout(STy);
1094  unsigned Idx =
1095  cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
1096  ConstantOffset += SL->getElementOffset(Idx);
1097  } else {
1098  uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
1099  if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
1100  ConstantOffset += CI->getSExtValue()*TypeSize;
1101  } else if (TypeSize) { // Scales of zero don't do anything.
1102  // We only allow one variable index at the moment.
1103  if (VariableOperand != -1)
1104  return false;
1105 
1106  // Remember the variable index.
1107  VariableOperand = i;
1108  VariableScale = TypeSize;
1109  }
1110  }
1111  }
1112 
1113  // A common case is for the GEP to only do a constant offset. In this case,
1114  // just add it to the disp field and check validity.
1115  if (VariableOperand == -1) {
1116  AddrMode.BaseOffs += ConstantOffset;
1117  if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
1118  // Check to see if we can fold the base pointer in too.
1119  if (MatchAddr(AddrInst->getOperand(0), Depth+1))
1120  return true;
1121  }
1122  AddrMode.BaseOffs -= ConstantOffset;
1123  return false;
1124  }
1125 
1126  // Save the valid addressing mode in case we can't match.
1127  ExtAddrMode BackupAddrMode = AddrMode;
1128  unsigned OldSize = AddrModeInsts.size();
1129 
1130  // See if the scale and offset amount is valid for this target.
1131  AddrMode.BaseOffs += ConstantOffset;
1132 
1133  // Match the base operand of the GEP.
1134  if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
1135  // If it couldn't be matched, just stuff the value in a register.
1136  if (AddrMode.HasBaseReg) {
1137  AddrMode = BackupAddrMode;
1138  AddrModeInsts.resize(OldSize);
1139  return false;
1140  }
1141  AddrMode.HasBaseReg = true;
1142  AddrMode.BaseReg = AddrInst->getOperand(0);
1143  }
1144 
1145  // Match the remaining variable portion of the GEP.
1146  if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
1147  Depth)) {
1148  // If it couldn't be matched, try stuffing the base into a register
1149  // instead of matching it, and retrying the match of the scale.
1150  AddrMode = BackupAddrMode;
1151  AddrModeInsts.resize(OldSize);
1152  if (AddrMode.HasBaseReg)
1153  return false;
1154  AddrMode.HasBaseReg = true;
1155  AddrMode.BaseReg = AddrInst->getOperand(0);
1156  AddrMode.BaseOffs += ConstantOffset;
1157  if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
1158  VariableScale, Depth)) {
1159  // If even that didn't work, bail.
1160  AddrMode = BackupAddrMode;
1161  AddrModeInsts.resize(OldSize);
1162  return false;
1163  }
1164  }
1165 
1166  return true;
1167  }
1168  }
1169  return false;
1170 }
1171 
1172 /// MatchAddr - If we can, try to add the value of 'Addr' into the current
1173 /// addressing mode. If Addr can't be added to AddrMode this returns false and
1174 /// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
1175 /// or intptr_t for the target.
1176 ///
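/// Sketch of the matching order: a ConstantInt folds into BaseOffs, a
/// GlobalValue into BaseGV, instructions and constant expressions go through
/// MatchOperationAddr, and anything left over falls back to the plain [reg]
/// or [reg+reg] forms, each step checked with TLI.isLegalAddressingMode.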
1177 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
1178  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
1179  // Fold in immediates if legal for the target.
1180  AddrMode.BaseOffs += CI->getSExtValue();
1181  if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1182  return true;
1183  AddrMode.BaseOffs -= CI->getSExtValue();
1184  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
1185  // If this is a global variable, try to fold it into the addressing mode.
1186  if (AddrMode.BaseGV == 0) {
1187  AddrMode.BaseGV = GV;
1188  if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1189  return true;
1190  AddrMode.BaseGV = 0;
1191  }
1192  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
1193  ExtAddrMode BackupAddrMode = AddrMode;
1194  unsigned OldSize = AddrModeInsts.size();
1195 
1196  // Check to see if it is possible to fold this operation.
1197  if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
1198  // Okay, it's possible to fold this. Check to see if it is actually
1199  // *profitable* to do so. We use a simple cost model to avoid increasing
1200  // register pressure too much.
1201  if (I->hasOneUse() ||
1202  IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
1203  AddrModeInsts.push_back(I);
1204  return true;
1205  }
1206 
1207  // It isn't profitable to do this, roll back.
1208  //cerr << "NOT FOLDING: " << *I;
1209  AddrMode = BackupAddrMode;
1210  AddrModeInsts.resize(OldSize);
1211  }
1212  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
1213  if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
1214  return true;
1215  } else if (isa<ConstantPointerNull>(Addr)) {
1216  // Null pointer gets folded without affecting the addressing mode.
1217  return true;
1218  }
1219 
1220  // Worst case, the target should support [reg] addressing modes. :)
1221  if (!AddrMode.HasBaseReg) {
1222  AddrMode.HasBaseReg = true;
1223  AddrMode.BaseReg = Addr;
1224  // Still check for legality in case the target supports [imm] but not [i+r].
1225  if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1226  return true;
1227  AddrMode.HasBaseReg = false;
1228  AddrMode.BaseReg = 0;
1229  }
1230 
1231  // If the base register is already taken, see if we can do [r+r].
1232  if (AddrMode.Scale == 0) {
1233  AddrMode.Scale = 1;
1234  AddrMode.ScaledReg = Addr;
1235  if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1236  return true;
1237  AddrMode.Scale = 0;
1238  AddrMode.ScaledReg = 0;
1239  }
1240  // Couldn't match.
1241  return false;
1242 }
1243 
1244 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
1245 /// inline asm call are due to memory operands. If so, return true, otherwise
1246 /// return false.
1247 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
1248  const TargetLowering &TLI) {
1249  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
1250  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
1251  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
1252 
1253  // Compute the constraint code and ConstraintType to use.
1254  TLI.ComputeConstraintToUse(OpInfo, SDValue());
1255 
1256  // If this asm operand is our Value*, and if it isn't an indirect memory
1257  // operand, we can't fold it!
1258  if (OpInfo.CallOperandVal == OpVal &&
1259  (OpInfo.ConstraintType != TargetLowering::C_Memory ||
1260  !OpInfo.isIndirect))
1261  return false;
1262  }
1263 
1264  return true;
1265 }
1266 
1267 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a
1268 /// memory use. If we find an obviously non-foldable instruction, return true.
1269 /// Add the ultimately found memory instructions to MemoryUses.
1270 static bool FindAllMemoryUses(Instruction *I,
1271  SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
1272  SmallPtrSet<Instruction*, 16> &ConsideredInsts,
1273  const TargetLowering &TLI) {
1274  // If we already considered this instruction, we're done.
1275  if (!ConsideredInsts.insert(I))
1276  return false;
1277 
1278  // If this is an obviously unfoldable instruction, bail out.
1279  if (!MightBeFoldableInst(I))
1280  return true;
1281 
1282  // Loop over all the uses, recursively processing them.
1283  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
1284  UI != E; ++UI) {
1285  User *U = *UI;
1286 
1287  if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
1288  MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
1289  continue;
1290  }
1291 
1292  if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1293  unsigned opNo = UI.getOperandNo();
1294  if (opNo == 0) return true; // Storing addr, not into addr.
1295  MemoryUses.push_back(std::make_pair(SI, opNo));
1296  continue;
1297  }
1298 
1299  if (CallInst *CI = dyn_cast<CallInst>(U)) {
1300  InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
1301  if (!IA) return true;
1302 
1303  // If this is a memory operand, we're cool, otherwise bail out.
1304  if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
1305  return true;
1306  continue;
1307  }
1308 
1309  if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
1310  TLI))
1311  return true;
1312  }
1313 
1314  return false;
1315 }
1316 
1317 /// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
1318 /// the use site that we're folding it into. If so, there is no cost to
1319 /// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
1320 /// that we know are live at the instruction already.
1321 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
1322  Value *KnownLive2) {
1323  // If Val is either of the known-live values, we know it is live!
1324  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
1325  return true;
1326 
1327  // All values other than instructions and arguments (e.g. constants) are live.
1328  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
1329 
1330  // If Val is a constant sized alloca in the entry block, it is live, this is
1331  // true because it is just a reference to the stack/frame pointer, which is
1332  // live for the whole function.
1333  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
1334  if (AI->isStaticAlloca())
1335  return true;
1336 
1337  // Check to see if this value is already used in the memory instruction's
1338  // block. If so, it's already live into the block at the very least, so we
1339  // can reasonably fold it.
1340  return Val->isUsedInBasicBlock(MemoryInst->getParent());
1341 }
1342 
1343 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
1344 /// mode of the machine to fold the specified instruction into a load or store
1345 /// that ultimately uses it. However, the specified instruction has multiple
1346 /// uses. Given this, it may actually increase register pressure to fold it
1347 /// into the load. For example, consider this code:
1348 ///
1349 /// X = ...
1350 /// Y = X+1
1351 /// use(Y) -> nonload/store
1352 /// Z = Y+1
1353 /// load Z
1354 ///
1355 /// In this case, Y has multiple uses, and can be folded into the load of Z
1356 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
1357 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one
1358 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
1359 /// number of computations either.
1360 ///
1361 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
1362 /// X was live across 'load Z' for other reasons, we actually *would* want to
1363 /// fold the addressing mode in the Z case. This would make Y die earlier.
1364 bool AddressingModeMatcher::
1365 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
1366  ExtAddrMode &AMAfter) {
1367  if (IgnoreProfitability) return true;
1368 
1369  // AMBefore is the addressing mode before this instruction was folded into it,
1370  // and AMAfter is the addressing mode after the instruction was folded. Get
1371  // the set of registers referenced by AMAfter and subtract out those
1372  // referenced by AMBefore: this is the set of values which folding in this
1373  // address extends the lifetime of.
1374  //
1375  // Note that there are only two potential values being referenced here,
1376  // BaseReg and ScaleReg (global addresses are always available, as are any
1377  // folded immediates).
1378  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
1379 
1380  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
1381  // lifetime wasn't extended by adding this instruction.
1382  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
1383  BaseReg = 0;
1384  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
1385  ScaledReg = 0;
1386 
1387  // If folding this instruction (and its subexprs) didn't extend any live
1388  // ranges, we're ok with it.
1389  if (BaseReg == 0 && ScaledReg == 0)
1390  return true;
1391 
1392  // If all uses of this instruction are ultimately load/store/inlineasm's,
1393  // check to see if their addressing modes will include this instruction. If
1394  // so, we can fold it into all uses, so it doesn't matter if it has multiple
1395  // uses.
1396  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
1397  SmallPtrSet<Instruction*, 16> ConsideredInsts;
1398  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
1399  return false; // Has a non-memory, non-foldable use!
1400 
1401  // Now that we know that all uses of this instruction are part of a chain of
1402  // computation involving only operations that could theoretically be folded
1403  // into a memory use, loop over each of these uses and see if they could
1404  // *actually* fold the instruction.
1405  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
1406  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
1407  Instruction *User = MemoryUses[i].first;
1408  unsigned OpNo = MemoryUses[i].second;
1409 
1410  // Get the access type of this use. If the use isn't a pointer, we don't
1411  // know what it accesses.
1412  Value *Address = User->getOperand(OpNo);
1413  if (!Address->getType()->isPointerTy())
1414  return false;
1415  Type *AddressAccessTy = Address->getType()->getPointerElementType();
1416 
1417  // Do a match against the root of this address, ignoring profitability. This
1418  // will tell us if the addressing mode for the memory operation will
1419  // *actually* cover the shared instruction.
1420  ExtAddrMode Result;
1421  AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
1422  MemoryInst, Result);
1423  Matcher.IgnoreProfitability = true;
1424  bool Success = Matcher.MatchAddr(Address, 0);
1425  (void)Success; assert(Success && "Couldn't select *anything*?");
1426 
1427  // If the match didn't cover I, then it won't be shared by it.
1428  if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
1429  I) == MatchedAddrModeInsts.end())
1430  return false;
1431 
1432  MatchedAddrModeInsts.clear();
1433  }
1434 
1435  return true;
1436 }
1437 
1438 } // end anonymous namespace
1439 
1440 /// IsNonLocalValue - Return true if the specified values are defined in a
1441 /// different basic block than BB.
1442 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
1443  if (Instruction *I = dyn_cast<Instruction>(V))
1444  return I->getParent() != BB;
1445  return false;
1446 }
1447 
1448 /// OptimizeMemoryInst - Load and Store Instructions often have
1449 /// addressing modes that can do significant amounts of computation. As such,
1450 /// instruction selection will try to get the load or store to do as much
1451 /// computation as possible for the program. The problem is that isel can only
1452 /// see within a single block. As such, we sink as much legal addressing mode
1453 /// stuff into the block as possible.
1454 ///
1455 /// This method is used to optimize both load/store and inline asms with memory
1456 /// operands.
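/// A hypothetical example: if "%a = getelementptr i32* %p, i64 %i" lives in a
/// predecessor block and its only use here is "load i32* %a", the matched
/// addressing computation is rebuilt next to the load (the "sunkaddr" values
/// below) so instruction selection can fold it into the load's address.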
1457 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
1458  Type *AccessTy) {
1459  Value *Repl = Addr;
1460 
1461  // Try to collapse single-value PHI nodes. This is necessary to undo
1462  // unprofitable PRE transformations.
1463  SmallVector<Value*, 8> worklist;
1464  SmallPtrSet<Value*, 16> Visited;
1465  worklist.push_back(Addr);
1466 
1467  // Use a worklist to iteratively look through PHI nodes, and ensure that
1468  // the addressing mode obtained from the non-PHI roots of the graph
1469  // are equivalent.
1470  Value *Consensus = 0;
1471  unsigned NumUsesConsensus = 0;
1472  bool IsNumUsesConsensusValid = false;
1473  SmallVector<Instruction*, 16> AddrModeInsts;
1474  ExtAddrMode AddrMode;
1475  while (!worklist.empty()) {
1476  Value *V = worklist.back();
1477  worklist.pop_back();
1478 
1479  // Break use-def graph loops.
1480  if (!Visited.insert(V)) {
1481  Consensus = 0;
1482  break;
1483  }
1484 
1485  // For a PHI node, push all of its incoming values.
1486  if (PHINode *P = dyn_cast<PHINode>(V)) {
1487  for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
1488  worklist.push_back(P->getIncomingValue(i));
1489  continue;
1490  }
1491 
1492  // For non-PHIs, determine the addressing mode being computed.
1493  SmallVector<Instruction*, 16> NewAddrModeInsts;
1494  ExtAddrMode NewAddrMode =
1495  AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
1496  NewAddrModeInsts, *TLI);
1497 
1498  // This check is broken into two cases with very similar code to avoid using
1499  // getNumUses() as much as possible. Some values have a lot of uses, so
1500  // calling getNumUses() unconditionally caused a significant compile-time
1501  // regression.
1502  if (!Consensus) {
1503  Consensus = V;
1504  AddrMode = NewAddrMode;
1505  AddrModeInsts = NewAddrModeInsts;
1506  continue;
1507  } else if (NewAddrMode == AddrMode) {
1508  if (!IsNumUsesConsensusValid) {
1509  NumUsesConsensus = Consensus->getNumUses();
1510  IsNumUsesConsensusValid = true;
1511  }
1512 
1513  // Ensure that the obtained addressing mode is equivalent to that obtained
1514  // for all other roots of the PHI traversal. Also, when choosing one
1515  // such root as representative, select the one with the most uses in order
1516  // to keep the cost modeling heuristics in AddressingModeMatcher
1517  // applicable.
1518  unsigned NumUses = V->getNumUses();
1519  if (NumUses > NumUsesConsensus) {
1520  Consensus = V;
1521  NumUsesConsensus = NumUses;
1522  AddrModeInsts = NewAddrModeInsts;
1523  }
1524  continue;
1525  }
1526 
1527  Consensus = 0;
1528  break;
1529  }
1530 
1531  // If the addressing mode couldn't be determined, or if multiple different
1532  // ones were determined, bail out now.
1533  if (!Consensus) return false;
1534 
1535  // Check to see if any of the instructions subsumed by this addr mode are
1536  // non-local to I's BB.
1537  bool AnyNonLocal = false;
1538  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
1539  if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
1540  AnyNonLocal = true;
1541  break;
1542  }
1543  }
1544 
1545  // If all the instructions matched are already in this BB, don't do anything.
1546  if (!AnyNonLocal) {
1547  DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
1548  return false;
1549  }
1550 
1551  // Insert this computation right after this user. Since our caller is
1552  // scanning from the top of the BB to the bottom, any reuses of the expression
1553  // are guaranteed to happen later.
1554  IRBuilder<> Builder(MemoryInst);
1555 
1556  // Now that we have determined the addressing expression we want to use and
1557  // know that we have to sink it into this block, check to see if we have
1558  // already done this for some other load/store instruction in this block. If
1559  // so, reuse the computation.
1560  Value *&SunkAddr = SunkAddrs[Addr];
1561  if (SunkAddr) {
1562  DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
1563  << *MemoryInst);
1564  if (SunkAddr->getType() != Addr->getType())
1565  SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
1566  } else {
1567  DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
1568  << *MemoryInst);
1569  Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
1570  Value *Result = 0;
1571 
1572  // Start with the base register. Do this first so that subsequent address
1573  // matching finds it last, which will prevent it from trying to match it
1574  // as the scaled value in case it happens to be a mul. That would be
1575  // problematic if we've sunk a different mul for the scale, because then
1576  // we'd end up sinking both muls.
1577  if (AddrMode.BaseReg) {
1578  Value *V = AddrMode.BaseReg;
1579  if (V->getType()->isPointerTy())
1580  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
1581  if (V->getType() != IntPtrTy)
1582  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
1583  Result = V;
1584  }
1585 
1586  // Add the scale value.
1587  if (AddrMode.Scale) {
1588  Value *V = AddrMode.ScaledReg;
1589  if (V->getType() == IntPtrTy) {
1590  // done.
1591  } else if (V->getType()->isPointerTy()) {
1592  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
1593  } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
1594  cast<IntegerType>(V->getType())->getBitWidth()) {
1595  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
1596  } else {
1597  V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
1598  }
1599  if (AddrMode.Scale != 1)
1600  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
1601  "sunkaddr");
1602  if (Result)
1603  Result = Builder.CreateAdd(Result, V, "sunkaddr");
1604  else
1605  Result = V;
1606  }
1607 
1608  // Add in the BaseGV if present.
1609  if (AddrMode.BaseGV) {
1610  Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
1611  if (Result)
1612  Result = Builder.CreateAdd(Result, V, "sunkaddr");
1613  else
1614  Result = V;
1615  }
1616 
1617  // Add in the Base Offset if present.
1618  if (AddrMode.BaseOffs) {
1619  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
1620  if (Result)
1621  Result = Builder.CreateAdd(Result, V, "sunkaddr");
1622  else
1623  Result = V;
1624  }
1625 
1626  if (Result == 0)
1627  SunkAddr = Constant::getNullValue(Addr->getType());
1628  else
1629  SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
1630  }
1631 
1632  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
1633 
1634  // If we have no uses, recursively delete the value and all dead instructions
1635  // using it.
1636  if (Repl->use_empty()) {
1637  // This can cause recursive deletion, which can invalidate our iterator.
1638  // Use a WeakVH to hold onto it in case this happens.
1639  WeakVH IterHandle(CurInstIterator);
1640  BasicBlock *BB = CurInstIterator->getParent();
1641 
1642  RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
1643 
1644  if (IterHandle != CurInstIterator) {
1645  // If the iterator instruction was recursively deleted, start over at the
1646  // start of the block.
1647  CurInstIterator = BB->begin();
1648  SunkAddrs.clear();
1649  }
1650  }
1651  ++NumMemoryInsts;
1652  return true;
1653 }
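// [Editor's illustrative sketch -- NOT part of CodeGenPrepare.cpp.] The sunk
// address materialized above is just BaseReg + ScaledReg*Scale + BaseGV +
// BaseOffs, rebuilt with an IRBuilder positioned at the memory instruction.
// A stand-alone helper doing the same arithmetic might look like the code
// below; the name buildSunkAddress is hypothetical, only the IRBuilder and
// Constant APIs already used above are assumed, and the BaseGV term plus the
// integer-width casts on ScaledReg are omitted for brevity.
static Value *buildSunkAddress(IRBuilder<> &B, Type *IntPtrTy, Type *PtrTy,
                               Value *BaseReg, Value *ScaledReg, int64_t Scale,
                               int64_t BaseOffs) {
  Value *Result = 0;
  if (BaseReg) {
    // Base register, converted to the integer pointer type first.
    Value *V = BaseReg;
    if (V->getType()->isPointerTy())
      V = B.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
    Result = V;
  }
  if (ScaledReg && Scale) {
    // Scaled index: ptrtoint(ScaledReg) * Scale (assumes a pointer ScaledReg).
    Value *V = B.CreatePtrToInt(ScaledReg, IntPtrTy, "sunkaddr");
    if (Scale != 1)
      V = B.CreateMul(V, ConstantInt::get(IntPtrTy, Scale), "sunkaddr");
    Result = Result ? B.CreateAdd(Result, V, "sunkaddr") : V;
  }
  if (BaseOffs) {
    // Constant displacement.
    Value *V = ConstantInt::get(IntPtrTy, BaseOffs);
    Result = Result ? B.CreateAdd(Result, V, "sunkaddr") : V;
  }
  // Cast the flat integer back to the original pointer type.
  return Result ? B.CreateIntToPtr(Result, PtrTy, "sunkaddr")
                : Constant::getNullValue(PtrTy);
}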
1654 
1655 /// OptimizeInlineAsmInst - If there are any memory operands, use
1656 /// OptimizeMemoryInst to sink their address computation into the block when
1657 /// possible / profitable.
1658 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
1659  bool MadeChange = false;
1660 
1661  TargetLowering::AsmOperandInfoVector
1662  TargetConstraints = TLI->ParseConstraints(CS);
1663  unsigned ArgNo = 0;
1664  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
1665  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
1666 
1667  // Compute the constraint code and ConstraintType to use.
1668  TLI->ComputeConstraintToUse(OpInfo, SDValue());
1669 
1670  if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
1671  OpInfo.isIndirect) {
1672  Value *OpVal = CS->getArgOperand(ArgNo++);
1673  MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
1674  } else if (OpInfo.Type == InlineAsm::isInput)
1675  ArgNo++;
1676  }
1677 
1678  return MadeChange;
1679 }
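// [Editor's note -- illustrative, NOT part of CodeGenPrepare.cpp.] The operands
// handled above are indirect memory constraints; a call such as
//
//   call void asm sideeffect "movl $$42, $0", "=*m"(i32* %slot)
//
// has %slot classified as an indirect C_Memory operand, so its address
// computation is sunk next to the asm exactly like a load/store address.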
1680 
1681 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
1682 /// basic block as the load, unless conditions are unfavorable. This allows
1683 /// SelectionDAG to fold the extend into the load.
1684 ///
1685 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
1686  // Look for a load being extended.
1687  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
1688  if (!LI) return false;
1689 
1690  // If they're already in the same block, there's nothing to do.
1691  if (LI->getParent() == I->getParent())
1692  return false;
1693 
1694  // If the load has other users and the truncate is not free, this probably
1695  // isn't worthwhile.
1696  if (!LI->hasOneUse() &&
1697  TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
1698  !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
1699  !TLI->isTruncateFree(I->getType(), LI->getType()))
1700  return false;
1701 
1702  // Check whether the target supports casts folded into loads.
1703  unsigned LType;
1704  if (isa<ZExtInst>(I))
1705  LType = ISD::ZEXTLOAD;
1706  else {
1707  assert(isa<SExtInst>(I) && "Unexpected ext type!");
1708  LType = ISD::SEXTLOAD;
1709  }
1710  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
1711  return false;
1712 
1713  // Move the extend into the same block as the load, so that SelectionDAG
1714  // can fold it.
1715  I->removeFromParent();
1716  I->insertAfter(LI);
1717  ++NumExtsMoved;
1718  return true;
1719 }
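// [Editor's note -- illustrative, NOT part of CodeGenPrepare.cpp.] Sketch of
// the move performed above, with made-up block and value names. Before:
//
//   entry:
//     %x = load i16* %p
//     br label %use
//   use:
//     %e = zext i16 %x to i32
//
// After, the zext sits next to the load in %entry:
//
//   entry:
//     %x = load i16* %p
//     %e = zext i16 %x to i32
//     br label %use
//
// With the extend adjacent to the load, SelectionDAG can select one extending
// load (ISD::ZEXTLOAD / ISD::SEXTLOAD) when the target reports it legal.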
1720 
1721 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
1722  BasicBlock *DefBB = I->getParent();
1723 
1724  // If the result of a {s|z}ext and its source are both live out, rewrite all
1725  // other uses of the source with result of extension.
1726  Value *Src = I->getOperand(0);
1727  if (Src->hasOneUse())
1728  return false;
1729 
1730  // Only do this xform if truncating is free.
1731  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
1732  return false;
1733 
1734  // Only safe to perform the optimization if the source is also defined in
1735  // this block.
1736  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
1737  return false;
1738 
1739  bool DefIsLiveOut = false;
1740  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
1741  UI != E; ++UI) {
1742  Instruction *User = cast<Instruction>(*UI);
1743 
1744  // Figure out which BB this ext is used in.
1745  BasicBlock *UserBB = User->getParent();
1746  if (UserBB == DefBB) continue;
1747  DefIsLiveOut = true;
1748  break;
1749  }
1750  if (!DefIsLiveOut)
1751  return false;
1752 
1753  // Make sure none of the uses are PHI nodes.
1754  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
1755  UI != E; ++UI) {
1756  Instruction *User = cast<Instruction>(*UI);
1757  BasicBlock *UserBB = User->getParent();
1758  if (UserBB == DefBB) continue;
1759  // Be conservative. We don't want this xform to end up introducing
1760  // reloads just before load / store instructions.
1761  if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
1762  return false;
1763  }
1764 
1765  // InsertedTruncs - Insert at most one trunc in each block.
1766  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
1767 
1768  bool MadeChange = false;
1769  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
1770  UI != E; ++UI) {
1771  Use &TheUse = UI.getUse();
1772  Instruction *User = cast<Instruction>(*UI);
1773 
1774  // Figure out which BB this ext is used in.
1775  BasicBlock *UserBB = User->getParent();
1776  if (UserBB == DefBB) continue;
1777 
1778  // Both src and def are live in this block. Rewrite the use.
1779  Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
1780 
1781  if (!InsertedTrunc) {
1782  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1783  InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
1784  }
1785 
1786  // Replace a use of the {s|z}ext source with a use of the result.
1787  TheUse = InsertedTrunc;
1788  ++NumExtUses;
1789  MadeChange = true;
1790  }
1791 
1792  return MadeChange;
1793 }
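// [Editor's note -- illustrative, NOT part of CodeGenPrepare.cpp.] Effect of
// the rewrite above when both the source and its extension are live out
// (made-up names). Before:
//
//   def:
//     %s = ...
//     %e = zext i32 %s to i64
//   other:
//     ... use of i32 %s ...
//
// After, the out-of-block use goes through a trunc of the extension:
//
//   other:
//     %t = trunc i64 %e to i32
//     ... use of i32 %t ...
//
// Only %e then needs to stay live across the edge, and the trunc is free by
// the TLI->isTruncateFree check above.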
1794 
1795 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
1796 /// turned into an explicit branch.
1797 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
1798  // FIXME: This should use the same heuristics as IfConversion to determine
1799  // whether a select is better represented as a branch. This requires that
1800  // branch probability metadata is preserved for the select, which is not the
1801  // case currently.
1802 
1803  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
1804 
1805  // If the branch is predicted right, an out of order CPU can avoid blocking on
1806  // the compare. Emit cmovs on compares with a memory operand as branches to
1807  // avoid stalls on the load from memory. If the compare has more than one use
1808  // there's probably another cmov or setcc around so it's not worth emitting a
1809  // branch.
1810  if (!Cmp)
1811  return false;
1812 
1813  Value *CmpOp0 = Cmp->getOperand(0);
1814  Value *CmpOp1 = Cmp->getOperand(1);
1815 
1816  // We check that the memory operand has one use to avoid uses of the loaded
1817  // value directly after the compare, making branches unprofitable.
1818  return Cmp->hasOneUse() &&
1819  ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
1820  (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
1821 }
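// [Editor's note -- illustrative, NOT part of CodeGenPrepare.cpp.] The shape
// accepted above is a single-use compare on a single-use load, e.g.:
//
//   %v = load i32* %p                 ; only used by the compare
//   %c = icmp slt i32 %v, %limit      ; only used by the select
//   %r = select i1 %c, i32 %a, i32 %b
//
// Branching here lets a correctly predicted path run ahead instead of a cmov
// stalling on the load from memory.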
1822 
1823 
1824 /// If we have a SelectInst that will likely profit from branch prediction,
1825 /// turn it into a branch.
1826 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
1827  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
1828 
1829  // Can we convert the 'select' to control flow?
1830  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
1831  return false;
1832 
1833  TargetLowering::SelectSupportKind SelectKind;
1834  if (VectorCond)
1835  SelectKind = TargetLowering::VectorMaskSelect;
1836  else if (SI->getType()->isVectorTy())
1837  SelectKind = TargetLowering::ScalarCondVectorVal;
1838  else
1839  SelectKind = TargetLowering::ScalarValSelect;
1840 
1841  // Do we have efficient codegen support for this kind of 'select'?
1842  if (TLI->isSelectSupported(SelectKind)) {
1843  // We have efficient codegen support for the select instruction.
1844  // Check if it is profitable to keep this 'select'.
1845  if (!TLI->isPredictableSelectExpensive() ||
1846  !isFormingBranchFromSelectProfitable(SI))
1847  return false;
1848  }
1849 
1850  ModifiedDT = true;
1851 
1852  // First, we split the block containing the select into 2 blocks.
1853  BasicBlock *StartBlock = SI->getParent();
1854  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
1855  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
1856 
1857  // Create a new block serving as the landing pad for the branch.
1858  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
1859  NextBlock->getParent(), NextBlock);
1860 
1861  // Move the unconditional branch from the block with the select in it into our
1862  // landing pad block.
1863  StartBlock->getTerminator()->eraseFromParent();
1864  BranchInst::Create(NextBlock, SmallBlock);
1865 
1866  // Insert the real conditional branch based on the original condition.
1867  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
1868 
1869  // The select itself is replaced with a PHI Node.
1870  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
1871  PN->takeName(SI);
1872  PN->addIncoming(SI->getTrueValue(), StartBlock);
1873  PN->addIncoming(SI->getFalseValue(), SmallBlock);
1874  SI->replaceAllUsesWith(PN);
1875  SI->eraseFromParent();
1876 
1877  // Instruct OptimizeBlock to skip to the next block.
1878  CurInstIterator = StartBlock->end();
1879  ++NumSelectsExpanded;
1880  return true;
1881 }
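// [Editor's note -- illustrative, NOT part of CodeGenPrepare.cpp.] CFG shape
// produced above for a profitable select; the block names match the ones
// created in the code ("select.mid", "select.end"):
//
//   start:
//     %c = icmp eq i32 %a, %b
//     br i1 %c, label %select.end, label %select.mid
//   select.mid:
//     br label %select.end
//   select.end:
//     %r = phi i32 [ %t, %start ], [ %f, %select.mid ]
//     ...rest of the original block...
//
// The true value %t arrives on the edge from the original block and the false
// value %f through the new select.mid block, matching the PHI incoming values
// added above.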
1882 
1883 bool CodeGenPrepare::OptimizeInst(Instruction *I) {
1884  if (PHINode *P = dyn_cast<PHINode>(I)) {
1885  // It is possible for very late stage optimizations (such as SimplifyCFG)
1886  // to introduce PHI nodes too late to be cleaned up. If we detect such a
1887  // trivial PHI, go ahead and zap it here.
1888  if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
1889  TLInfo, DT)) {
1890  P->replaceAllUsesWith(V);
1891  P->eraseFromParent();
1892  ++NumPHIsElim;
1893  return true;
1894  }
1895  return false;
1896  }
1897 
1898  if (CastInst *CI = dyn_cast<CastInst>(I)) {
1899  // If the source of the cast is a constant, then this should have
1900  // already been constant folded. The only reason NOT to constant fold
1901  // it is if something (e.g. LSR) was careful to place the constant
1902  // evaluation in a block other than the one that uses it (e.g. to hoist
1903  // the address of globals out of a loop). If this is the case, we don't
1904  // want to forward-subst the cast.
1905  if (isa<Constant>(CI->getOperand(0)))
1906  return false;
1907 
1908  if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
1909  return true;
1910 
1911  if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
1912  bool MadeChange = MoveExtToFormExtLoad(I);
1913  return MadeChange | OptimizeExtUses(I);
1914  }
1915  return false;
1916  }
1917 
1918  if (CmpInst *CI = dyn_cast<CmpInst>(I))
1919  return OptimizeCmpExpression(CI);
1920 
1921  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1922  if (TLI)
1923  return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
1924  return false;
1925  }
1926 
1927  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1928  if (TLI)
1929  return OptimizeMemoryInst(I, SI->getOperand(1),
1930  SI->getOperand(0)->getType());
1931  return false;
1932  }
1933 
1934  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
1935  if (GEPI->hasAllZeroIndices()) {
1936  /// The GEP operand must be a pointer, so must its result -> BitCast
1937  Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
1938  GEPI->getName(), GEPI);
1939  GEPI->replaceAllUsesWith(NC);
1940  GEPI->eraseFromParent();
1941  ++NumGEPsElim;
1942  OptimizeInst(NC);
1943  return true;
1944  }
1945  return false;
1946  }
1947 
1948  if (CallInst *CI = dyn_cast<CallInst>(I))
1949  return OptimizeCallInst(CI);
1950 
1951  if (SelectInst *SI = dyn_cast<SelectInst>(I))
1952  return OptimizeSelectInst(SI);
1953 
1954  return false;
1955 }
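// [Editor's note -- illustrative, NOT part of CodeGenPrepare.cpp.] The GEP case
// above only fires for all-zero indices, where the GEP is a pure type change:
//
//   %q = getelementptr [4 x i32]* %p, i64 0, i64 0
//
// is rewritten to the equivalent
//
//   %q = bitcast [4 x i32]* %p to i32*
//
// and the new bitcast is immediately fed back through OptimizeInst(NC).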
1956 
1957 // In this pass we look for GEP and cast instructions that are used
1958 // across basic blocks and rewrite them to improve basic-block-at-a-time
1959 // selection.
1960 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
1961  SunkAddrs.clear();
1962  bool MadeChange = false;
1963 
1964  CurInstIterator = BB.begin();
1965  while (CurInstIterator != BB.end())
1966  MadeChange |= OptimizeInst(CurInstIterator++);
1967 
1968  MadeChange |= DupRetToEnableTailCallOpts(&BB);
1969 
1970  return MadeChange;
1971 }
1972 
1973 // If llvm.dbg.value is far away from the value, then ISel may not be able to
1974 // handle it properly. ISel will drop llvm.dbg.value if it cannot
1975 // find a node corresponding to the value.
1976 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
1977  bool MadeChange = false;
1978  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
1979  Instruction *PrevNonDbgInst = NULL;
1980  for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
1981  Instruction *Insn = BI; ++BI;
1982  DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
1983  if (!DVI) {
1984  PrevNonDbgInst = Insn;
1985  continue;
1986  }
1987 
1988  Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
1989  if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
1990  DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
1991  DVI->removeFromParent();
1992  if (isa<PHINode>(VI))
1993  DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
1994  else
1995  DVI->insertAfter(VI);
1996  MadeChange = true;
1997  ++NumDbgValueMoved;
1998  }
1999  }
2000  }
2001  return MadeChange;
2002 }
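// [Editor's note -- illustrative, NOT part of CodeGenPrepare.cpp.] Example of
// the reordering performed above (metadata operands abbreviated, !10 is a
// made-up variable descriptor). Before:
//
//   %v = add i32 %a, %b
//   ...many unrelated instructions...
//   call void @llvm.dbg.value(metadata !{i32 %v}, i64 0, metadata !10)
//
// After, the dbg.value is placed right after %v:
//
//   %v = add i32 %a, %b
//   call void @llvm.dbg.value(metadata !{i32 %v}, i64 0, metadata !10)
//   ...many unrelated instructions...
//
// so ISel still has a node for %v when it lowers the debug value.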