LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BypassSlowDivision.cpp
Go to the documentation of this file.
1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #define DEBUG_TYPE "bypass-slow-division"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instructions.h"
24 
25 using namespace llvm;
26 
27 namespace {
28  struct DivOpInfo {
29  bool SignedOp;
30  Value *Dividend;
31  Value *Divisor;
32 
33  DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34  : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
35  };
36 
37  struct DivPhiNodes {
38  PHINode *Quotient;
39  PHINode *Remainder;
40 
41  DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42  : Quotient(InQuotient), Remainder(InRemainder) {}
43  };
44 }
45 
46 namespace llvm {
47  template<>
48  struct DenseMapInfo<DivOpInfo> {
49  static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
50  return Val1.SignedOp == Val2.SignedOp &&
51  Val1.Dividend == Val2.Dividend &&
52  Val1.Divisor == Val2.Divisor;
53  }
54 
55  static DivOpInfo getEmptyKey() {
56  return DivOpInfo(false, 0, 0);
57  }
58 
59  static DivOpInfo getTombstoneKey() {
60  return DivOpInfo(true, 0, 0);
61  }
62 
63  static unsigned getHashValue(const DivOpInfo &Val) {
64  return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
65  reinterpret_cast<uintptr_t>(Val.Divisor)) ^
66  (unsigned)Val.SignedOp;
67  }
68  };
69 
71 }
72 
73 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
74 // value of the operands and uses a shorter-faster div/rem instruction when
75 // possible and the longer-slower div/rem instruction otherwise.
76 static bool insertFastDiv(Function &F,
79  IntegerType *BypassType,
80  bool UseDivOp,
81  bool UseSignedOp,
82  DivCacheTy &PerBBDivCache) {
83  // Get instruction operands
84  Instruction *Instr = J;
85  Value *Dividend = Instr->getOperand(0);
86  Value *Divisor = Instr->getOperand(1);
87 
88  if (isa<ConstantInt>(Divisor) ||
89  (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
90  // Operations with immediate values should have
91  // been solved and replaced during compile time.
92  return false;
93  }
94 
95  // Basic Block is split before divide
96  BasicBlock *MainBB = I;
97  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
98  ++I; //advance iterator I to successorBB
99 
100  // Add new basic block for slow divide operation
101  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
102  MainBB->getParent(), SuccessorBB);
103  SlowBB->moveBefore(SuccessorBB);
104  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
105  Value *SlowQuotientV;
106  Value *SlowRemainderV;
107  if (UseSignedOp) {
108  SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
109  SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
110  } else {
111  SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
112  SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
113  }
114  SlowBuilder.CreateBr(SuccessorBB);
115 
116  // Add new basic block for fast divide operation
117  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
118  MainBB->getParent(), SuccessorBB);
119  FastBB->moveBefore(SlowBB);
120  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
121  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
122  BypassType);
123  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
124  BypassType);
125 
126  // udiv/urem because optimization only handles positive numbers
127  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
128  ShortDivisorV);
129  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
130  ShortDivisorV);
131  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
132  ShortQuotientV,
133  Dividend->getType());
134  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
135  ShortRemainderV,
136  Dividend->getType());
137  FastBuilder.CreateBr(SuccessorBB);
138 
139  // Phi nodes for result of div and rem
140  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
141  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
142  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
143  QuoPhi->addIncoming(FastQuotientV, FastBB);
144  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
145  RemPhi->addIncoming(SlowRemainderV, SlowBB);
146  RemPhi->addIncoming(FastRemainderV, FastBB);
147 
148  // Replace Instr with appropriate phi node
149  if (UseDivOp)
150  Instr->replaceAllUsesWith(QuoPhi);
151  else
152  Instr->replaceAllUsesWith(RemPhi);
153  Instr->eraseFromParent();
154 
155  // Combine operands into a single value with OR for value testing below
156  MainBB->getInstList().back().eraseFromParent();
157  IRBuilder<> MainBuilder(MainBB, MainBB->end());
158  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
159 
160  // BitMask is inverted to check if the operands are
161  // larger than the bypass type
162  uint64_t BitMask = ~BypassType->getBitMask();
163  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
164 
165  // Compare operand values and branch
166  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
167  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
168  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
169 
170  // point iterator J at first instruction of successorBB
171  J = I->begin();
172 
173  // Cache phi nodes to be used later in place of other instances
174  // of div or rem with the same sign, dividend, and divisor
175  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
176  DivPhiNodes Value(QuoPhi, RemPhi);
177  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
178  return true;
179 }
180 
181 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
182 // operands and operation are identical. Otherwise call insertFastDiv to perform
183 // the optimization and cache the resulting dividend and remainder.
187  IntegerType *BypassType,
188  bool UseDivOp,
189  bool UseSignedOp,
190  DivCacheTy &PerBBDivCache) {
191  // Get instruction operands
192  Instruction *Instr = J;
193  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
194  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
195 
196  if (CacheI == PerBBDivCache.end()) {
197  // If previous instance does not exist, insert fast div
198  return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
199  PerBBDivCache);
200  }
201 
202  // Replace operation value with previously generated phi node
203  DivPhiNodes &Value = CacheI->second;
204  if (UseDivOp) {
205  // Replace all uses of div instruction with quotient phi node
206  J->replaceAllUsesWith(Value.Quotient);
207  } else {
208  // Replace all uses of rem instruction with remainder phi node
209  J->replaceAllUsesWith(Value.Remainder);
210  }
211 
212  // Advance to next operation
213  ++J;
214 
215  // Remove redundant operation
216  Instr->eraseFromParent();
217  return true;
218 }
219 
220 // bypassSlowDivision - This optimization identifies DIV instructions that can
221 // be profitably bypassed and carried out with a shorter, faster divide.
224  const DenseMap<unsigned int, unsigned int> &BypassWidths) {
225  DivCacheTy DivCache;
226 
227  bool MadeChange = false;
228  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
229 
230  // Get instruction details
231  unsigned Opcode = J->getOpcode();
232  bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
233  bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
234  bool UseSignedOp = Opcode == Instruction::SDiv ||
235  Opcode == Instruction::SRem;
236 
237  // Only optimize div or rem ops
238  if (!UseDivOp && !UseRemOp)
239  continue;
240 
241  // Skip division on vector types, only optimize integer instructions
242  if (!J->getType()->isIntegerTy())
243  continue;
244 
245  // Get bitwidth of div/rem instruction
246  IntegerType *T = cast<IntegerType>(J->getType());
247  unsigned int bitwidth = T->getBitWidth();
248 
249  // Continue if bitwidth is not bypassed
251  if (BI == BypassWidths.end())
252  continue;
253 
254  // Get type for div/rem instruction with bypass bitwidth
255  IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
256 
257  MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
258  UseSignedOp, DivCache);
259  }
260 
261  return MadeChange;
262 }
void addIncoming(Value *V, BasicBlock *BB)
LLVMContext & getContext() const
Definition: Function.cpp:167
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:116
F(f)
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:61
static bool insertFastDiv(Function &F, Function::iterator &I, BasicBlock::iterator &J, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache)
X86 bit-test instructions.
iterator begin()
Definition: BasicBlock.h:193
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:421
void replaceAllUsesWith(Value *V)
Definition: Value.cpp:303
LLVM Basic Block Representation.
Definition: BasicBlock.h:72
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:214
iterator end()
Definition: DenseMap.h:57
Value * getOperand(unsigned i) const
Definition: User.h:88
Integer representation type.
Definition: DerivedTypes.h:37
static bool reuseOrInsertFastDiv(Function &F, Function::iterator &I, BasicBlock::iterator &J, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:153
static IntegerType * get(LLVMContext &C, unsigned NumBits)
Get or create an IntegerType instance.
Definition: Type.cpp:305
iterator end()
Definition: BasicBlock.h:195
Type * getType() const
Definition: Value.h:111
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Get a ConstantInt for a specific signed value.
Definition: Constants.cpp:507
Value * CreateSDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:693
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:681
uint64_t getBitMask() const
Definition: DerivedTypes.h:66
DenseMap< DivOpInfo, DivPhiNodes > DivCacheTy
#define I(x, y, z)
Definition: MD5.cpp:54
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=0, BasicBlock *InsertBefore=0)
Creates a new BasicBlock.
Definition: BasicBlock.h:109
bool bypassSlowDivision(Function &F, Function::iterator &I, const DenseMap< unsigned int, unsigned int > &BypassWidth)
static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2)
reference back()
Definition: ilist.h:398
static unsigned getHashValue(const DivOpInfo &Val)
LLVM Value Representation.
Definition: Value.h:66
iterator find(const KeyT &Val)
Definition: DenseMap.h:108
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:106