14 #define DEBUG_TYPE "inline-cost"
37 STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
41 class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
57 bool IsCallerRecursive;
59 bool ExposesReturnsTwice;
60 bool HasDynamicAlloca;
61 bool ContainsNoDuplicateCall;
64 uint64_t AllocatedSize;
65 unsigned NumInstructions, NumVectorInstructions;
66 int FiftyPercentVectorBonus, TenPercentVectorBonus;
91 bool isAllocaDerivedArg(
Value *V);
92 bool lookupSROAArgAndCost(
Value *V,
Value *&Arg,
95 void disableSROA(
Value *V);
98 bool handleSROACandidate(
bool IsSROAValid,
100 int InstructionCost);
139 : TD(TD), TTI(TTI),
F(Callee), Threshold(Threshold), Cost(0),
140 IsCallerRecursive(
false), IsRecursiveCall(
false),
141 ExposesReturnsTwice(
false), HasDynamicAlloca(
false),
142 ContainsNoDuplicateCall(
false), AllocatedSize(0), NumInstructions(0),
143 NumVectorInstructions(0), FiftyPercentVectorBonus(0),
144 TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
145 NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
146 NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
147 SROACostSavings(0), SROACostSavingsLost(0) {}
152 int getCost() {
return Cost; }
156 unsigned NumConstantArgs;
157 unsigned NumConstantOffsetPtrArgs;
158 unsigned NumAllocaArgs;
159 unsigned NumConstantPtrCmps;
160 unsigned NumConstantPtrDiffs;
161 unsigned NumInstructionsSimplified;
162 unsigned SROACostSavings;
163 unsigned SROACostSavingsLost;
171 bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
172 return SROAArgValues.count(V);
177 bool CallAnalyzer::lookupSROAArgAndCost(
179 if (SROAArgValues.empty() || SROAArgCosts.empty())
183 if (ArgIt == SROAArgValues.
end())
187 CostIt = SROAArgCosts.
find(Arg);
188 return CostIt != SROAArgCosts.
end();
198 Cost += CostIt->second;
199 SROACostSavings -= CostIt->second;
200 SROACostSavingsLost += CostIt->second;
201 SROAArgCosts.
erase(CostIt);
205 void CallAnalyzer::disableSROA(
Value *V) {
208 if (lookupSROAArgAndCost(V, SROAArg, CostIt))
214 int InstructionCost) {
215 CostIt->second += InstructionCost;
216 SROACostSavings += InstructionCost;
222 bool CallAnalyzer::handleSROACandidate(
bool IsSROAValid,
224 int InstructionCost) {
226 accumulateSROACost(CostIt, InstructionCost);
239 if (!isa<Constant>(*
I) && !SimplifiedValues.lookup(*
I))
253 unsigned IntPtrWidth =
TD->getPointerSizeInBits();
260 if (
Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
264 if (OpC->
isZero())
continue;
267 if (
StructType *STy = dyn_cast<StructType>(*GTI)) {
274 APInt TypeSize(IntPtrWidth,
TD->getTypeAllocSize(GTI.getIndexedType()));
287 AllocatedSize += (
TD ?
TD->getTypeAllocSize(Ty) :
293 return Base::visitAlloca(I);
299 HasDynamicAlloca =
true;
303 bool CallAnalyzer::visitPHI(
PHINode &I) {
327 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
328 if (BaseAndOffset.first) {
331 if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
339 ConstantOffsetPtrs[&
I] = BaseAndOffset;
344 SROAArgValues[&
I] = SROAArg;
350 if (isGEPOffsetConstant(I)) {
352 SROAArgValues[&
I] = SROAArg;
368 COp = SimplifiedValues.lookup(I.
getOperand(0));
371 SimplifiedValues[&
I] =
C;
376 std::pair<Value *, APInt> BaseAndOffset
379 if (BaseAndOffset.first)
380 ConstantOffsetPtrs[&
I] = BaseAndOffset;
385 if (lookupSROAArgAndCost(I.
getOperand(0), SROAArg, CostIt))
386 SROAArgValues[&
I] = SROAArg;
396 COp = SimplifiedValues.lookup(I.
getOperand(0));
399 SimplifiedValues[&
I] =
C;
406 if (
TD && IntegerSize >=
TD->getPointerSizeInBits()) {
407 std::pair<Value *, APInt> BaseAndOffset
409 if (BaseAndOffset.first)
410 ConstantOffsetPtrs[&
I] = BaseAndOffset;
422 if (lookupSROAArgAndCost(I.
getOperand(0), SROAArg, CostIt))
423 SROAArgValues[&
I] = SROAArg;
432 COp = SimplifiedValues.lookup(I.
getOperand(0));
435 SimplifiedValues[&
I] =
C;
443 if (
TD && IntegerSize <= TD->getPointerSizeInBits()) {
444 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
445 if (BaseAndOffset.first)
446 ConstantOffsetPtrs[&
I] = BaseAndOffset;
452 if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
453 SROAArgValues[&
I] = SROAArg;
458 bool CallAnalyzer::visitCastInst(
CastInst &I) {
462 COp = SimplifiedValues.lookup(I.
getOperand(0));
465 SimplifiedValues[&
I] =
C;
479 COp = SimplifiedValues.lookup(Operand);
483 SimplifiedValues[&
I] =
C;
488 disableSROA(Operand);
493 bool CallAnalyzer::visitCmpInst(
CmpInst &I) {
496 if (!isa<Constant>(LHS))
497 if (
Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
499 if (!isa<Constant>(RHS))
500 if (
Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
502 if (
Constant *CLHS = dyn_cast<Constant>(LHS)) {
503 if (
Constant *CRHS = dyn_cast<Constant>(RHS))
505 SimplifiedValues[&
I] =
C;
515 Value *LHSBase, *RHSBase;
516 APInt LHSOffset, RHSOffset;
517 llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
519 llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
520 if (RHSBase && LHSBase == RHSBase) {
526 SimplifiedValues[&
I] =
C;
527 ++NumConstantPtrCmps;
549 if (lookupSROAArgAndCost(I.
getOperand(0), SROAArg, CostIt)) {
550 if (isa<ConstantPointerNull>(I.
getOperand(1))) {
565 Value *LHSBase, *RHSBase;
566 APInt LHSOffset, RHSOffset;
567 llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
569 llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
570 if (RHSBase && LHSBase == RHSBase) {
576 SimplifiedValues[&
I] =
C;
577 ++NumConstantPtrDiffs;
585 return Base::visitSub(I);
590 if (!isa<Constant>(LHS))
591 if (
Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
593 if (!isa<Constant>(RHS))
594 if (
Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
597 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV)) {
598 SimplifiedValues[&
I] =
C;
609 bool CallAnalyzer::visitLoad(
LoadInst &I) {
612 if (lookupSROAArgAndCost(I.
getOperand(0), SROAArg, CostIt)) {
624 bool CallAnalyzer::visitStore(
StoreInst &I) {
627 if (lookupSROAArgAndCost(I.
getOperand(0), SROAArg, CostIt)) {
661 if (AggC && InsertedC) {
692 C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
706 bool CallAnalyzer::visitCallSite(
CallSite CS) {
711 ExposesReturnsTwice =
true;
716 ContainsNoDuplicateCall =
true;
720 if (simplifyCallSite(F, CS))
726 switch (II->getIntrinsicID()) {
728 return Base::visitCallSite(CS);
741 IsRecursiveCall =
true;
745 if (TTI.isLoweredToCall(F)) {
756 return Base::visitCallSite(CS);
769 Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
771 return Base::visitCallSite(CS);
779 if (CA.analyzeCall(CS)) {
785 return Base::visitCallSite(CS);
788 bool CallAnalyzer::visitInstruction(
Instruction &I) {
791 if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
810 bool CallAnalyzer::analyzeBlock(
BasicBlock *BB) {
815 ++NumVectorInstructions;
823 ++NumInstructionsSimplified;
828 if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
834 if (IsCallerRecursive &&
838 if (NumVectorInstructions > NumInstructions/2)
839 VectorBonus = FiftyPercentVectorBonus;
840 else if (NumVectorInstructions > NumInstructions/10)
841 VectorBonus = TenPercentVectorBonus;
860 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
864 unsigned IntPtrWidth =
TD->getPointerSizeInBits();
873 if (!GEP->
isInBounds() || !accumulateGEPOffset(*GEP, Offset))
877 V = cast<Operator>(V)->getOperand(0);
878 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
879 if (GA->mayBeOverridden())
881 V = GA->getAliasee();
886 }
while (Visited.
insert(V));
899 bool CallAnalyzer::analyzeCall(
CallSite CS) {
905 bool SingleBB =
true;
915 assert(NumInstructions == 0);
916 assert(NumVectorInstructions == 0);
922 for (
unsigned I = 0, E = CS.
arg_size(); I != E; ++
I) {
928 unsigned PointerSize =
TD->getPointerSizeInBits();
930 unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
938 NumStores = std::min(NumStores, 8U);
952 if (OnlyOneCallAndLocalLinkage)
960 if (
InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
961 if (isa<UnreachableInst>(II->getNormalDest()->begin()))
987 IsCallerRecursive =
true;
994 bool HasReturn =
false;
1000 FAI != FAE; ++FAI, ++CAI) {
1002 if (
Constant *C = dyn_cast<Constant>(CAI))
1003 SimplifiedValues[FAI] = C;
1005 Value *PtrArg = *CAI;
1006 if (
ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1007 ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
1010 if (isa<AllocaInst>(PtrArg)) {
1011 SROAArgValues[FAI] = PtrArg;
1012 SROAArgCosts[PtrArg] = 0;
1016 NumConstantArgs = SimplifiedValues.size();
1017 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1018 NumAllocaArgs = SROAArgValues.size();
1029 BBSetVector BBWorklist;
1032 for (
unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1057 if (isa<IndirectBrInst>(TI))
1060 if (!HasReturn && isa<ReturnInst>(TI))
1067 if (!analyzeBlock(BB)) {
1068 if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
1074 if (IsCallerRecursive &&
1083 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1084 if (BI->isConditional()) {
1085 Value *Cond = BI->getCondition();
1087 = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1088 BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1092 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1093 Value *Cond = SI->getCondition();
1095 = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1096 BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
1121 if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1129 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1132 #define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n"
1142 #undef DEBUG_PRINT_STAT
1152 char InlineCostAnalysis::
ID = 0;
1165 TD = getAnalysisIfAvailable<DataLayout>();
1166 TTI = &getAnalysis<TargetTransformInfo>();
1223 CallAnalyzer CA(
TD, *TTI, *Callee, Threshold);
1224 bool ShouldInline = CA.analyzeCall(CS);
1229 if (!ShouldInline && CA.getCost() < CA.getThreshold())
1231 if (ShouldInline && CA.getCost() >= CA.getThreshold())
1243 if (isa<IndirectBrInst>(BI->getTerminator()))
1258 if (!ReturnsTwice && CS.
isCall() &&
void push_back(const T &Elt)
static ConstantInt * getFalse(LLVMContext &Context)
Value * getPointerOperand()
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Abstract base class of comparison instructions.
Base class for instruction visitors.
Value * getAggregateOperand()
ArrayRef< unsigned > getIndices() const
unsigned getScalarSizeInBits()
bool canConstantFoldCallTo(const Function *F)
The main container class for the LLVM Intermediate Representation.
unsigned arg_size() const
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining...
gep_type_iterator gep_type_end(const User *GEP)
const Function * getParent() const
Return the enclosing method, or null if none.
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Represents the cost of inlining a function.
bool isEquality() const
Determine if this is an equals/not equals predicate.
virtual void getAnalysisUsage(AnalysisUsage &Info) const
ValTy * getArgument(unsigned ArgNo) const
CallingConv::ID getCallingConv() const
StringRef getName() const
AnalysisUsage & addRequired()
FunTy * getCaller() const
Base class of casting instructions.
const APInt & getValue() const
Return the constant's value.
static bool attributeMatches(Function *F1, Function *F2, Attribute::AttrKind Attr)
Test that two functions either have or have not the given attribute at the same time.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
static Constant * getExtractValue(Constant *Agg, ArrayRef< unsigned > Idxs)
Type * getAllocatedType() const
#define DEBUG_PRINT_STAT(x)
void getAnalysisUsage(AnalysisUsage &AU) const
ID
LLVM Calling Convention Representation.
This class represents a cast from a pointer to an integer.
uint64_t getZExtValue() const
Return the zero extended value.
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS)
static Constant * getPtrToInt(Constant *C, Type *Ty)
This class represents a no-op cast from one type to another.
Value * getInsertedValueOperand()
const int LastCallToStaticBonus
ValTy * getCalledValue() const
bool runOnSCC(CallGraphSCC &SCC)
static Constant * getIntToPtr(Constant *C, Type *Ty)
Type * getElementType() const
bool isInBounds() const
isInBounds - Determine whether the GEP has the inbounds flag.
Constant * ConstantFoldCall(Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=0)
uint64_t getElementOffset(unsigned Idx) const
User::op_iterator arg_iterator
unsigned getNumSuccessors() const
bool isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
const int IndirectCallThreshold
LLVM Basic Block Representation.
InstrTy * getInstruction() const
BasicBlock * getSuccessor(unsigned idx) const
static bool mayBeOverridden(LinkageTypes Linkage)
LLVM Constant Representation.
Cost analyzer used by inliner.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static InlineCost getNever()
Value * getPointerOperand()
Value * getOperand(unsigned i) const
Predicate getPredicate() const
Return the predicate for this instruction.
inline Inline Cost Analysis
This class represents a cast from an integer to a pointer.
#define INITIALIZE_AG_DEPENDENCY(depName)
LLVMContext & getContext() const
All values hold a context through their type.
Call cannot be duplicated.
Value * SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout *TD=0, const TargetLibraryInfo *TLI=0, const DominatorTree *DT=0)
STATISTIC(NumCallsAnalyzed,"Number of call sites analyzed")
BinaryOps getOpcode() const
static Constant * getBitCast(Constant *C, Type *Ty)
Class for constant integers.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
bool isStaticAlloca() const
bool erase(const KeyT &Val)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
const BasicBlock & getEntryBlock() const
static ConstantInt * getTrue(LLVMContext &Context)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
AttributeSet getAttributes() const
Return the attribute list for this Function.
INITIALIZE_PASS_BEGIN(InlineCostAnalysis,"inline-cost","Inline Cost Analysis", true, true) INITIALIZE_PASS_END(InlineCostAnalysis
Class for arbitrary precision integers.
Function must not be optimized.
unsigned getOpcode() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
bool isNoInline() const
Return true if the call should not be inlined.
unsigned getPrimitiveSizeInBits() const
static Constant * getInsertValue(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
InlineCost getInlineCost(CallSite CS, int Threshold)
Get an InlineCost object representing the cost of inlining this callsite.
bool hasLocalLinkage() const
OtherOps getOpcode() const
Get the opcode casted to the right type.
static int const Threshold
const unsigned TotalAllocaSizeRecursiveCaller
Function can return twice.
LLVM Value Representation.
APInt LLVM_ATTRIBUTE_UNUSED_RESULT sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
A vector that has set insertion semantics.
Constant * ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, ArrayRef< Constant * > Ops, const DataLayout *TD=0, const TargetLibraryInfo *TLI=0)
ItTy prior(ItTy it, Dist n)
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2)
Return an ICmp or FCmp comparison operator constant expression.
static APInt getNullValue(unsigned numBits)
Get the '0' value.
iterator find(const KeyT &Val)
static Constant * getCast(unsigned ops, Constant *C, Type *Ty)
static InlineCost getAlways()
tier< T1, T2 > tie(T1 &f, T2 &s)
const BasicBlock * getParent() const
INITIALIZE_PASS(GlobalMerge,"global-merge","Global Merge", false, false) bool GlobalMerge const DataLayout * TD
static InlineCost get(int Cost, int Threshold)
FunTy * getCalledFunction() const
gep_type_iterator gep_type_begin(const User *GEP)