44 #define DEBUG_TYPE "loop-idiom"
63 STATISTIC(NumMemSet,
"Number of memset's formed from loop stores");
64 STATISTIC(NumMemCpy,
"Number of memcpy's formed from loop load+stores");
68 class LoopIdiomRecognize;
94 class NclPopcountRecognize {
95 LoopIdiomRecognize &LIR;
102 explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
108 bool preliminaryScreen();
132 class LoopIdiomRecognize :
public LoopPass {
141 explicit LoopIdiomRecognize() :
LoopPass(
ID) {
143 TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
153 bool processLoopStridedStore(
Value *DestPtr,
unsigned StoreSize,
154 unsigned StoreAlignment,
157 const SCEV *BECount);
158 bool processLoopStoreOfLoopLoad(
StoreInst *SI,
unsigned StoreSize,
161 const SCEV *BECount);
184 return TD ?
TD :
TD=getAnalysisIfAvailable<DataLayout>();
188 return DT ? DT : (DT=&getAnalysis<DominatorTree>());
192 return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
196 return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
200 return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>());
203 Loop *getLoop()
const {
return CurLoop; }
206 bool runOnNoncountableLoop();
207 bool runOnCountableLoop();
245 for (
unsigned op = 0, e = DeadInst->
getNumOperands(); op != e; ++op) {
259 }
while (!NowDeadInsts.
empty());
284 return Br->isUnconditional() && BB->
size() == 1;
308 NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
309 LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
312 bool NclPopcountRecognize::preliminaryScreen() {
323 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
326 BasicBlock *LoopBody = *(CurLoop->block_begin());
327 if (LoopBody->
size() >= 20) {
333 BasicBlock *PreHead = CurLoop->getLoopPreheader();
334 if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
339 PreCondBB = LIRUtil::getPrecondBb(PreHead);
356 if (!CmpZero || !CmpZero->
isZero())
360 if ((Pred == ICmpInst::ICMP_NE && Br->
getSuccessor(0) == LoopEntry) ||
361 (Pred == ICmpInst::ICMP_EQ && Br->
getSuccessor(1) == LoopEntry))
367 bool NclPopcountRecognize::detectIdiom(
Instruction *&CntInst,
392 Value *VarX1, *VarX0;
395 DefX2 = CountInst = 0;
398 LoopEntry = *(CurLoop->block_begin());
402 if (
Value *
T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
403 DefX2 = dyn_cast<Instruction>(
T);
415 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(0))))
424 Instruction *SubInst = cast<Instruction>(SubOneOp);
427 !((SubInst->
getOpcode() == Instruction::Sub && Dec->isOne()) ||
428 (SubInst->
getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
446 IterE = LoopEntry->
end(); Iter != IterE; Iter++) {
448 if (Inst->
getOpcode() != Instruction::Add)
452 if (!Inc || !Inc->
isOne())
456 if (!Phi || Phi->
getParent() != LoopEntry)
460 bool LiveOutLoop =
false;
463 if ((cast<Instruction>(*
I))->getParent() != LoopEntry) {
464 LiveOutLoop =
true;
break;
482 BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
483 Value *
T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
495 void NclPopcountRecognize::transform(
Instruction *CntInst,
500 BasicBlock *PreHead = CurLoop->getLoopPreheader();
501 BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
509 IRBuilderTy Builder(PreCondBr);
510 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
512 PopCnt = createPopcntIntrinsic(Builder, Var, DL);
513 NewCount = PopCntZext =
514 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->
getType()));
516 if (NewCount != PopCnt)
517 (cast<Instruction>(NewCount))->setDebugLoc(DL);
525 if (!InitConst || !InitConst->
isZero()) {
526 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
527 (cast<Instruction>(NewCount))->setDebugLoc(DL);
538 Value *Opnd0 = PopCntZext;
539 Value *Opnd1 = ConstantInt::get(PopCntZext->
getType(), 0);
544 cast<ICmpInst>(Builder.CreateICmp(PreCond->
getPredicate(), Opnd0, Opnd1));
576 PHINode *TcPhi = PHINode::Create(Ty, 2,
"tcphi", Body->
begin());
578 Builder.SetInsertPoint(LbCond);
579 Value *Opnd1 = cast<Value>(TcPhi);
580 Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
582 cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2,
"tcdec",
false,
true));
588 CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
591 LbCond->
setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
601 if (cast<Instruction>(*I)->getParent() != Body)
604 for (
unsigned Idx = 0; Idx < CntUses.
size(); Idx++) {
605 (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
616 Value *Ops[] = { Val };
619 Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
621 CallInst *CI = IRBuilder.CreateCall(Func, Ops);
630 bool NclPopcountRecognize::recognize() {
632 if (!LIR.getTargetTransformInfo())
635 LIR.getScalarEvolution();
637 if (!preliminaryScreen())
643 if (!detectIdiom(CntInst, CntPhi, Val))
646 transform(CntInst, CntPhi, Val);
656 bool LoopIdiomRecognize::runOnCountableLoop() {
658 if (isa<SCEVCouldNotCompute>(BECount))
return false;
662 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
663 if (BECst->getValue()->getValue() == 0)
667 if (!getDataLayout())
671 (void)getDominatorTree();
674 TLI = &getAnalysis<TargetLibraryInfo>();
677 (void)getTargetLibraryInfo();
680 CurLoop->getUniqueExitBlocks(ExitBlocks);
682 DEBUG(
dbgs() <<
"loop-idiom Scanning: F["
683 << CurLoop->getHeader()->getParent()->getName()
684 <<
"] Loop %" << CurLoop->getHeader()->getName() <<
"\n");
686 bool MadeChange =
false;
688 for (Loop::block_iterator BI = CurLoop->block_begin(),
689 E = CurLoop->block_end(); BI != E; ++BI) {
694 MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
699 bool LoopIdiomRecognize::runOnNoncountableLoop() {
700 NclPopcountRecognize Popcount(*
this);
701 if (Popcount.recognize())
717 if (Name ==
"memset" || Name ==
"memcpy")
720 SE = &getAnalysis<ScalarEvolution>();
722 return runOnCountableLoop();
723 return runOnNoncountableLoop();
729 bool LoopIdiomRecognize::runOnLoopBlock(
BasicBlock *BB,
const SCEV *BECount,
734 for (
unsigned i = 0, e = ExitBlocks.
size(); i != e; ++i)
735 if (!DT->dominates(BB, ExitBlocks[i]))
738 bool MadeChange =
false;
742 if (
StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
744 if (!processLoopStore(SI, BECount))
continue;
755 if (
MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
757 if (!processLoopMemSet(MSI, BECount))
continue;
773 bool LoopIdiomRecognize::processLoopStore(
StoreInst *SI,
const SCEV *BECount) {
780 uint64_t SizeInBits =
TD->getTypeSizeInBits(StoredVal->
getType());
781 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
789 if (StoreEv == 0 || StoreEv->
getLoop() != CurLoop || !StoreEv->
isAffine())
794 unsigned StoreSize = (
unsigned)SizeInBits >> 3;
802 dbgs() <<
"NEGATIVE STRIDE: " << *SI <<
"\n";
810 if (processLoopStridedStore(StorePtr, StoreSize, SI->
getAlignment(),
811 StoredVal, SI, StoreEv, BECount))
817 if (
LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
821 StoreEv->getOperand(1) == LoadEv->
getOperand(1) && LI->isSimple())
822 if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
831 bool LoopIdiomRecognize::
850 uint64_t SizeInBytes = cast<ConstantInt>(MSI->
getLength())->getZExtValue();
851 if ((SizeInBytes >> 32) != 0)
863 return processLoopStridedStore(Pointer, (
unsigned)SizeInBytes,
879 uint64_t AccessSize = AliasAnalysis::UnknownSize;
883 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
884 AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
895 if (&*
I != IgnoredStore &&
913 if (C == 0)
return 0;
917 if (Size == 0 || (Size & 7) || (Size & (Size-1)))
929 if (Size > 16)
return 0;
932 if (Size == 16)
return C;
935 unsigned ArraySize = 16/Size;
937 return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
943 bool LoopIdiomRecognize::
944 processLoopStridedStore(
Value *DestPtr,
unsigned StoreSize,
945 unsigned StoreAlignment,
Value *StoredVal,
947 const SCEV *BECount) {
963 CurLoop->isLoopInvariant(SplatValue)) {
966 }
else if (DestAS == 0 &&
981 BasicBlock *Preheader = CurLoop->getLoopPreheader();
993 Expander.expandCodeFor(Ev->
getStart(), DestInt8PtrTy,
998 StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
1009 Type *IntPtr = Builder.getIntPtrTy(
TD, DestAS);
1014 if (StoreSize != 1) {
1020 Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->
getTerminator());
1024 NewCall = Builder.CreateMemSet(BasePtr,
1030 Type *Int8PtrTy = DestInt8PtrTy;
1034 Builder.getVoidTy(),
1043 GlobalValue::InternalLinkage,
1044 PatternValue,
".memset_pattern");
1047 Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
1048 NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
1051 DEBUG(
dbgs() <<
" Formed memset: " << *NewCall <<
"\n"
1052 <<
" from store to: " << *Ev <<
" at: " << *TheStore <<
"\n");
1064 bool LoopIdiomRecognize::
1065 processLoopStoreOfLoopLoad(
StoreInst *SI,
unsigned StoreSize,
1068 const SCEV *BECount) {
1078 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1088 Value *StoreBasePtr =
1089 Expander.expandCodeFor(StoreEv->
getStart(),
1091 Preheader->getTerminator());
1094 CurLoop, BECount, StoreSize,
1095 getAnalysis<AliasAnalysis>(), SI)) {
1104 Value *LoadBasePtr =
1105 Expander.expandCodeFor(LoadEv->
getStart(),
1107 Preheader->getTerminator());
1110 StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
1133 Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1136 Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
1140 DEBUG(
dbgs() <<
" Formed memcpy: " << *NewCall <<
"\n"
1141 <<
" from load ptr=" << *LoadEv <<
" at: " << *LI <<
"\n"
1142 <<
" from store ptr=" << *StoreEv <<
" at: " << *SI <<
"\n");
unsigned getAlignment() const
Value * getValueOperand()
void push_back(const T &Elt)
AnalysisUsage & addPreserved()
void addIncoming(Value *V, BasicBlock *BB)
static PassRegistry * getPassRegistry()
const SCEV * getConstant(ConstantInt *V)
Value * isBytewiseValue(Value *V)
ModRefResult getModRefInfo(const Instruction *I, const Location &Loc)
STATISTIC(NumMemSet,"Number of memset's formed from loop stores")
The main container class for the LLVM Intermediate Representation.
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getNumOperands() const
const Function * getParent() const
Return the enclosing method, or null if none.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
void setDebugLoc(const DebugLoc &Loc)
setDebugLoc - Set the debug location information for this instruction.
BlockT * getHeader() const
LoopInfoBase< BlockT, LoopT > * LI
const SCEV * getStart() const
static bool mayLoopAccessLocation(Value *Ptr, AliasAnalysis::ModRefResult Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, Instruction *IgnoredStore)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const APInt & getValue() const
Return the constant's value.
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Instruction * getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
ID
LLVM Calling Convention Representation.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
BasicBlock * getSuccessor(unsigned i) const
AnalysisUsage & addPreservedID(const void *ID)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Loop * getLoopFor(const BasicBlock *BB) const
void replaceAllUsesWith(Value *V)
void memset_pattern16(void *b, const void *pattern16, size_t len);
unsigned getAlignment() const
BlockT * getLoopPreheader() const
Constant * getOrInsertFunction(StringRef Name, FunctionType *T, AttributeSet AttributeList)
LLVM Basic Block Representation.
LLVM Constant Representation.
const SCEV * getOperand(unsigned i) const
static Constant * getMemSetPatternValue(Value *V, const DataLayout &TD)
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=0)
const DebugLoc & getDebugLoc() const
getDebugLoc - Return the debug location for this node as a DebugLoc.
Represent an integer comparison operator.
Value * getOperand(unsigned i) const
Predicate getPredicate() const
Return the predicate for this instruction.
void forgetValue(Value *V)
Location - A description of a memory location.
#define INITIALIZE_AG_DEPENDENCY(depName)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
bool isConditional() const
static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, const TargetLibraryInfo *TLI)
static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, const TargetLibraryInfo *TLI)
loop Recognize loop false
Class for constant integers.
AnalysisUsage & addRequiredID(const void *ID)
void setAlignment(unsigned Align)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
void setUnnamedAddr(bool Val)
Value * getLength() const
ConstantInt * getValue() const
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
loop Recognize loop idioms
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getSinglePredecessor()
Return this block if it has a single predecessor block. Otherwise return a null pointer.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Pass * createLoopIdiomPass()
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
block_iterator block_end() const
Value * getCondition() const
void forgetLoop(const Loop *L)
unsigned getAlignment() const
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
const Loop * getLoop() const
void initializeLoopIdiomRecognizePass(PassRegistry &)
const SCEV * getBackedgeTakenCount(const Loop *L)
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
uint64_t getTypeSizeInBits(Type *Ty) const
block_iterator block_begin() const
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize,"loop-idiom","Recognize loop idioms", false, false) INITIALIZE_PASS_END(LoopIdiomRecognize
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Value * getPointerOperand()
const BasicBlock * getParent() const
INITIALIZE_PASS(GlobalMerge,"global-merge","Global Merge", false, false) bool GlobalMerge const DataLayout * TD
bool isOne() const
Determine if the value is one.