15 #define DEBUG_TYPE "memcpyopt"
36 STATISTIC(NumMemCpyInstr,
"Number of memcpy instructions deleted");
37 STATISTIC(NumMemSetInfer,
"Number of memsets inferred");
38 STATISTIC(NumMoveToCpy,
"Number of memmoves converted to memcpy");
39 STATISTIC(NumCpyToSet,
"Number of memcpys converted to memset");
45 for (
unsigned i = 1; i != Idx; ++i, ++GTI)
50 for (
unsigned i = Idx, e = GEP->
getNumOperands(); i != e; ++i, ++GTI) {
53 return VariableIdxFound =
true;
54 if (OpC->
isZero())
continue;
57 if (
StructType *STy = dyn_cast<StructType>(*GTI)) {
81 bool VariableIdxFound =
false;
87 return !VariableIdxFound;
92 return !VariableIdxFound;
100 if (!GEP1 || !GEP2 || GEP1->
getOperand(0) != GEP2->getOperand(0))
105 for (; Idx != GEP1->
getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
106 if (GEP1->
getOperand(Idx) != GEP2->getOperand(Idx))
111 if (VariableIdxFound)
return false;
113 Offset = Offset2-Offset1;
144 bool isProfitableToUseMemset(
const DataLayout &
TD)
const;
149 bool MemsetRange::isProfitableToUseMemset(
const DataLayout &
TD)
const {
151 if (TheStores.size() >= 4 || End-Start >= 16)
return true;
154 if (TheStores.size() < 2)
return false;
158 for (
unsigned i = 0, e = TheStores.size(); i != e; ++i)
159 if (!isa<StoreInst>(TheStores[i]))
164 if (TheStores.size() == 2)
return false;
176 unsigned Bytes =
unsigned(End-Start);
180 unsigned NumPointerStores = Bytes / MaxIntSize;
183 unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
188 return TheStores.size() > NumPointerStores+NumByteStores;
196 std::list<MemsetRange> Ranges;
197 typedef std::list<MemsetRange>::iterator range_iterator;
200 MemsetRanges(
const DataLayout &td) : TD(td) {}
202 typedef std::list<MemsetRange>::const_iterator const_iterator;
203 const_iterator
begin()
const {
return Ranges.
begin(); }
204 const_iterator
end()
const {
return Ranges.
end(); }
205 bool empty()
const {
return Ranges.empty(); }
207 void addInst(int64_t OffsetFromFirst,
Instruction *Inst) {
208 if (
StoreInst *SI = dyn_cast<StoreInst>(Inst))
209 addStore(OffsetFromFirst, SI);
211 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
214 void addStore(int64_t OffsetFromFirst,
StoreInst *SI) {
217 addRange(OffsetFromFirst, StoreSize,
221 void addMemSet(int64_t OffsetFromFirst,
MemSetInst *MSI) {
222 int64_t Size = cast<ConstantInt>(MSI->
getLength())->getZExtValue();
244 int64_t End = Start+Size;
245 range_iterator
I = Ranges.begin(), E = Ranges.end();
247 while (I != E && Start > I->End)
253 if (I == E || End < I->Start) {
254 MemsetRange &R = *Ranges.insert(I, MemsetRange());
258 R.Alignment = Alignment;
259 R.TheStores.push_back(Inst);
264 I->TheStores.push_back(Inst);
268 if (I->Start <= Start && I->End >= End)
277 if (Start < I->Start) {
280 I->Alignment = Alignment;
288 range_iterator NextI =
I;
289 while (++NextI != E && End >= NextI->Start) {
291 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
292 if (NextI->End > I->End)
338 uint64_t cpyLen,
unsigned cpyAlign,
CallInst *
C);
341 bool processByValArgument(
CallSite CS,
unsigned ArgNo);
369 if (TD == 0)
return 0;
375 MemsetRanges Ranges(*TD);
378 for (++BI; !isa<TerminatorInst>(BI); ++BI) {
379 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
383 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
388 if (
StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
390 if (!NextStore->isSimple())
break;
402 Ranges.addStore(Offset, NextStore);
415 Ranges.addMemSet(Offset, MSI);
427 Ranges.addInst(0, StartInst);
437 for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
439 const MemsetRange &Range = *
I;
441 if (Range.TheStores.size() == 1)
continue;
444 if (!Range.isProfitableToUseMemset(*TD))
449 StartPtr = Range.StartPtr;
452 unsigned Alignment = Range.Alignment;
453 if (Alignment == 0) {
455 cast<PointerType>(StartPtr->getType())->getElementType();
460 Builder.
CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
463 for (
unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
464 dbgs() << *Range.TheStores[i] <<
'\n';
465 dbgs() <<
"With: " << *AMemSet <<
'\n');
467 if (!Range.TheStores.empty())
468 AMemSet->
setDebugLoc(Range.TheStores[0]->getDebugLoc());
472 SI = Range.TheStores.begin(),
473 SE = Range.TheStores.end(); SI != SE; ++SI) {
474 MD->removeInstruction(*SI);
475 (*SI)->eraseFromParent();
487 if (TD == 0)
return false;
493 if (
LI->isSimple() &&
LI->hasOneUse() &&
506 E = C; I != E; --
I) {
518 unsigned loadAlign =
LI->getAlignment();
522 bool changed = performCallSlotOptzn(
LI,
524 LI->getPointerOperand()->stripPointerCasts(),
526 std::min(storeAlign, loadAlign),
C);
528 MD->removeInstruction(SI);
530 MD->removeInstruction(
LI);
531 LI->eraseFromParent();
571 bool MemCpyOpt::performCallSlotOptzn(
Instruction *cpy,
573 uint64_t cpyLen,
unsigned cpyAlign,
599 if (TD == 0)
return false;
608 if (cpyLen < srcSize)
614 if (
AllocaInst *
A = dyn_cast<AllocaInst>(cpyDest)) {
623 if (destSize < srcSize)
625 }
else if (
Argument *
A = dyn_cast<Argument>(cpyDest)) {
628 if (!
A->hasStructRetAttr())
631 Type *StructTy = cast<PointerType>(
A->getType())->getElementType();
640 if (destSize < srcSize)
650 bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
653 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
662 while (!srcUseList.empty()) {
663 User *UI = srcUseList.pop_back_val();
665 if (isa<BitCastInst>(UI)) {
668 srcUseList.push_back(*I);
670 if (
G->hasAllZeroIndices())
673 srcUseList.push_back(*I);
676 }
else if (UI != C && UI != cpy) {
684 if (
Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
701 bool changedArgument =
false;
702 for (
unsigned i = 0; i < CS.arg_size(); ++i)
703 if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
707 changedArgument =
true;
708 if (CS.getArgument(i)->getType() == Dest->
getType())
709 CS.setArgument(i, Dest);
715 if (!changedArgument)
719 if (!isDestSufficientlyAligned) {
720 assert(isa<AllocaInst>(cpyDest) &&
"Can only increase alloca alignment!");
721 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
726 MD->removeInstruction(C);
729 MD->removeInstruction(cpy);
758 if (!MDepLen || !MLen || MDepLen->
getZExtValue() < MLen->getZExtValue())
778 if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
784 bool UseMemMove =
false;
806 MD->removeInstruction(M);
818 bool MemCpyOpt::processMemCpy(
MemCpyInst *M) {
821 if (CopySize == 0 || M->
isVolatile())
return false;
825 MD->removeInstruction(M);
832 if (GV->isConstant() && GV->hasDefinitiveInitializer())
835 Builder.CreateMemSet(M->
getRawDest(), ByteVal, CopySize,
837 MD->removeInstruction(M);
852 MD->removeInstruction(M);
860 MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc,
true,
864 return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
882 DEBUG(
dbgs() <<
"MemCpyOpt: Optimizing memmove -> memcpy: " << *M <<
"\n");
894 MD->removeInstruction(M);
901 bool MemCpyOpt::processByValArgument(
CallSite CS,
unsigned ArgNo) {
902 if (TD == 0)
return false;
906 Type *ByValTy = cast<PointerType>(ByValArg->
getType())->getElementType();
931 if (ByValAlign == 0)
return false;
959 DEBUG(
dbgs() <<
"MemCpyOpt: Forwarding memcpy to byval:\n"
960 <<
" " << *MDep <<
"\n"
970 bool MemCpyOpt::iterateOnFunction(
Function &
F) {
971 bool MadeChange =
false;
979 bool RepeatInstruction =
false;
981 if (
StoreInst *SI = dyn_cast<StoreInst>(I))
982 MadeChange |= processStore(SI, BI);
983 else if (
MemSetInst *M = dyn_cast<MemSetInst>(I))
984 RepeatInstruction = processMemSet(M, BI);
985 else if (
MemCpyInst *M = dyn_cast<MemCpyInst>(I))
986 RepeatInstruction = processMemCpy(M);
987 else if (
MemMoveInst *M = dyn_cast<MemMoveInst>(I))
988 RepeatInstruction = processMemMove(M);
990 for (
unsigned i = 0, e = CS.
arg_size(); i != e; ++i)
992 MadeChange |= processByValArgument(CS, i);
996 if (RepeatInstruction) {
997 if (BI != BB->begin()) --BI;
1009 bool MemCpyOpt::runOnFunction(
Function &F) {
1010 bool MadeChange =
false;
1011 MD = &getAnalysis<MemoryDependenceAnalysis>();
1012 TD = getAnalysisIfAvailable<DataLayout>();
1013 TLI = &getAnalysis<TargetLibraryInfo>();
1022 if (!iterateOnFunction(F))
unsigned getAlignment() const
Type * getIndexedType() const
const_iterator end(StringRef path)
Get end iterator over path.
AnalysisUsage & addPreserved()
static PassRegistry * getPassRegistry()
LLVM Argument representation.
uint64_t getZExtValue() const
Get zero extended value.
void *memcpy(void *s1, const void *s2, size_t n);
Value * isBytewiseValue(Value *V)
ModRefResult getModRefInfo(const Instruction *I, const Location &Loc)
The main container class for the LLVM Intermediate Representation.
unsigned arg_size() const
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getNumOperands() const
static Location getLocationForSource(const MemTransferInst *MTI)
void setArgument(unsigned ArgNo, Value *newVal)
const_iterator begin(StringRef path)
Get begin iterator over path.
const Function * getParent() const
Return the enclosing method, or null if none.
void setDebugLoc(const DebugLoc &Loc)
setDebugLoc - Set the debug location information for this instruction.
LoopInfoBase< BlockT, LoopT > * LI
ValTy * getArgument(unsigned ArgNo) const
StringRef getName() const
AnalysisUsage & addRequired()
bool isNoAlias(const Location &LocA, const Location &LocB)
#define INITIALIZE_PASS_DEPENDENCY(depName)
const StructLayout * getStructLayout(StructType *Ty) const
const APInt & getValue() const
Return the constant's value.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Type * getAllocatedType() const
ID
LLVM Calling Convention Representation.
uint64_t getZExtValue() const
Return the zero extended value.
This class represents a no-op cast from one type to another.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
uint16_t getParamAlignment(uint16_t i) const
Extract the alignment for a call or parameter (0=unknown).
uint64_t getElementOffset(unsigned Idx) const
friend const_iterator end(StringRef path)
Get end iterator over path.
unsigned getAlignment() const
STATISTIC(NumMemCpyInstr,"Number of memcpy instructions deleted")
value_use_iterator< User > use_iterator
InstrTy * getInstruction() const
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, const DataLayout &TD)
unsigned getAlignment() const
Value * getRawDest() const
FunctionPass * createMemCpyOptPass()
static Location getLocationForDest(const MemIntrinsic *MI)
Value * getOperand(unsigned i) const
static CastInst * CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast or a PtrToInt cast instruction.
void *memmove(void *s1, const void *s2, size_t n);
Location - A description of a memory location.
bool dominates(const DomTreeNode *A, const DomTreeNode *B) const
#define INITIALIZE_AG_DEPENDENCY(depName)
INITIALIZE_PASS_BEGIN(MemCpyOpt,"memcpyopt","MemCpy Optimization", false, false) INITIALIZE_PASS_END(MemCpyOpt
unsigned getABITypeAlignment(Type *Ty) const
Class for constant integers.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
uint64_t getTypeAllocSize(Type *Ty) const
friend const_iterator begin(StringRef path)
Get begin iterator over path.
Value * getLength() const
Value * stripPointerCasts()
Strips off any unneeded pointer casts, all-zero GEPs and aliases from the specified value...
void *memset(void *b, int c, size_t len);
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
unsigned getLargestLegalIntTypeSize() const
static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, bool &VariableIdxFound, const DataLayout &TD)
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(DefaultAlign), cl::values(clEnumValN(DefaultAlign,"arm-default-align","Generate unaligned accesses only on hardware/OS ""combinations that are known to support them"), clEnumValN(StrictAlign,"arm-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"arm-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
Location getLocation(const LoadInst *LI)
Value * getSource() const
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout *TD=0)
void setCalledFunction(Value *Fn)
setCalledFunction - Set the function called.
Instruction * getInst() const
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, bool isVolatile=false, MDNode *TBAATag=0)
Create and insert a memset to the specified pointer and the specified value.
uint64_t getTypeStoreSize(Type *Ty) const
Value * getRawSource() const
void initializeMemCpyOptPass(PassRegistry &)
LLVM Value Representation.
const Value * getArraySize() const
ModRefResult callCapturesBefore(const Instruction *I, const AliasAnalysis::Location &MemLoc, DominatorTree *DT)
int64_t getSExtValue() const
Return the sign extended value.
Value * getPointerOperand()
const BasicBlock * getParent() const
INITIALIZE_PASS(GlobalMerge,"global-merge","Global Merge", false, false) bool GlobalMerge const DataLayout * TD
gep_type_iterator gep_type_begin(const User *GEP)