// File prelude (fragment): debug-type macro, pattern-match using-directive,
// and pieces of the pass's cl::opt command-line options.  NOTE(review): the
// cl::opt declarations themselves are missing from this excerpt -- only the
// cl::desc(...) description strings are visible.
45 #define LV_NAME "loop-vectorize"
46 #define DEBUG_TYPE LV_NAME
94 using namespace llvm::PatternMatch;
// Option: force a specific SIMD width (0 = let the cost model pick).
98 cl::desc(
"Sets the SIMD width. Zero is autoselect."));
// Option: force a specific unroll (interleave) count (0 = autoselect).
102 cl::desc(
"Sets the vectorization unroll count. "
103 "Zero is autoselect."));
// Option: allow if-conversion so conditional blocks can be vectorized.
107 cl::desc(
"Enable if-conversion during vectorization."));
// Option: threshold fragment -- skips loops with tiny constant trip counts.
113 cl::desc(
"Don't vectorize loops with a constant "
114 "trip count that is smaller than this "
// Forward declarations so the classes can reference each other.
136 class LoopVectorizationLegality;
137 class LoopVectorizationCostModel;
// InnerLoopVectorizer: widens the scalar instructions of an inner loop into
// vector form with width VF, replicated UF times (unrolling).
// NOTE(review): this is a fragmented sample of the class -- many original
// lines are elided, so the comments below describe only what is visible.
153 class InnerLoopVectorizer {
// Constructor tail: caches the loop and analysis pointers; VF is the vector
// width, UF the unroll factor.  Induction/OldInduction start null (0 used
// as a null pointer, pre-nullptr style).
158 unsigned UnrollFactor)
159 : OrigLoop(OrigLoop), SE(SE),
LI(LI), DT(DT), DL(DL), TLI(TLI),
160 VF(VecWidth), UF(UnrollFactor), Builder(SE->
getContext()), Induction(0),
161 OldInduction(0), WidenMap(UnrollFactor) {}
// Entry point: first emit the empty vector-loop skeleton, then fill it in.
164 void vectorize(LoopVectorizationLegality *Legal) {
166 createEmptyLoop(Legal);
169 vectorizeLoop(Legal);
174 virtual ~InnerLoopVectorizer() {}
// Cache type mapping a CFG edge to its computed mask (see MaskCache below).
187 VectorParts> EdgeMaskCache;
// Emits the runtime memory-overlap checks; returns the final check instr
// (declaration fragment -- remaining parameters are elided here).
191 Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal,
194 void createEmptyLoop(LoopVectorizationLegality *Legal);
196 virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
// Mask computation used by if-conversion.
207 VectorParts createBlockInMask(
BasicBlock *BB);
213 void vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
BasicBlock *BB,
219 void widenPHIInstruction(
Instruction *PN, VectorParts &Entry,
220 LoopVectorizationLegality *Legal,
221 unsigned UF,
unsigned VF, PhiVector *PV);
225 void updateAnalysis();
// Virtual so InnerLoopUnroller can override with scalar (VF==1) variants.
229 virtual void scalarizeInstruction(
Instruction *Instr);
232 virtual void vectorizeMemoryInstruction(
Instruction *Instr,
233 LoopVectorizationLegality *Legal);
245 virtual Value *getConsecutiveVector(
Value* Val,
int StartIdx,
bool Negate);
252 VectorParts &getVectorValue(
Value *V);
// Nested ValueMap: maps a scalar Value to its UF widened parts.
264 ValueMap(
unsigned UnrollFactor) : UF(UnrollFactor) {}
267 bool has(
Value *Key)
const {
return MapStorage.count(Key); }
// splat(): fill all UF entries with the same value.
273 VectorParts &Entry = MapStorage[Key];
274 Entry.assign(UF, Val);
// get(): lookup that asserts the entry already holds UF parts.
279 VectorParts &
get(
Value *Key) {
280 VectorParts &Entry = MapStorage[Key];
283 assert(Entry.size() == UF);
294 std::map<Value *, VectorParts> MapStorage;
347 EdgeMaskCache MaskCache;
// InnerLoopUnroller: reuses the vectorizer machinery with VF == 1 (note the
// hard-coded 1 passed to the base constructor) so only unrolling happens.
// The overridden virtuals provide scalar variants of the widening hooks.
350 class InnerLoopUnroller :
public InnerLoopVectorizer {
355 InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
358 virtual void scalarizeInstruction(
Instruction *Instr);
359 virtual void vectorizeMemoryInstruction(
Instruction *Instr,
360 LoopVectorizationLegality *Legal);
362 virtual Value *getConsecutiveVector(
Value* Val,
int StartIdx,
bool Negate);
// Fragments of debug-location helpers: scanning operands for an instruction
// carrying a non-empty DebugLoc, and (second fragment) unwrapping a possibly
// null pointer to an Instruction.  NOTE(review): enclosing function bodies
// are not visible in this excerpt.
377 if (
Instruction *OpInst = dyn_cast<Instruction>(*OI))
378 if (OpInst->getDebugLoc() != Empty)
388 if (
const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr))
// LoopVectorizationLegality: decides whether a loop can legally be
// vectorized, and records the reductions/inductions/runtime checks needed.
// NOTE(review): fragmented sample -- many members and enumerators elided.
407 class LoopVectorizationLegality {
// Constructor tail; MaxSafeDepDistBytes starts at -1U, i.e. "unlimited".
411 : TheLoop(L), SE(SE), DL(DL), DT(DT), TLI(TLI),
412 Induction(0), WidestIndTy(0), HasFunNoNaNAttr(
false),
413 MaxSafeDepDistBytes(-1U) {}
// InductionKind enumerators (fragment): reverse int/ptr inductions.
433 IK_ReverseIntInduction,
435 IK_ReversePtrInduction
439 enum MinMaxReductionKind {
// Describes one reduction: start value, exit instruction, kind, min/max kind.
450 struct ReductionDescriptor {
451 ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
452 Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
455 MinMaxReductionKind MK)
456 : StartValue(Start), LoopExitInstr(Exit),
Kind(K), MinMaxKind(MK) {}
466 MinMaxReductionKind MinMaxKind;
// Per-instruction result of reduction pattern matching.
470 struct ReductionInstDesc {
472 IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
474 ReductionInstDesc(
Instruction *I, MinMaxReductionKind K) :
475 IsReduction(
true), PatternLastInst(I), MinMaxKind(K) {}
483 MinMaxReductionKind MinMaxKind;
// Bookkeeping for the runtime pointer-overlap checks.
488 struct RuntimePointerCheck {
489 RuntimePointerCheck() : Need(
false) {}
498 DependencySetId.clear();
// Induction variable description: start value plus kind.
521 struct InductionInfo {
522 InductionInfo(
Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
523 InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
// Simple accessors for the analysis results.
544 PHINode *getInduction() {
return Induction; }
547 ReductionList *getReductionVars() {
return &Reductions; }
550 InductionList *getInductionVars() {
return &Inductions; }
553 Type *getWidestInductionType() {
return WidestIndTy; }
556 bool isInductionVariable(
const Value *V);
// Returns +/-1 for consecutive (forward/reverse) pointers, 0 otherwise
// (sign convention inferred from callers below -- see vectorizeMemoryInstruction).
570 int isConsecutivePtr(
Value *Ptr);
573 bool isUniform(
Value *V);
576 bool isUniformAfterVectorization(
Instruction* I) {
return Uniforms.count(I); }
579 RuntimePointerCheck *getRuntimePointerCheck() {
return &PtrRtCheck; }
// Identity element for a reduction kind (e.g. 0 for add, 1 for mul).
583 static Constant *getReductionIdentity(ReductionKind K,
Type *Tp);
585 unsigned getMaxSafeDepDistBytes() {
return MaxSafeDepDistBytes; }
// The individual legality checks (instructions, memory, if-conversion).
591 bool canVectorizeInstrs();
597 bool canVectorizeMemory();
601 bool canVectorizeWithIfConvert();
604 void collectLoopUniforms();
// Reduction/induction pattern recognition helpers.
613 bool AddReductionVar(
PHINode *Phi, ReductionKind
Kind);
619 ReductionInstDesc isReductionInstr(
Instruction *I, ReductionKind Kind,
620 ReductionInstDesc &Desc);
623 static ReductionInstDesc isMinMaxSelectCmpPattern(
Instruction *I,
624 ReductionInstDesc &Prev);
627 InductionKind isInductionVariable(
PHINode *Phi);
// Analysis results populated by the canVectorize* methods.
646 ReductionList Reductions;
650 InductionList Inductions;
662 RuntimePointerCheck PtrRtCheck;
664 bool HasFunNoNaNAttr;
666 unsigned MaxSafeDepDistBytes;
// LoopVectorizationCostModel: picks the most profitable vectorization factor
// and unroll factor using TargetTransformInfo costs.
// NOTE(review): fragmented sample -- several members and methods elided.
676 class LoopVectorizationCostModel {
679 LoopVectorizationLegality *Legal,
682 : TheLoop(L), SE(SE),
LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
// Widest scalar type in the loop -- bounds the feasible vector width.
699 unsigned getWidestType();
706 unsigned selectUnrollFactor(
bool OptForSize,
unsigned UserUF,
unsigned VF,
// Register-pressure estimate used when choosing the unroll factor.
711 struct RegisterUsage {
713 unsigned LoopInvariantRegs;
715 unsigned MaxLocalUsers;
717 unsigned NumInstructions;
721 RegisterUsage calculateRegisterUsage();
// Expected cost of the whole loop / of one instruction at width VF.
728 unsigned expectedCost(
unsigned VF);
732 unsigned getInstructionCost(
Instruction *I,
unsigned VF);
750 LoopVectorizationLegality *Legal;
// LoopVectorizeHints: reads/writes the per-loop vectorizer metadata
// (#pragma-style "width"/"unroll" hints attached to the loop ID MDNode).
// NOTE(review): fragmented sample -- several lines elided.
761 struct LoopVectorizeHints {
767 LoopVectorizeHints(
const Loop *L,
bool DisableUnrolling)
// Command-line -force-vector-unroll overrides the metadata value.
776 if (VectorizationUnroll.getNumOccurrences() > 0)
777 Unroll = VectorizationUnroll;
779 DEBUG(
if (DisableUnrolling && Unroll == 1)
780 dbgs() <<
"LV: Unrolling disabled by the pass manager\n");
// Marks the loop so it is not vectorized a second time: rebuilds the loop-ID
// metadata node and RAUWs the old one.
794 void setAlreadyVectorized(
Loop *L) {
803 for (
unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
815 LoopID->replaceAllUsesWith(NewLoopID);
// Parses hint metadata off the loop ID node into Width/Unroll.
824 void getHints(
const Loop *L) {
// A loop-ID node's first operand is the node itself (self-referential).
829 assert(LoopID->getNumOperands() > 0 &&
"requires at least one operand");
830 assert(LoopID->getOperand(0) == LoopID &&
"invalid loop id");
832 for (
unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
838 if (
const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
839 if (!MD || MD->getNumOperands() == 0)
842 for (
unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
846 assert(Args.
size() == 0 &&
"too many arguments for MDString");
// Each recognized hint carries exactly one argument.
859 if (Args.
size() == 1)
860 getHint(Hint, Args[0]);
// Hint dispatch: "width" and "unroll" are recognized; anything else is
// reported (under DEBUG) and ignored.
870 if (Hint ==
"width") {
874 DEBUG(
dbgs() <<
"LV: ignoring invalid width hint metadata\n");
875 }
else if (Hint ==
"unroll") {
879 DEBUG(
dbgs() <<
"LV: ignoring invalid unroll hint metadata\n");
881 DEBUG(
dbgs() <<
"LV: ignoring unknown hint " << Hint <<
'\n');
// LoopVectorize: the LoopPass driver.  Gathers analyses, checks legality,
// consults the cost model, then runs either the unroller or the vectorizer.
// NOTE(review): fragmented sample of runOnLoop -- early-exit returns and
// several statements are elided between the visible lines.
887 struct LoopVectorize :
public LoopPass {
891 explicit LoopVectorize(
bool NoUnrolling =
false)
892 :
LoopPass(
ID), DisableUnrolling(NoUnrolling) {
902 bool DisableUnrolling;
// Fetch required / optional analyses (DataLayout and TLI may be null).
909 SE = &getAnalysis<ScalarEvolution>();
910 DL = getAnalysisIfAvailable<DataLayout>();
911 LI = &getAnalysis<LoopInfo>();
912 TTI = &getAnalysis<TargetTransformInfo>();
913 DT = &getAnalysis<DominatorTree>();
914 TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
// Bail out when the target reports no vector registers.
918 if (!TTI->getNumberOfRegisters(
true))
922 DEBUG(
dbgs() <<
"LV: Not vectorizing because of missing data layout\n");
926 DEBUG(
dbgs() <<
"LV: Checking a loop in \"" <<
927 L->
getHeader()->getParent()->getName() <<
"\"\n");
929 LoopVectorizeHints Hints(L, DisableUnrolling);
// Metadata width==1 && unroll==1 means vectorization was explicitly disabled
// (or already performed).
931 if (Hints.Width == 1 && Hints.Unroll == 1) {
932 DEBUG(
dbgs() <<
"LV: Not vectorizing.\n");
// Legality check.
937 LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
938 if (!LVL.canVectorize()) {
939 DEBUG(
dbgs() <<
"LV: Not vectorizing.\n");
// Profitability.
944 LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI);
951 unsigned FnIndex = AttributeSet::FunctionIndex;
956 DEBUG(
dbgs() <<
"LV: Can't vectorize when the NoImplicitFloat"
957 "attribute is used.\n");
963 VF = CM.selectVectorizationFactor(OptForSize, Hints.Width);
965 unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
968 DEBUG(
dbgs() <<
"LV: Found a vectorizable loop ("<< VF.Width <<
") in "<<
970 DEBUG(
dbgs() <<
"LV: Unroll Factor is " << UF <<
'\n');
973 DEBUG(
dbgs() <<
"LV: Vectorization is possible but not beneficial.\n");
// VF==1 path: unroll only; otherwise run the full vectorizer.
977 InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
978 Unroller.vectorize(&LVL);
981 InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
// Record in metadata that this loop is done, preventing re-vectorization.
986 Hints.setAlreadyVectorized(L);
// RuntimePointerCheck::insert: records one pointer's SCEV start/end range
// plus write flag and dependency-set id for later overlap checking.
// NOTE(review): fragmented -- the SCEV range computation lines are elided.
1014 LoopVectorizationLegality::RuntimePointerCheck::insert(
ScalarEvolution *SE,
1017 unsigned DepSetId) {
1020 assert(AR &&
"Invalid addrec expression");
1023 Pointers.push_back(Ptr);
1025 Ends.push_back(ScEnd);
1026 IsWritePtr.push_back(WritePtr);
1027 DependencySetId.push_back(DepSetId);
// getBroadcastInstrs: splat a scalar V across a VF-wide vector.  Loop-
// invariant values are broadcast in the preheader so the splat is hoisted.
1030 Value *InnerLoopVectorizer::getBroadcastInstrs(
Value *V) {
1033 bool NewInstr = (Instr && Instr->
getParent() == LoopVectorBody);
1034 bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
1039 Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
1042 Value *Shuf = Builder.CreateVectorSplat(VF, V,
"broadcast");
// getConsecutiveVector: Val + <0, 1, 2, ...> (negated when Negate), used to
// materialize consecutive induction lanes.
1047 Value *InnerLoopVectorizer::getConsecutiveVector(
Value* Val,
int StartIdx,
1051 "Elem must be an integer");
1059 for (
int i = 0; i < VLen; ++i) {
1060 int64_t Idx = Negate ? (-i) : i;
1066 assert(Cv->
getType() == Val->
getType() &&
"Invalid consecutive vec");
1067 return Builder.CreateAdd(Val, Cv,
"induction");
// isConsecutivePtr: classify a pointer as unit-stride forward/reverse.
// First case: the pointer is itself a pointer-induction PHI.
1095 int LoopVectorizationLegality::isConsecutivePtr(
Value *Ptr) {
1102 PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
1103 if (Phi && Inductions.count(Phi)) {
1104 InductionInfo II = Inductions[Phi];
1105 if (IK_PtrInduction == II.IK)
1107 else if (IK_ReversePtrInduction == II.IK)
// Second case: a GEP whose base is a pointer-induction PHI (fragment --
// the GEP extraction lines are elided).
1120 if (Phi && Inductions.count(Phi)) {
1128 for (
unsigned i = 1; i < NumOperands; ++i)
1132 InductionInfo II = Inductions[Phi];
1133 if (IK_PtrInduction == II.IK)
1135 else if (IK_ReversePtrInduction == II.IK)
// Third case: all GEP indices except the induction operand must be invariant.
1143 for (
unsigned i = 0; i != NumOperands; ++i)
1144 if (i != InductionOperand &&
1165 bool LoopVectorizationLegality::isUniform(
Value *V) {
// getVectorValue: return the widened parts for V, broadcasting on demand.
1170 InnerLoopVectorizer::getVectorValue(
Value *V) {
1171 assert(V != Induction &&
"The new induction variable should not be used.");
1175 if (WidenMap.has(V))
1176 return WidenMap.get(V);
1180 Value *B = getBroadcastInstrs(V);
1181 return WidenMap.splat(V, B);
// reverseVector: shuffle mask <VF-1, ..., 1, 0> to reverse lane order.
1184 Value *InnerLoopVectorizer::reverseVector(
Value *Vec) {
1187 for (
unsigned i = 0; i < VF; ++i)
1188 ShuffleMask.
push_back(Builder.getInt32(VF - i - 1));
// vectorizeMemoryInstruction: widen a consecutive load/store into wide
// vector memory ops; falls back to scalarizeInstruction otherwise.
// NOTE(review): fragmented sample -- some statements between the visible
// lines are elided.
1196 void InnerLoopVectorizer::vectorizeMemoryInstruction(
Instruction *Instr,
1197 LoopVectorizationLegality *Legal) {
1202 assert((LI || SI) &&
"Invalid Load/Store instruction");
1204 Type *ScalarDataTy = LI ? LI->
getType() : SI->getValueOperand()->getType();
1207 unsigned Alignment = LI ? LI->
getAlignment() : SI->getAlignment();
// Alignment 0 means "ABI alignment of the scalar type".
1211 Alignment = DL->getABITypeAlignment(ScalarDataTy);
1213 unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
1214 unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
// Padding between elements (alloc size != store size) breaks contiguity.
1216 if (ScalarAllocatedSize != VectorElementSize)
1217 return scalarizeInstruction(Instr);
// Only unit-stride, non-uniform-load accesses get the wide-load path.
1221 int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
1222 bool Reverse = ConsecutiveStride < 0;
1223 bool UniformLoad = LI && Legal->isUniform(Ptr);
1224 if (!ConsecutiveStride || UniformLoad)
1225 return scalarizeInstruction(Instr);
1227 Constant *Zero = Builder.getInt32(0);
1228 VectorParts &Entry = WidenMap.get(Instr);
// Rebuild the GEP on the scalar (lane 0) base pointer.
1233 setDebugLocFromInst(Builder, Gep);
1235 Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
1236 FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
1241 Gep2->
setName(
"gep.indvar.base");
1242 Ptr = Builder.Insert(Gep2);
1244 setDebugLocFromInst(Builder, Gep);
1246 OrigLoop) &&
"Base ptr must be invariant");
// Scalarize each GEP index: only the induction operand (or loop-contained
// operands) need the lane-0 extraction.
1255 for (
unsigned i = 0; i < NumOperands; ++i) {
1260 if (i == InductionOperand ||
1261 (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
1262 assert((i == InductionOperand ||
1264 "Must be last index or loop invariant");
1266 VectorParts &GEPParts = getVectorValue(GepOperand);
1267 Value *Index = GEPParts[0];
1268 Index = Builder.CreateExtractElement(Index, Zero);
1270 Gep2->
setName(
"gep.indvar.idx");
1273 Ptr = Builder.Insert(Gep2);
// Non-GEP pointer: must be a pointer-induction PHI; take lane 0.
1276 assert(isa<PHINode>(Ptr) &&
"Invalid induction ptr");
1277 setDebugLocFromInst(Builder, Ptr);
1278 VectorParts &PtrVal = getVectorValue(Ptr);
1279 Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
// Store path: one wide store per unrolled part.
1284 assert(!Legal->isUniform(SI->getPointerOperand()) &&
1285 "We do not allow storing to uniform addresses");
1286 setDebugLocFromInst(Builder, SI);
1289 VectorParts StoredVal = getVectorValue(SI->getValueOperand());
1291 for (
unsigned Part = 0; Part < UF; ++Part) {
1293 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
// Reverse accesses: reverse the value lanes and step the pointer backwards.
1298 StoredVal[Part] = reverseVector(StoredVal[Part]);
1301 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
1302 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
1305 Value *VecPtr = Builder.CreateBitCast(PartPtr,
1307 Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
// Load path: one wide load per unrolled part, reversed if needed.
1313 assert(LI &&
"Must have a load instruction");
1314 setDebugLocFromInst(Builder, LI);
1315 for (
unsigned Part = 0; Part < UF; ++Part) {
1317 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
1322 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
1323 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
1326 Value *VecPtr = Builder.CreateBitCast(PartPtr,
1328 Value *LI = Builder.CreateLoad(VecPtr,
"wide.load");
1329 cast<LoadInst>(
LI)->setAlignment(Alignment);
1330 Entry[Part] = Reverse ? reverseVector(LI) : LI;
// scalarizeInstruction: replicate an instruction once per lane (UF x VF
// clones), extracting operands per lane and inserting results back into
// vectors.  NOTE(review): fragmented -- some lines elided.
1334 void InnerLoopVectorizer::scalarizeInstruction(
Instruction *Instr) {
1339 setDebugLocFromInst(Builder, Instr);
// Collect widened (or uniform) versions of every operand.
1342 for (
unsigned op = 0, e = Instr->
getNumOperands(); op != e; ++op) {
1346 if (SrcOp == OldInduction) {
1347 Params.push_back(getVectorValue(SrcOp));
// Loop-defined operands must already be widened.
1356 if (SrcInst && OrigLoop->contains(SrcInst)) {
1357 assert(WidenMap.has(SrcInst) &&
"Source operand is unavailable");
1359 Params.push_back(WidenMap.get(SrcInst));
// Invariant operand: replicate the scalar across all UF parts.
1362 VectorParts Scalars;
1363 Scalars.append(UF, SrcOp);
1364 Params.push_back(Scalars);
1369 "Invalid number of operands");
// Void-returning instructions have no result vector to build.
1374 Value *UndefVec = IsVoidRetTy ? 0 :
1377 VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
// Clone the instruction UF*VF times, rewriting operands lane by lane.
1380 for (
unsigned Part = 0; Part < UF; ++Part) {
1382 for (
unsigned Width = 0; Width < VF; ++Width) {
1387 for (
unsigned op = 0, e = Instr->
getNumOperands(); op != e; ++op) {
1388 Value *Op = Params[op][Part];
1391 Op = Builder.CreateExtractElement(Op, Builder.getInt32(Width));
1396 Builder.Insert(Cloned);
1401 VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
1402 Builder.getInt32(Width));
// addRuntimeCheck: emit pairwise bounds checks ([start,end] overlap tests)
// for the pointers the legality analysis could not prove disjoint; returns
// the combined conflict condition.  NOTE(review): fragmented sample.
1408 InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
1410 LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
1411 Legal->getRuntimePointerCheck();
// No checks requested -> nothing to emit.
1413 if (!PtrRtCheck->Need)
1416 unsigned NumPointers = PtrRtCheck->Pointers.size();
// Expand each pointer's SCEV start/end to IR values at Loc.
1423 for (
unsigned i = 0; i < NumPointers; ++i) {
1424 Value *Ptr = PtrRtCheck->Pointers[i];
1428 DEBUG(
dbgs() <<
"LV: Adding RT check for a loop invariant ptr:" <<
1433 DEBUG(
dbgs() <<
"LV: Adding RT check for range:" << *Ptr <<
'\n');
1439 Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
1440 Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
// Pairwise overlap checks; accumulate into one OR'd conflict flag.
1448 Value *MemoryRuntimeCheck = 0;
1449 for (
unsigned i = 0; i < NumPointers; ++i) {
1450 for (
unsigned j = i+1; j < NumPointers; ++j) {
// Two reads can never conflict.
1452 if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
// Same dependency set was already proven safe by the dependence analysis.
1456 if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
1459 unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
1460 unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
1462 assert((AS0 == Ends[j]->
getType()->getPointerAddressSpace()) &&
1463 (AS1 == Ends[i]->
getType()->getPointerAddressSpace()) &&
1464 "Trying to bounds check pointers with different address spaces");
// Ranges [Start0,End0] and [Start1,End1] overlap iff
// Start0 <= End1 && Start1 <= End0 (unsigned compares).
1469 Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0,
"bc");
1470 Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1,
"bc");
1471 Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1,
"bc");
1472 Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0,
"bc");
1474 Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1,
"bound0");
1475 Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0,
"bound1");
1476 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1,
"found.conflict");
1477 if (MemoryRuntimeCheck)
1478 IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
1480 MemoryRuntimeCheck = IsConflict;
1489 ChkBuilder.Insert(Check,
"memcheck.conflict");
// createEmptyLoop: build the vector-loop skeleton -- bypass checks, the
// empty vector body with its new canonical induction variable, the middle
// block, and resume PHIs that carry induction state into the scalar
// epilogue loop.  NOTE(review): heavily fragmented -- block creation and
// several intermediate statements are elided between the visible lines.
1494 InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
1524 BasicBlock *OldBasicBlock = OrigLoop->getHeader();
1525 BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
1526 BasicBlock *ExitBlock = OrigLoop->getExitBlock();
1527 assert(ExitBlock &&
"Must have an exit block");
1533 OldInduction = Legal->getInduction();
1534 Type *IdxTy = Legal->getWidestInductionType();
// Materialize the trip count via SCEV expansion.
1550 Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
// Starting index: the old induction's preheader value (zero-extended to the
// widest induction type) when one exists.
1557 Value *StartIdx = ExtendedIdx = OldInduction ?
1558 Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
1562 assert(BypassBlock &&
"Invalid loop structure")
1563 LoopBypassBlocks.push_back(BypassBlock);
1587 LI->addTopLevelLoop(Lp);
// New canonical induction PHI of the vector loop.
1596 setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
1597 Induction = Builder.CreatePHI(IdxTy, 2,
"index");
1605 setDebugLocFromInst(BypassBuilder,
1606 getDebugLocFromInstOrOperands(OldInduction));
// Normalize the count to IdxTy (pointer trip counts get a pointer cast).
1610 if (Count->
getType() != IdxTy) {
1613 if (ExitCount->getType()->isPointerTy())
1614 Count = BypassBuilder.CreatePointerCast(Count, IdxTy,
"ptrcnt.to.int");
1616 Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy,
"cnt.cast");
1620 Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx,
"end.idx");
// Round the trip count down to a multiple of VF*UF; the remainder runs in
// the scalar epilogue loop.
1624 Value *R = BypassBuilder.CreateURem(Count, Step,
"n.mod.vf");
1625 Value *CountRoundDown = BypassBuilder.CreateSub(Count, R,
"n.vec");
1626 Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
1627 "end.idx.rnd.down");
// Bypass the vector loop entirely when zero vector iterations remain.
1631 Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
// Memory runtime checks get their own block appended to the bypass chain.
1639 Instruction *MemRuntimeCheck = addRuntimeCheck(Legal,
1641 if (MemRuntimeCheck) {
1647 LoopBypassBlocks.push_back(CheckBlock);
1655 Cmp = MemRuntimeCheck;
1656 LastBypassBlock = CheckBlock;
// Build a resume PHI for every induction variable so the scalar loop can
// continue where the vector loop left off.
1676 BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
1677 for (I = List->
begin(), E = List->
end(); I != E; ++
I) {
1679 LoopVectorizationLegality::InductionInfo II = I->second;
1681 Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->
getType();
// The primary induction may be narrower than IdxTy; keep a truncated copy.
1686 PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
1690 Value *EndValue = 0;
1692 case LoopVectorizationLegality::IK_NoInduction:
1694 case LoopVectorizationLegality::IK_IntInduction: {
1699 if (OrigPhi == OldInduction) {
1703 BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->
getType());
1706 for (
unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++
I)
1707 TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
1708 TruncResumeVal->addIncoming(EndValue, VecBody);
1711 EndValue = IdxEndRoundDown;
1713 ResumeIndex = ResumeVal;
// Non-primary int induction: end = start + count (in the PHI's type).
1719 Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
1722 EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue ,
"ind.end");
1725 case LoopVectorizationLegality::IK_ReverseIntInduction: {
1727 Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
1731 EndValue = BypassBuilder.CreateSub(II.StartValue, CRD,
"rev.ind.end");
1734 case LoopVectorizationLegality::IK_PtrInduction: {
1737 EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
1741 case LoopVectorizationLegality::IK_ReversePtrInduction: {
1745 Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
1747 EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
// Wire incoming values: bypass edges carry the start, VecBody the end.
1755 for (
unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++
I) {
1756 if (OrigPhi == OldInduction)
1757 ResumeVal->
addIncoming(StartIdx, LoopBypassBlocks[I])
1759 ResumeVal->
addIncoming(II.StartValue, LoopBypassBlocks[I]);
1766 if (OrigPhi == OldInduction)
// No primary induction existed: synthesize a resume index PHI.
1777 assert(!ResumeIndex &&
"Unexpected resume value found");
1780 for (
unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++
I)
1781 ResumeIndex->
addIncoming(StartIdx, LoopBypassBlocks[I]);
1782 ResumeIndex->
addIncoming(IdxEndRoundDown, VecBody);
1787 "Invalid resume Index");
1793 ResumeIndex,
"cmp.n",
// Vector-loop latch: index += VF*UF, branch back until the rounded end.
1801 Value *NextIdx = Builder.CreateAdd(Induction, Step,
"index.next");
1802 Induction->addIncoming(StartIdx, VectorPH);
1803 Induction->addIncoming(NextIdx, VecBody);
1805 Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown);
1806 Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
// Publish the skeleton blocks for the rest of the vectorizer.
1815 LoopVectorPreHeader = VectorPH;
1816 LoopScalarPreHeader = ScalarPH;
1817 LoopMiddleBlock = MiddleBlock;
1818 LoopExitBlock = ExitBlock;
1819 LoopVectorBody = VecBody;
1820 LoopScalarBody = OldBasicBlock;
// Mark the scalar remainder loop so it is not vectorized again.
1822 LoopVectorizeHints Hints(Lp,
true);
1823 Hints.setAlreadyVectorized(Lp);
// Reduction helper fragments.  NOTE(review): each helper below is heavily
// elided; comments describe only the visible pieces.
// getReductionIdentity: identity constant for a reduction kind on type Tp.
1829 LoopVectorizationLegality::getReductionIdentity(ReductionKind K,
Type *Tp) {
1836 case RK_IntegerMult:
// Intrinsic-validation helper fragments: return the vetted intrinsic ID.
1861 return ValidIntrinsicID;
1874 return ValidIntrinsicID;
1882 switch (II->getIntrinsicID()) {
1904 return II->getIntrinsicID();
// Map a reduction kind to the IR binary opcode used to combine parts
// (min/max reductions are represented by their compare opcode).
1999 case LoopVectorizationLegality::RK_IntegerAdd:
2000 return Instruction::Add;
2001 case LoopVectorizationLegality::RK_IntegerMult:
2002 return Instruction::Mul;
2003 case LoopVectorizationLegality::RK_IntegerOr:
2005 case LoopVectorizationLegality::RK_IntegerAnd:
2007 case LoopVectorizationLegality::RK_IntegerXor:
2009 case LoopVectorizationLegality::RK_FloatMult:
2010 return Instruction::FMul;
2011 case LoopVectorizationLegality::RK_FloatAdd:
2012 return Instruction::FAdd;
2013 case LoopVectorizationLegality::RK_IntegerMinMax:
2014 return Instruction::ICmp;
2015 case LoopVectorizationLegality::RK_FloatMinMax:
2016 return Instruction::FCmp;
// createMinMaxOp: emit compare+select implementing the min/max kind.
2023 LoopVectorizationLegality::MinMaxReductionKind RK,
2030 case LoopVectorizationLegality::MRK_UIntMin:
2033 case LoopVectorizationLegality::MRK_UIntMax:
2036 case LoopVectorizationLegality::MRK_SIntMin:
2039 case LoopVectorizationLegality::MRK_SIntMax:
2042 case LoopVectorizationLegality::MRK_FloatMin:
2045 case LoopVectorizationLegality::MRK_FloatMax:
2051 if (RK == LoopVectorizationLegality::MRK_FloatMin ||
2052 RK == LoopVectorizationLegality::MRK_FloatMax)
2053 Cmp = Builder.
CreateFCmp(P, Left, Right,
"rdx.minmax.cmp");
2055 Cmp = Builder.
CreateICmp(P, Left, Right,
"rdx.minmax.cmp");
// CSEDenseMapInfo: DenseMap traits enabling CSE over the shuffle/insert/
// extract/GEP instructions the vectorizer itself generates.
2062 struct CSEDenseMapInfo {
2064 return isa<InsertElementInst>(
I) || isa<ExtractElementInst>(I) ||
2065 isa<ShuffleVectorInst>(
I) || isa<GetElementPtrInst>(I);
2074 assert(canHandle(I) &&
"Unknown instruction!");
// Sentinel keys must compare by pointer identity, never structurally.
2079 if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
2080 LHS == getTombstoneKey() || RHS == getTombstoneKey())
// cse() fragment: skip instructions the traits cannot handle.
2094 if (!CSEDenseMapInfo::canHandle(In))
// vectorizeLoop: widen every block (RPO order), then patch up the reduction
// PHIs: seed them, reduce the UF parts, shuffle-reduce the lanes, and wire
// the result into the LCSSA/scalar-loop PHIs.
// NOTE(review): fragmented sample -- several statements elided.
2110 InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
2118 Constant *Zero = Builder.getInt32(0);
// Reduction header PHIs are deferred; collected here and fixed afterwards.
2128 PhiVector RdxPHIsToFix;
2137 be = DFS.endRPO(); bb != be; ++bb)
2138 vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
2149 for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
2152 assert(RdxPhi &&
"Unable to recover vectorized PHI");
2155 assert(Legal->getReductionVars()->count(RdxPhi) &&
2156 "Unable to find the reduction variable");
2157 LoopVectorizationLegality::ReductionDescriptor RdxDesc =
2158 (*Legal->getReductionVars())[RdxPhi];
2160 setDebugLocFromInst(Builder, RdxDesc.StartValue);
// Emit the start-value setup before the first bypass branch.
2166 Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
2169 VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
2170 Type *VecTy = VectorExit[0]->getType();
// Min/max reductions have no neutral identity: splat the start value so
// every lane starts from it.
2176 if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
2177 RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
2180 VectorStart = Identity = RdxDesc.StartValue;
2182 VectorStart = Identity = Builder.CreateVectorSplat(VF,
// Ordinary reductions: identity splat, start value only in lane 0.
2189 LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
2195 VectorStart = RdxDesc.StartValue;
2201 VectorStart = Builder.CreateInsertElement(Identity,
2202 RdxDesc.StartValue, Zero);
2209 BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
// Hook up the widened reduction PHIs: part 0 carries the start value,
// remaining parts carry the identity.
2213 VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
2214 BasicBlock *Latch = OrigLoop->getLoopLatch();
2216 VectorParts &Val = getVectorValue(LoopVal);
2217 for (
unsigned part = 0; part < UF; ++part) {
2220 Value *StartVal = (part == 0) ? VectorStart : Identity;
2221 cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
2222 cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
// In the middle block, build exit PHIs feeding the cross-part reduction.
2229 Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
2231 VectorParts RdxParts;
2232 setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
2233 for (
unsigned part = 0; part < UF; ++part) {
2236 VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
2237 PHINode *NewPhi = Builder.CreatePHI(VecTy, 2,
"rdx.vec.exit.phi");
2238 Value *StartVal = (part == 0) ? VectorStart : Identity;
2239 for (
unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++
I)
2240 NewPhi->
addIncoming(StartVal, LoopBypassBlocks[I]);
2241 NewPhi->
addIncoming(RdxExitVal[part], LoopVectorBody);
2242 RdxParts.push_back(NewPhi);
// Combine the UF parts into one vector (binop, or min/max op for ICmp/FCmp).
2246 Value *ReducedPartRdx = RdxParts[0];
2248 setDebugLocFromInst(Builder, ReducedPartRdx);
2249 for (
unsigned part = 1; part < UF; ++part) {
2250 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
2252 RdxParts[part], ReducedPartRdx,
2256 ReducedPartRdx, RdxParts[part]);
// Log2(VF) shuffle-and-op steps fold the lanes down to lane 0.
2264 "Reduction emission only supported for pow2 vectors!");
2265 Value *TmpVec = ReducedPartRdx;
2267 for (
unsigned i = VF; i != 1; i >>= 1) {
2269 for (
unsigned j = 0; j != i/2; ++j)
2270 ShuffleMask[j] = Builder.getInt32(i/2 + j);
2273 std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
2277 Builder.CreateShuffleVector(TmpVec,
2282 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
2286 TmpVec =
createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
2290 ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
2291 Builder.getInt32(0));
// Feed the scalar result into the LCSSA PHIs of the exit block.
2299 LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
2301 if (!LCSSAPhi)
break;
2311 LCSSAPhi->
addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Rewire the scalar loop's reduction PHI: self edge gets the reduced value,
// latch edge gets the loop-exit instruction.
2318 int IncomingEdgeBlockIdx =
2319 (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
2320 assert(IncomingEdgeBlockIdx >= 0 &&
"Invalid block index");
2322 int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
2323 (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
2324 (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
// Finally CSE the redundant shuffles/inserts the widening produced.
2330 cse(LoopVectorBody);
// fixLCSSAPHIs fragment: walk the exit block's leading PHIs (PHIs are always
// first, so stop at the first non-PHI).
2333 void InnerLoopVectorizer::fixLCSSAPHIs() {
2335 LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
2337 if (!LCSSAPhi)
break;
// createEdgeMask: mask of the Src->Dst edge = Src's block mask AND'd with
// the branch condition (negated for the false edge); memoized in MaskCache.
2350 std::pair<BasicBlock*, BasicBlock*> Edge(Src, Dst);
2351 EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge);
2352 if (ECEntryIt != MaskCache.end())
2353 return ECEntryIt->second;
2355 VectorParts SrcMask = createBlockInMask(Src);
2359 assert(BI &&
"Unexpected terminator found");
2362 VectorParts EdgeMask = getVectorValue(BI->
getCondition());
2365 for (
unsigned part = 0; part < UF; ++part)
2366 EdgeMask[part] = Builder.CreateNot(EdgeMask[part]);
2368 for (
unsigned part = 0; part < UF; ++part)
2369 EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
2371 MaskCache[Edge] = EdgeMask;
// Unconditional branch: edge mask is just the source block's mask.
2375 MaskCache[Edge] = SrcMask;
// createBlockInMask: the loop header is always executed (all-true mask);
// other blocks OR together the masks of their incoming edges.
2380 InnerLoopVectorizer::createBlockInMask(
BasicBlock *BB) {
2381 assert(OrigLoop->contains(BB) &&
"Block is not a part of a loop");
2384 if (OrigLoop->getHeader() == BB) {
2386 return getVectorValue(C);
2391 VectorParts BlockMask = getVectorValue(Zero);
2395 VectorParts EM = createEdgeMask(*it, BB);
2396 for (
unsigned part = 0; part < UF; ++part)
2397 BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
// widenPHIInstruction: widen a PHI node.  Reductions are deferred (empty
// PHIs created now, fixed in vectorizeLoop); if-converted PHIs become
// selects; induction PHIs are materialized from the new canonical index.
// NOTE(review): fragmented sample -- several statements elided.
2403 void InnerLoopVectorizer::widenPHIInstruction(
Instruction *PN,
2405 LoopVectorizationLegality *Legal,
2406 unsigned UF,
unsigned VF, PhiVector *PV) {
// Reduction PHI: create placeholder PHIs; bookkeeping happens later.
2409 if (Legal->getReductionVars()->count(P)) {
2410 for (
unsigned part = 0; part < UF; ++part) {
2415 LoopVectorBody-> getFirstInsertionPt());
2421 setDebugLocFromInst(Builder, P);
// Non-header PHI: product of if-conversion; lower to a chain of selects
// over the incoming edges' masks.
2423 if (P->
getParent() != OrigLoop->getHeader()) {
2437 for (
unsigned In = 0;
In < NumIncoming;
In++) {
2442 for (
unsigned part = 0; part < UF; ++part) {
2446 Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
2451 Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
2452 Entry[part],
"predphi");
// Header PHI that is not a reduction must be an induction.
2460 assert(Legal->getInductionVars()->count(P) &&
2461 "Not an induction variable");
2463 LoopVectorizationLegality::InductionInfo II =
2464 Legal->getInductionVars()->lookup(P);
2467 case LoopVectorizationLegality::IK_NoInduction:
// Forward int induction: derive from the canonical Induction, offset by
// the start value when this is not the primary induction.
2469 case LoopVectorizationLegality::IK_IntInduction: {
2470 assert(P->
getType() == II.StartValue->getType() &&
"Types must match");
2473 if (P == OldInduction) {
2476 Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
2480 Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
2482 NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
2483 Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
2486 Broadcasted = getBroadcastInstrs(Broadcasted);
// Each unrolled part gets consecutive lane values offset by VF*part.
2489 for (
unsigned part = 0; part < UF; ++part)
2490 Entry[part] = getConsecutiveVector(Broadcasted, VF * part,
false);
2493 case LoopVectorizationLegality::IK_ReverseIntInduction:
2494 case LoopVectorizationLegality::IK_PtrInduction:
2495 case LoopVectorizationLegality::IK_ReversePtrInduction:
// Normalize the canonical index to start at zero.
2497 Value *StartIdx = ExtendedIdx;
2499 Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
// Reverse int induction: start - normalized index, negated lane steps.
2503 if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
2504 IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
2505 Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
2507 Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
2511 Value *Broadcasted = getBroadcastInstrs(ReverseInd);
2514 for (
unsigned part = 0; part < UF; ++part)
2515 Entry[part] = getConsecutiveVector(Broadcasted, -(
int)VF * part,
// Pointer inductions: GEP per lane (or per part when VF == 1).
2524 bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
2529 for (
unsigned part = 0; part < UF; ++part) {
2531 int EltIndex = (part) * (Reverse ? -1 : 1);
2535 GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx,
"gep.ridx");
2537 GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx,
"gep.idx");
2539 Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
2541 Entry[part] = SclrGep;
// VF > 1: build the vector one inserted lane at a time.
2546 for (
unsigned int i = 0; i < VF; ++i) {
2547 int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
2551 GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx,
"gep.idx");
2553 GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx,
"gep.ridx");
2555 Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
2557 VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
2558 Builder.getInt32(i),
2561 Entry[part] = VecVal;
// vectorizeBlockInLoop: walk a block's instructions and widen each one,
// dispatching on the opcode.  NOTE(review): fragmented sample, and the
// function continues past the end of this excerpt.
2568 InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
2572 VectorParts &Entry = WidenMap.get(it);
2573 switch (it->getOpcode()) {
// Branches are handled by the skeleton/mask machinery, not widened here.
2574 case Instruction::Br:
2580 widenPHIInstruction(it, Entry, Legal, UF, VF, PV);
// Binary operators: widen operand vectors and re-emit the op per part.
2584 case Instruction::Add:
2585 case Instruction::FAdd:
2586 case Instruction::Sub:
2587 case Instruction::FSub:
2588 case Instruction::Mul:
2589 case Instruction::FMul:
2590 case Instruction::UDiv:
2591 case Instruction::SDiv:
2592 case Instruction::FDiv:
2593 case Instruction::URem:
2594 case Instruction::SRem:
2595 case Instruction::FRem:
2596 case Instruction::Shl:
2597 case Instruction::LShr:
2598 case Instruction::AShr:
2604 setDebugLocFromInst(Builder, BinOp);
2605 VectorParts &
A = getVectorValue(it->getOperand(0));
2606 VectorParts &B = getVectorValue(it->getOperand(1));
2609 for (
unsigned Part = 0; Part < UF; ++Part) {
2610 Value *V = Builder.CreateBinOp(BinOp->
getOpcode(), A[Part], B[Part]);
// Carry over wrap/exact flags from the scalar op where applicable.
2614 if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) {
2618 if (VecOp && isa<PossiblyExactOperator>(VecOp))
// Select: widen all three operands; an invariant condition stays scalar
// (extracted from lane 0).
2631 setDebugLocFromInst(Builder, it);
2637 VectorParts &Cond = getVectorValue(it->getOperand(0));
2638 VectorParts &Op0 = getVectorValue(it->getOperand(1));
2639 VectorParts &Op1 = getVectorValue(it->getOperand(2));
2641 Value *ScalarCond = (VF == 1) ? Cond[0] :
2642 Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
2644 for (
unsigned Part = 0; Part < UF; ++Part) {
2645 Entry[Part] = Builder.CreateSelect(
2646 InvariantCond ? ScalarCond : Cond[Part],
// Compares: preserve the original predicate, fp vs int variant.
2653 case Instruction::ICmp:
2654 case Instruction::FCmp: {
2656 bool FCmp = (it->getOpcode() == Instruction::FCmp);
2658 setDebugLocFromInst(Builder, it);
2659 VectorParts &A = getVectorValue(it->getOperand(0));
2660 VectorParts &B = getVectorValue(it->getOperand(1));
2661 for (
unsigned Part = 0; Part < UF; ++Part) {
2664 C = Builder.CreateFCmp(Cmp->
getPredicate(), A[Part], B[Part]);
2666 C = Builder.CreateICmp(Cmp->
getPredicate(), A[Part], B[Part]);
// Loads/stores go through the dedicated memory widening path.
2674 vectorizeMemoryInstruction(it, Legal);
// Casts: a trunc of the primary induction is optimized into a narrower
// induction sequence; other casts are widened element-wise.
2676 case Instruction::ZExt:
2677 case Instruction::SExt:
2678 case Instruction::FPToUI:
2679 case Instruction::FPToSI:
2680 case Instruction::FPExt:
2681 case Instruction::PtrToInt:
2682 case Instruction::IntToPtr:
2683 case Instruction::SIToFP:
2684 case Instruction::UIToFP:
2685 case Instruction::Trunc:
2686 case Instruction::FPTrunc:
2687 case Instruction::BitCast: {
2689 setDebugLocFromInst(Builder, it);
2695 it->getOpcode() == Instruction::Trunc) {
2696 Value *ScalarCast = Builder.CreateCast(CI->
getOpcode(), Induction,
2698 Value *Broadcasted = getBroadcastInstrs(ScalarCast);
2699 for (
unsigned Part = 0; Part < UF; ++Part)
2700 Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part,
false);
2707 VectorParts &A = getVectorValue(it->getOperand(0));
2708 for (
unsigned Part = 0; Part < UF; ++Part)
2709 Entry[Part] = Builder.CreateCast(CI->
getOpcode(), A[Part], DestTy);
2715 if (isa<DbgInfoIntrinsic>(it))
2717 setDebugLocFromInst(Builder, it);
2722 assert(ID &&
"Not an intrinsic call!");
2726 scalarizeInstruction(it);
2729 for (
unsigned Part = 0; Part < UF; ++Part) {
2740 Entry[Part] = Builder.CreateCall(F, Args);
2749 scalarizeInstruction(it);
2755 void InnerLoopVectorizer::updateAnalysis() {
2760 assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
2761 "Entry does not dominate exit.");
2763 for (
unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++
I)
2764 DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
2765 DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
2766 DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
2767 DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
2768 DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
2769 DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
2770 DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
2772 DEBUG(DT->verifyAnalysis());
2775 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
2779 assert(TheLoop->getNumBlocks() > 1 &&
"Single block loops are vectorizable");
2786 BE = TheLoop->block_end(); BI != BE; ++BI) {
2789 if (blockNeedsPredication(BB))
2793 if (
LoadInst *LI = dyn_cast<LoadInst>(I))
2794 SafePointes.insert(LI->getPointerOperand());
2795 else if (
StoreInst *SI = dyn_cast<StoreInst>(I))
2796 SafePointes.insert(SI->getPointerOperand());
2802 BE = TheLoop->block_end(); BI != BE; ++BI) {
2810 if (blockNeedsPredication(BB) && !blockCanBePredicated(BB, SafePointes))
2818 bool LoopVectorizationLegality::canVectorize() {
2821 if (!TheLoop->getLoopPreheader())
2825 if (TheLoop->getSubLoopsVector().size())
2829 if (TheLoop->getNumBackEdges() != 1)
2833 if (!TheLoop->getExitingBlock())
2838 TheLoop->getHeader()->getName() <<
'\n');
2841 unsigned NumBlocks = TheLoop->getNumBlocks();
2842 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
2843 DEBUG(
dbgs() <<
"LV: Can't if-convert the loop.\n");
2850 DEBUG(
dbgs() <<
"LV: SCEV could not compute the loop exit count.\n");
2858 DEBUG(
dbgs() <<
"LV: Found a loop with a very small trip count. " <<
2859 "This loop is not worth vectorizing.\n");
2864 if (!canVectorizeInstrs()) {
2865 DEBUG(
dbgs() <<
"LV: Can't vectorize the instructions or CFG\n");
2870 if (!canVectorizeMemory()) {
2871 DEBUG(
dbgs() <<
"LV: Can't vectorize due to memory conflicts\n");
2876 collectLoopUniforms();
2878 DEBUG(
dbgs() <<
"LV: We can vectorize this loop" <<
2879 (PtrRtCheck.Need ?
" (with a runtime bound check)" :
"")
2914 if (!Reductions.
count(Inst))
2921 DEBUG(
dbgs() <<
"LV: Found an outside user for : " << *U <<
'\n');
2928 bool LoopVectorizationLegality::canVectorizeInstrs() {
2929 BasicBlock *PreHeader = TheLoop->getLoopPreheader();
2936 AttributeSet::FunctionIndex,
2941 be = TheLoop->block_end(); bb != be; ++bb) {
2947 if (
PHINode *Phi = dyn_cast<PHINode>(it)) {
2953 DEBUG(
dbgs() <<
"LV: Found an non-int non-pointer PHI.\n");
2960 if (*bb != Header) {
2970 DEBUG(
dbgs() <<
"LV: Found an invalid PHI.\n");
2977 InductionKind IK = isInductionVariable(Phi);
2979 if (IK_NoInduction != IK) {
2987 if (IK == IK_IntInduction) {
2991 if (!Induction || PhiTy == WidestIndTy)
2995 DEBUG(
dbgs() <<
"LV: Found an induction variable.\n");
2996 Inductions[Phi] = InductionInfo(StartValue, IK);
3006 if (AddReductionVar(Phi, RK_IntegerAdd)) {
3007 DEBUG(
dbgs() <<
"LV: Found an ADD reduction PHI."<< *Phi <<
"\n");
3010 if (AddReductionVar(Phi, RK_IntegerMult)) {
3011 DEBUG(
dbgs() <<
"LV: Found a MUL reduction PHI."<< *Phi <<
"\n");
3014 if (AddReductionVar(Phi, RK_IntegerOr)) {
3015 DEBUG(
dbgs() <<
"LV: Found an OR reduction PHI."<< *Phi <<
"\n");
3018 if (AddReductionVar(Phi, RK_IntegerAnd)) {
3019 DEBUG(
dbgs() <<
"LV: Found an AND reduction PHI."<< *Phi <<
"\n");
3022 if (AddReductionVar(Phi, RK_IntegerXor)) {
3023 DEBUG(
dbgs() <<
"LV: Found a XOR reduction PHI."<< *Phi <<
"\n");
3026 if (AddReductionVar(Phi, RK_IntegerMinMax)) {
3027 DEBUG(
dbgs() <<
"LV: Found a MINMAX reduction PHI."<< *Phi <<
"\n");
3030 if (AddReductionVar(Phi, RK_FloatMult)) {
3031 DEBUG(
dbgs() <<
"LV: Found an FMult reduction PHI."<< *Phi <<
"\n");
3034 if (AddReductionVar(Phi, RK_FloatAdd)) {
3035 DEBUG(
dbgs() <<
"LV: Found an FAdd reduction PHI."<< *Phi <<
"\n");
3038 if (AddReductionVar(Phi, RK_FloatMinMax)) {
3039 DEBUG(
dbgs() <<
"LV: Found an float MINMAX reduction PHI."<< *Phi <<
3044 DEBUG(
dbgs() <<
"LV: Found an unidentified PHI."<< *Phi <<
"\n");
3052 DEBUG(
dbgs() <<
"LV: Found a call site.\n");
3059 !it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
3060 DEBUG(
dbgs() <<
"LV: Found unvectorizable type.\n");
3066 Type *
T =
ST->getValueOperand()->getType();
3081 DEBUG(
dbgs() <<
"LV: Did not find one integer induction var.\n");
3082 if (Inductions.empty())
3089 void LoopVectorizationLegality::collectLoopUniforms() {
3092 std::vector<Value*> Worklist;
3098 while (Worklist.size()) {
3100 Worklist.pop_back();
3105 if (!I || !TheLoop->contains(I) || isa<PHINode>(
I))
3121 class AccessAnalysis {
3131 DL(Dl), DepCands(DA), AreAllWritesIdentified(
true),
3132 AreAllReadsIdentified(
true), IsRTCheckNeeded(
false) {}
3135 void addLoad(
Value *Ptr,
bool IsReadOnly) {
3136 Accesses.insert(MemAccessInfo(Ptr,
false));
3138 ReadOnlyPtr.insert(Ptr);
3142 void addStore(
Value *Ptr) {
3143 Accesses.insert(MemAccessInfo(Ptr,
true));
3148 bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
3150 Loop *TheLoop,
bool ShouldCheckStride =
false);
3154 void buildDependenceSets() {
3156 processMemAccesses(
false);
3158 processMemAccesses(
true);
3161 bool isRTCheckNeeded() {
return IsRTCheckNeeded; }
3163 bool isDependencyCheckNeeded() {
return !CheckDeps.empty(); }
3164 void resetDepChecks() { CheckDeps.clear(); }
3166 MemAccessInfoSet &getDependenciesToCheck() {
return CheckDeps; }
3175 void processMemAccesses(
bool UseDeferred);
3178 PtrAccessSet Accesses;
3181 PtrAccessSet DeferredAccesses;
3184 UnderlyingObjToAccessMap ObjToLastAccess;
3187 MemAccessInfoSet CheckDeps;
3200 DepCandidates &DepCands;
3202 bool AreAllWritesIdentified;
3203 bool AreAllReadsIdentified;
3204 bool IsRTCheckNeeded;
3224 bool AccessAnalysis::canCheckPtrAtRT(
3225 LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
3227 Loop *TheLoop,
bool ShouldCheckStride) {
3230 unsigned NumReadPtrChecks = 0;
3231 unsigned NumWritePtrChecks = 0;
3232 bool CanDoRT =
true;
3234 bool IsDepCheckNeeded = isDependencyCheckNeeded();
3237 unsigned RunningDepId = 1;
3240 for (PtrAccessSet::iterator AI = Accesses.begin(), AE = Accesses.end();
3242 const MemAccessInfo &Access = *AI;
3243 Value *Ptr = Access.getPointer();
3244 bool IsWrite = Access.getInt();
3247 if (!IsWrite && Accesses.count(MemAccessInfo(Ptr,
true)))
3251 ++NumWritePtrChecks;
3258 (!ShouldCheckStride ||
isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
3262 if (IsDepCheckNeeded) {
3263 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
3264 unsigned &LeaderId = DepSetId[Leader];
3266 LeaderId = RunningDepId++;
3270 DepId = RunningDepId++;
3272 RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
3274 DEBUG(
dbgs() <<
"LV: Found a runtime check ptr:" << *Ptr <<
'\n');
3280 if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
3283 NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks +
3284 NumWritePtrChecks - 1));
3292 unsigned NumPointers = RtCheck.Pointers.size();
3293 for (
unsigned i = 0; i < NumPointers; ++i) {
3294 for (
unsigned j = i + 1; j < NumPointers; ++j) {
3296 if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
3299 Value *PtrI = RtCheck.Pointers[i];
3300 Value *PtrJ = RtCheck.Pointers[j];
3305 DEBUG(
dbgs() <<
"LV: Runtime check would require comparison between"
3306 " different address spaces\n");
3319 void AccessAnalysis::processMemAccesses(
bool UseDeferred) {
3324 PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
3325 for (PtrAccessSet::iterator AI = S.begin(), AE = S.end(); AI != AE; ++AI) {
3326 const MemAccessInfo &Access = *AI;
3327 Value *Ptr = Access.getPointer();
3328 bool IsWrite = Access.getInt();
3330 DepCands.insert(Access);
3337 bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
3338 if (!UseDeferred && IsReadOnlyPtr) {
3339 DeferredAccesses.insert(Access);
3343 bool NeedDepCheck =
false;
3347 ValueVector TempObjects;
3349 for (ValueVector::iterator UI = TempObjects.begin(), UE = TempObjects.end();
3351 Value *UnderlyingObj = *UI;
3362 (!IsWrite && (!AreAllWritesIdentified ||
3363 !isa<Argument>(UnderlyingObj)) &&
3365 DEBUG(
dbgs() <<
"LV: Found an unidentified " <<
3366 (IsWrite ?
"write" :
"read" ) <<
" ptr: " << *UnderlyingObj <<
3368 IsRTCheckNeeded = (IsRTCheckNeeded ||
3370 !AreAllReadsIdentified);
3373 AreAllWritesIdentified =
false;
3375 AreAllReadsIdentified =
false;
3382 if ((IsWrite || IsReadOnlyPtr) && WriteObjects.count(UnderlyingObj))
3383 NeedDepCheck =
true;
3386 WriteObjects.insert(UnderlyingObj);
3389 UnderlyingObjToAccessMap::iterator Prev =
3390 ObjToLastAccess.find(UnderlyingObj);
3391 if (Prev != ObjToLastAccess.end())
3392 DepCands.unionSets(Access, Prev->second);
3394 ObjToLastAccess[UnderlyingObj] = Access;
3398 CheckDeps.insert(Access);
3435 class MemoryDepChecker {
3441 : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
3442 ShouldRetryWithRuntimeCheck(
false) {}
3448 Accesses[MemAccessInfo(Ptr,
true)].push_back(AccessIdx);
3449 InstMap.push_back(SI);
3457 Accesses[MemAccessInfo(Ptr,
false)].push_back(AccessIdx);
3458 InstMap.push_back(LI);
3466 MemAccessInfoSet &CheckDeps);
3470 unsigned getMaxSafeDepDistBytes() {
return MaxSafeDepDistBytes; }
3474 bool shouldRetryWithRuntimeCheck() {
return ShouldRetryWithRuntimeCheck; }
3479 const Loop *InnermostLoop;
3491 unsigned MaxSafeDepDistBytes;
3495 bool ShouldRetryWithRuntimeCheck;
3509 bool isDependent(
const MemAccessInfo &A,
unsigned AIdx,
3510 const MemAccessInfo &B,
unsigned BIdx);
3514 bool couldPreventStoreLoadForward(
unsigned Distance,
unsigned TypeByteSize);
3521 return GEP->isInBounds();
3529 assert(Ty->
isPointerTy() &&
"Unexpected non ptr");
3533 if (PtrTy->getElementType()->isAggregateType()) {
3534 DEBUG(
dbgs() <<
"LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
3542 DEBUG(
dbgs() <<
"LV: Bad stride - Not an AddRecExpr pointer "
3543 << *Ptr <<
" SCEV: " << *PtrScev <<
"\n");
3549 DEBUG(
dbgs() <<
"LV: Bad stride - Not striding over innermost loop " <<
3550 *Ptr <<
" SCEV: " << *PtrScev <<
"\n");
3562 bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
3563 if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
3564 DEBUG(
dbgs() <<
"LV: Bad stride - Pointer may wrap in the address space "
3565 << *Ptr <<
" SCEV: " << *PtrScev <<
"\n");
3575 DEBUG(
dbgs() <<
"LV: Bad stride - Not a constant strided " << *Ptr <<
3576 " SCEV: " << *PtrScev <<
"\n");
3590 int64_t Stride = StepVal / Size;
3591 int64_t Rem = StepVal % Size;
3598 if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
3599 Stride != 1 && Stride != -1)
3605 bool MemoryDepChecker::couldPreventStoreLoadForward(
unsigned Distance,
3606 unsigned TypeByteSize) {
3616 const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
3618 unsigned MaxVFWithoutSLForwardIssues =
MaxVectorWidth*TypeByteSize;
3619 if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
3620 MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
3622 for (
unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
3624 if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
3625 MaxVFWithoutSLForwardIssues = (vf >>=1);
3630 if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
3631 DEBUG(
dbgs() <<
"LV: Distance " << Distance <<
3632 " that could cause a store-load forwarding conflict\n");
3636 if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
3638 MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
3642 bool MemoryDepChecker::isDependent(
const MemAccessInfo &A,
unsigned AIdx,
3643 const MemAccessInfo &B,
unsigned BIdx) {
3644 assert (AIdx < BIdx &&
"Must pass arguments in program order");
3646 Value *APtr = A.getPointer();
3647 Value *BPtr = B.getPointer();
3648 bool AIsWrite = A.getInt();
3649 bool BIsWrite = B.getInt();
3652 if (!AIsWrite && !BIsWrite)
3658 int StrideAPtr =
isStridedPtr(SE, DL, APtr, InnermostLoop);
3659 int StrideBPtr =
isStridedPtr(SE, DL, BPtr, InnermostLoop);
3661 const SCEV *Src = AScev;
3666 if (StrideAPtr < 0) {
3678 DEBUG(
dbgs() <<
"LV: Src Scev: " << *Src <<
"Sink Scev: " << *Sink
3679 <<
"(Induction step: " << StrideAPtr <<
")\n");
3680 DEBUG(
dbgs() <<
"LV: Distance for " << *InstMap[AIdx] <<
" to "
3681 << *InstMap[BIdx] <<
": " << *Dist <<
"\n");
3686 if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
3687 DEBUG(
dbgs() <<
"Non-consecutive pointer access\n");
3693 DEBUG(
dbgs() <<
"LV: Dependence because of non constant distance\n");
3694 ShouldRetryWithRuntimeCheck =
true;
3700 unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
3705 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
3706 if (IsTrueDataDependence &&
3707 (couldPreventStoreLoadForward(Val.
abs().
getZExtValue(), TypeByteSize) ||
3711 DEBUG(
dbgs() <<
"LV: Dependence is negative: NoDep\n");
3720 DEBUG(
dbgs() <<
"LV: Zero dependence difference but different types\n");
3729 "LV: ReadWrite-Write positive dependency with different types\n");
3742 if (Distance < 2*TypeByteSize ||
3743 2*TypeByteSize > MaxSafeDepDistBytes ||
3744 Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
3745 DEBUG(
dbgs() <<
"LV: Failure because of Positive distance "
3750 MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
3751 Distance : MaxSafeDepDistBytes;
3753 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
3754 if (IsTrueDataDependence &&
3755 couldPreventStoreLoadForward(Distance, TypeByteSize))
3759 " with max VF = " << MaxSafeDepDistBytes / TypeByteSize <<
'\n');
3766 MemAccessInfoSet &CheckDeps) {
3768 MaxSafeDepDistBytes = -1U;
3769 while (!CheckDeps.empty()) {
3770 MemAccessInfo CurAccess = *CheckDeps.begin();
3782 CheckDeps.erase(*AI);
3786 for (std::vector<unsigned>::iterator I1 = Accesses[*AI].
begin(),
3787 I1E = Accesses[*AI].
end(); I1 != I1E; ++I1)
3788 for (std::vector<unsigned>::iterator I2 = Accesses[*OI].
begin(),
3789 I2E = Accesses[*OI].
end(); I2 != I2E; ++I2) {
3790 if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2))
3792 if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1))
3803 bool LoopVectorizationLegality::canVectorizeMemory() {
3813 unsigned NumReads = 0;
3814 unsigned NumReadWrites = 0;
3816 PtrRtCheck.Pointers.
clear();
3817 PtrRtCheck.Need =
false;
3820 MemoryDepChecker DepChecker(SE, DL, TheLoop);
3824 be = TheLoop->
block_end(); bb != be; ++bb) {
3833 if (it->mayReadFromMemory()) {
3842 if (!Ld)
return false;
3843 if (!Ld->
isSimple() && !IsAnnotatedParallel) {
3844 DEBUG(
dbgs() <<
"LV: Found a non-simple load.\n");
3847 Loads.push_back(Ld);
3848 DepChecker.addAccess(Ld);
3853 if (it->mayWriteToMemory()) {
3855 if (!St)
return false;
3856 if (!St->
isSimple() && !IsAnnotatedParallel) {
3857 DEBUG(
dbgs() <<
"LV: Found a non-simple store.\n");
3860 Stores.push_back(St);
3861 DepChecker.addAccess(St);
3871 if (!Stores.size()) {
3872 DEBUG(
dbgs() <<
"LV: Found a read-only loop!\n");
3877 AccessAnalysis Accesses(DL, DependentAccesses);
3886 ValueVector::iterator
I,
IE;
3887 for (I = Stores.begin(), IE = Stores.end(); I !=
IE; ++
I) {
3891 if (isUniform(Ptr)) {
3892 DEBUG(
dbgs() <<
"LV: We don't allow storing to uniform addresses\n");
3898 if (Seen.insert(Ptr)) {
3900 Accesses.addStore(Ptr);
3904 if (IsAnnotatedParallel) {
3906 <<
"LV: A loop annotated parallel, ignore memory dependency "
3911 for (I = Loads.begin(), IE = Loads.end(); I !=
IE; ++
I) {
3922 bool IsReadOnlyPtr =
false;
3923 if (Seen.insert(Ptr) || !
isStridedPtr(SE, DL, Ptr, TheLoop)) {
3925 IsReadOnlyPtr =
true;
3927 Accesses.addLoad(Ptr, IsReadOnlyPtr);
3932 if (NumReadWrites == 1 && NumReads == 0) {
3933 DEBUG(
dbgs() <<
"LV: Found a write-only loop!\n");
3939 Accesses.buildDependenceSets();
3940 bool NeedRTCheck = Accesses.isRTCheckNeeded();
3944 unsigned NumComparisons = 0;
3945 bool CanDoRT =
false;
3947 CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop);
3950 DEBUG(
dbgs() <<
"LV: We need to do " << NumComparisons <<
3951 " pointer comparisons.\n");
3955 if (NumComparisons == 0 && NeedRTCheck)
3956 NeedRTCheck =
false;
3966 DEBUG(
dbgs() <<
"LV: We can perform a memory runtime check if needed.\n");
3969 if (NeedRTCheck && !CanDoRT) {
3970 DEBUG(
dbgs() <<
"LV: We can't vectorize because we can't find " <<
3971 "the array bounds.\n");
3976 PtrRtCheck.Need = NeedRTCheck;
3978 bool CanVecMem =
true;
3979 if (Accesses.isDependencyCheckNeeded()) {
3980 DEBUG(
dbgs() <<
"LV: Checking memory dependencies\n");
3981 CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
3982 Accesses.getDependenciesToCheck());
3983 MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
3985 if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
3986 DEBUG(
dbgs() <<
"LV: Retrying with memory checks\n");
3990 Accesses.resetDepChecks();
3993 PtrRtCheck.Need =
true;
3995 CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
4000 DEBUG(
dbgs() <<
"LV: Can't vectorize with memory checks\n");
4009 DEBUG(
dbgs() <<
"LV: We" << (NeedRTCheck ?
"" :
" don't") <<
4010 " need a runtime memory check.\n");
4017 unsigned NumUses = 0;
4019 if (Insts.
count(dyn_cast<Instruction>(*
Use)))
4030 if (!Set.
count(dyn_cast<Instruction>(*
Use)))
4035 bool LoopVectorizationLegality::AddReductionVar(
PHINode *Phi,
4036 ReductionKind
Kind) {
4054 bool FoundReduxOp =
false;
4060 bool FoundStartPHI =
false;
4065 unsigned NumCmpSelectPatternInst = 0;
4066 ReductionInstDesc ReduxDesc(
false, 0);
4071 VisitedInsts.
insert(Phi);
4087 while (!Worklist.
empty()) {
4097 bool IsAPhi = isa<PHINode>(Cur);
4105 if (!Cur->
isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
4106 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
4111 ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc);
4112 if (!ReduxDesc.IsReduction)
4116 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
4121 if(IsAPhi && Cur != Phi && !
areAllUsesIn(Cur, VisitedInsts))
4124 if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) ||
4125 isa<SelectInst>(Cur)))
4126 ++NumCmpSelectPatternInst;
4127 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) ||
4128 isa<SelectInst>(Cur)))
4129 ++NumCmpSelectPatternInst;
4132 FoundReduxOp |= !IsAPhi;
4150 if (ExitInstruction != 0 || Cur == Phi)
4159 ExitInstruction = Cur;
4164 if (VisitedInsts.
insert(Usr)) {
4165 if (isa<PHINode>(Usr))
4172 FoundStartPHI =
true;
4180 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
4181 NumCmpSelectPatternInst != 2)
4184 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
4191 AllowedExit.insert(ExitInstruction);
4194 ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
4195 ReduxDesc.MinMaxKind);
4196 Reductions[Phi] = RD;
4205 LoopVectorizationLegality::ReductionInstDesc
4206 LoopVectorizationLegality::isMinMaxSelectCmpPattern(
Instruction *I,
4207 ReductionInstDesc &Prev) {
4209 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
4210 "Expect a select instruction");
4216 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
4218 return ReductionInstDesc(
false, I);
4219 return ReductionInstDesc(Select, Prev.MinMaxKind);
4223 if (!(Select = dyn_cast<SelectInst>(I)))
4224 return ReductionInstDesc(
false, I);
4225 if (!(Cmp = dyn_cast<ICmpInst>(I->
getOperand(0))) &&
4227 return ReductionInstDesc(
false, I);
4229 return ReductionInstDesc(
false, I);
4236 return ReductionInstDesc(Select, MRK_UIntMin);
4238 return ReductionInstDesc(Select, MRK_UIntMax);
4240 return ReductionInstDesc(Select, MRK_SIntMax);
4242 return ReductionInstDesc(Select, MRK_SIntMin);
4244 return ReductionInstDesc(Select, MRK_FloatMin);
4246 return ReductionInstDesc(Select, MRK_FloatMax);
4248 return ReductionInstDesc(Select, MRK_FloatMin);
4250 return ReductionInstDesc(Select, MRK_FloatMax);
4252 return ReductionInstDesc(
false, I);
4255 LoopVectorizationLegality::ReductionInstDesc
4256 LoopVectorizationLegality::isReductionInstr(
Instruction *I,
4258 ReductionInstDesc &Prev) {
4263 return ReductionInstDesc(
false, I);
4265 if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
4266 Kind != RK_FloatMinMax))
4267 return ReductionInstDesc(
false, I);
4268 return ReductionInstDesc(I, Prev.MinMaxKind);
4269 case Instruction::Sub:
4270 case Instruction::Add:
4271 return ReductionInstDesc(Kind == RK_IntegerAdd, I);
4272 case Instruction::Mul:
4273 return ReductionInstDesc(Kind == RK_IntegerMult, I);
4275 return ReductionInstDesc(Kind == RK_IntegerAnd, I);
4277 return ReductionInstDesc(Kind == RK_IntegerOr, I);
4279 return ReductionInstDesc(Kind == RK_IntegerXor, I);
4280 case Instruction::FMul:
4281 return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
4282 case Instruction::FAdd:
4283 return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
4284 case Instruction::FCmp:
4285 case Instruction::ICmp:
4287 if (Kind != RK_IntegerMinMax &&
4288 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
4289 return ReductionInstDesc(
false, I);
4290 return isMinMaxSelectCmpPattern(I, Prev);
4294 LoopVectorizationLegality::InductionKind
4295 LoopVectorizationLegality::isInductionVariable(
PHINode *Phi) {
4299 return IK_NoInduction;
4305 DEBUG(
dbgs() <<
"LV: PHI is not a poly recurrence.\n");
4306 return IK_NoInduction;
4313 return IK_IntInduction;
4315 return IK_ReverseIntInduction;
4316 return IK_NoInduction;
4322 return IK_NoInduction;
4324 assert(PhiTy->
isPointerTy() &&
"The PHI must be a pointer");
4327 return IK_PtrInduction;
4329 return IK_ReversePtrInduction;
4331 return IK_NoInduction;
4334 bool LoopVectorizationLegality::isInductionVariable(
const Value *V) {
4336 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
4340 return Inductions.count(PN);
4343 bool LoopVectorizationLegality::blockNeedsPredication(
BasicBlock *BB) {
4344 assert(TheLoop->
contains(BB) &&
"Unknown block used");
4348 return !DT->dominates(BB, Latch);
4351 bool LoopVectorizationLegality::blockCanBePredicated(
BasicBlock *BB,
4355 if (it->mayReadFromMemory()) {
4362 if (it->mayWriteToMemory() || it->mayThrow())
4366 switch (it->getOpcode()) {
4368 case Instruction::UDiv:
4369 case Instruction::SDiv:
4370 case Instruction::URem:
4371 case Instruction::SRem:
4380 LoopVectorizationCostModel::selectVectorizationFactor(
bool OptForSize,
4384 if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
4385 DEBUG(
dbgs() <<
"LV: Aborting. Runtime ptr check is required in Os.\n");
4391 DEBUG(
dbgs() <<
"LV: Found trip count: " << TC <<
'\n');
4393 unsigned WidestType = getWidestType();
4394 unsigned WidestRegister = TTI.getRegisterBitWidth(
true);
4395 unsigned MaxSafeDepDist = -1U;
4396 if (Legal->getMaxSafeDepDistBytes() != -1U)
4397 MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
4398 WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
4399 WidestRegister : MaxSafeDepDist);
4400 unsigned MaxVectorSize = WidestRegister / WidestType;
4401 DEBUG(
dbgs() <<
"LV: The Widest type: " << WidestType <<
" bits.\n");
4402 DEBUG(
dbgs() <<
"LV: The Widest register is: "
4403 << WidestRegister <<
" bits.\n");
4405 if (MaxVectorSize == 0) {
4406 DEBUG(
dbgs() <<
"LV: The target has no vector registers.\n");
4410 assert(MaxVectorSize <= 32 &&
"Did not expect to pack so many elements"
4411 " into one vector!");
4413 unsigned VF = MaxVectorSize;
4419 DEBUG(
dbgs() <<
"LV: Aborting. A tail loop is required in Os.\n");
4424 VF = TC % MaxVectorSize;
4432 DEBUG(
dbgs() <<
"LV: Aborting. A tail loop is required in Os.\n");
4438 assert(
isPowerOf2_32(UserVF) &&
"VF needs to be a power of two");
4439 DEBUG(
dbgs() <<
"LV: Using user VF " << UserVF <<
".\n");
4441 Factor.Width = UserVF;
4445 float Cost = expectedCost(1);
4447 DEBUG(
dbgs() <<
"LV: Scalar loop costs: " << (
int)Cost <<
".\n");
4448 for (
unsigned i=2; i <= VF; i*=2) {
4452 float VectorCost = expectedCost(i) / (float)i;
4453 DEBUG(
dbgs() <<
"LV: Vector loop of width " << i <<
" costs: " <<
4454 (
int)VectorCost <<
".\n");
4455 if (VectorCost < Cost) {
4461 DEBUG(
dbgs() <<
"LV: Selecting VF = : "<< Width <<
".\n");
4462 Factor.Width = Width;
4463 Factor.Cost = Width * Cost;
4467 unsigned LoopVectorizationCostModel::getWidestType() {
4468 unsigned MaxWidth = 8;
4472 be = TheLoop->
block_end(); bb != be; ++bb) {
4477 Type *T = it->getType();
4480 if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
4484 if (
PHINode *PN = dyn_cast<PHINode>(it))
4485 if (!Legal->getReductionVars()->count(PN))
4489 if (
StoreInst *ST = dyn_cast<StoreInst>(it))
4495 if (T->
isPointerTy() && !isConsecutiveLoadOrStore(it))
4498 MaxWidth = std::max(MaxWidth,
4507 LoopVectorizationCostModel::selectUnrollFactor(
bool OptForSize,
4510 unsigned LoopCost) {
4535 if (Legal->getMaxSafeDepDistBytes() != -1U)
4544 unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(
true);
4545 DEBUG(
dbgs() <<
"LV: The target has " << TargetVectorRegisters <<
4546 " vector registers\n");
4548 LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
4551 R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
4552 R.NumInstructions = std::max(R.NumInstructions, 1U);
4560 unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
4563 unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
4568 LoopCost = expectedCost(VF);
4572 if (UF > MaxUnrollSize)
4577 bool HasReductions = Legal->getReductionVars()->size();
4589 if (HasReductions) {
4590 DEBUG(
dbgs() <<
"LV: Unrolling because of reductions.\n");
4598 DEBUG(
dbgs() <<
"LV: Loop cost is " << LoopCost <<
'\n');
4600 DEBUG(
dbgs() <<
"LV: Unrolling to reduce branch cost.\n");
4602 return std::min(NewUF, UF);
4605 DEBUG(
dbgs() <<
"LV: Not Unrolling.\n");
4609 LoopVectorizationCostModel::RegisterUsage
4610 LoopVectorizationCostModel::calculateRegisterUsage() {
4632 R.NumInstructions = 0;
4641 IntervalMap EndPoint;
4650 be = DFS.endRPO(); bb != be; ++bb) {
4651 R.NumInstructions += (*bb)->size();
4655 IdxToInstr[Index++] =
I;
4663 if (!Instr)
continue;
4667 LoopInvariants.
insert(Instr);
4672 EndPoint[Instr] = Index;
4685 TransposeEnds[it->second].push_back(it->first);
4688 unsigned MaxUsage = 0;
4691 DEBUG(
dbgs() <<
"LV(REG): Calculating max register usage:\n");
4692 for (
unsigned int i = 0; i < Index; ++i) {
4695 if (!Ends.
count(I))
continue;
4698 InstrList &List = TransposeEnds[i];
4699 for (
unsigned int j=0, e = List.size(); j < e; ++j)
4700 OpenIntervals.
erase(List[j]);
4703 MaxUsage = std::max(MaxUsage, OpenIntervals.
size());
4705 DEBUG(
dbgs() <<
"LV(REG): At #" << i <<
" Interval # " <<
4706 OpenIntervals.
size() <<
'\n');
4712 unsigned Invariant = LoopInvariants.
size();
4713 DEBUG(
dbgs() <<
"LV(REG): Found max usage: " << MaxUsage <<
'\n');
4714 DEBUG(
dbgs() <<
"LV(REG): Found invariant usage: " << Invariant <<
'\n');
4715 DEBUG(
dbgs() <<
"LV(REG): LoopSize: " << R.NumInstructions <<
'\n');
4717 R.LoopInvariantRegs = Invariant;
4718 R.MaxLocalUsers = MaxUsage;
4722 unsigned LoopVectorizationCostModel::expectedCost(
unsigned VF) {
4727 be = TheLoop->
block_end(); bb != be; ++bb) {
4728 unsigned BlockCost = 0;
4734 if (isa<DbgInfoIntrinsic>(it))
4737 unsigned C = getInstructionCost(it, VF);
4739 DEBUG(
dbgs() <<
"LV: Found an estimated cost of " << C <<
" for VF " <<
4740 VF <<
" For instruction: " << *it <<
'\n');
4746 if (VF == 1 && Legal->blockNeedsPredication(*bb))
4765 LoopVectorizationLegality *Legal,
4767 const Loop *TheLoop) {
4775 for (
unsigned i = 1; i < NumOperands; ++i) {
4778 !Legal->isInductionVariable(Opd))
4784 unsigned MaxMergeDistance = 64;
4805 return StepVal > MaxMergeDistance;
4809 LoopVectorizationCostModel::getInstructionCost(
Instruction *I,
unsigned VF) {
4812 if (Legal->isUniformAfterVectorization(I))
4816 Type *VectorTy = ToVectorTy(RetTy, VF);
4820 case Instruction::GetElementPtr:
4826 case Instruction::Br: {
4827 return TTI.getCFInstrCost(I->
getOpcode());
4832 case Instruction::Add:
4833 case Instruction::FAdd:
4834 case Instruction::Sub:
4835 case Instruction::FSub:
4836 case Instruction::Mul:
4837 case Instruction::FMul:
4838 case Instruction::UDiv:
4839 case Instruction::SDiv:
4840 case Instruction::FDiv:
4841 case Instruction::URem:
4842 case Instruction::SRem:
4843 case Instruction::FRem:
4844 case Instruction::Shl:
4845 case Instruction::LShr:
4846 case Instruction::AShr:
4860 return TTI.getArithmeticInstrCost(I->
getOpcode(), VectorTy, Op1VK, Op2VK);
4870 return TTI.getCmpSelInstrCost(I->
getOpcode(), VectorTy, CondTy);
4872 case Instruction::ICmp:
4873 case Instruction::FCmp: {
4875 VectorTy = ToVectorTy(ValTy, VF);
4876 return TTI.getCmpSelInstrCost(I->
getOpcode(), VectorTy);
4884 VectorTy = ToVectorTy(ValTy, VF);
4886 unsigned Alignment = SI ? SI->
getAlignment() : LI->getAlignment();
4888 LI->getPointerAddressSpace();
4894 return TTI.getAddressComputationCost(VectorTy) +
4895 TTI.getMemoryOpCost(I->
getOpcode(), VectorTy, Alignment, AS);
4898 int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
4899 bool Reverse = ConsecutiveStride < 0;
4900 unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
4901 unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
4902 if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
4903 bool IsComplexComputation =
4908 for (
unsigned i = 0; i < VF; ++i) {
4915 Instruction::InsertElement,
4920 Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
4921 Cost += VF * TTI.getMemoryOpCost(I->
getOpcode(), ValTy->getScalarType(),
4927 unsigned Cost = TTI.getAddressComputationCost(VectorTy);
4928 Cost += TTI.getMemoryOpCost(I->
getOpcode(), VectorTy, Alignment, AS);
4935 case Instruction::ZExt:
4936 case Instruction::SExt:
4937 case Instruction::FPToUI:
4938 case Instruction::FPToSI:
4939 case Instruction::FPExt:
4940 case Instruction::PtrToInt:
4941 case Instruction::IntToPtr:
4942 case Instruction::SIToFP:
4943 case Instruction::UIToFP:
4944 case Instruction::Trunc:
4945 case Instruction::FPTrunc:
4946 case Instruction::BitCast: {
4949 if (I->
getOpcode() == Instruction::Trunc &&
4950 Legal->isInductionVariable(I->
getOperand(0)))
4955 return TTI.getCastInstrCost(I->
getOpcode(), VectorTy, SrcVecTy);
4960 assert(ID &&
"Not an intrinsic call!");
4965 return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
4973 if (!RetTy->
isVoidTy() && VF != 1) {
4974 unsigned InsCost = TTI.getVectorInstrCost(Instruction::InsertElement,
4986 Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
4992 Type* LoopVectorizationCostModel::ToVectorTy(
Type *
Scalar,
unsigned VF) {
4999 static const char lv_name[] =
"Loop Vectorization";
5011 return new LoopVectorize(NoUnrolling);
5015 bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(
Instruction *Inst) {
5017 if (
StoreInst *ST = dyn_cast<StoreInst>(Inst))
5021 if (
LoadInst *LI = dyn_cast<LoadInst>(Inst))
5022 return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
5028 void InnerLoopUnroller::scalarizeInstruction(
Instruction *Instr) {
5033 setDebugLocFromInst(Builder, Instr);
5036 for (
unsigned op = 0, e = Instr->
getNumOperands(); op != e; ++op) {
5040 if (SrcOp == OldInduction) {
5041 Params.push_back(getVectorValue(SrcOp));
5050 if (SrcInst && OrigLoop->contains(SrcInst)) {
5051 assert(WidenMap.has(SrcInst) &&
"Source operand is unavailable");
5053 Params.push_back(WidenMap.get(SrcInst));
5056 VectorParts Scalars;
5057 Scalars.append(UF, SrcOp);
5058 Params.push_back(Scalars);
5063 "Invalid number of operands");
5068 Value *UndefVec = IsVoidRetTy ? 0 :
5071 VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
5074 for (
unsigned Part = 0; Part < UF; ++Part) {
5081 for (
unsigned op = 0, e = Instr->
getNumOperands(); op != e; ++op) {
5082 Value *Op = Params[op][Part];
5087 Builder.Insert(Cloned);
5092 VecResults[Part] = Cloned;
5097 InnerLoopUnroller::vectorizeMemoryInstruction(
Instruction *Instr,
5098 LoopVectorizationLegality*) {
5099 return scalarizeInstruction(Instr);
5102 Value *InnerLoopUnroller::reverseVector(
Value *Vec) {
5106 Value *InnerLoopUnroller::getBroadcastInstrs(
Value *V) {
5110 Value *InnerLoopUnroller::getConsecutiveVector(
Value* Val,
int StartIdx,
5114 assert(!ITy->
isVectorTy() &&
"Val must be a scalar");
5116 return Builder.CreateAdd(Val, C,
"induction");
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
static bool Check(DecodeStatus &Out, DecodeStatus In)
VectorType::iterator iterator
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Value * getValueOperand()
void push_back(const T &Elt)
void replaceOperandWith(unsigned i, Value *NewVal)
replaceOperandWith - Replace a specific operand.
const_iterator end(StringRef path)
Get end iterator over path.
Value * getPointerOperand()
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
class_match< Value > m_Value()
m_Value() - Match an arbitrary value and ignore it.
void setHasNoSignedWrap(bool b=true)
Abstract base class of comparison instructions.
StringRef getString() const
AnalysisUsage & addPreserved()
static IntegerType * getInt1Ty(LLVMContext &C)
Pass * createLoopVectorizePass(bool NoUnrolling=false)
APInt LLVM_ATTRIBUTE_UNUSED_RESULT abs() const
Get the absolute value;.
void addIncoming(Value *V, BasicBlock *BB)
static bool hasMultipleUsesOf(Instruction *I, SmallPtrSet< Instruction *, 8 > &Insts)
static PassRegistry * getPassRegistry()
const_iterator begin() const
uint64_t getZExtValue() const
Get zero extended value.
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value * > &Objects, const DataLayout *TD=0, unsigned MaxLookup=6)
const SCEV * getConstant(ConstantInt *V)
unsigned getScalarSizeInBits()
The main container class for the LLVM Intermediate Representation.
LLVMContext & getContext() const
bool isAnnotatedParallel() const
static MDString * get(LLVMContext &Context, StringRef Str)
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
long double copysignl(long double x, long double y);
unsigned getNumOperands() const
long double rintl(long double x);
long double truncl(long double x);
value_op_iterator value_op_begin()
virtual void getAnalysisUsage(AnalysisUsage &) const
static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr, const Loop *Lp)
Check the stride of the pointer and ensure that it does not wrap in the address space.
static unsigned getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind)
This function translates the reduction kind to an LLVM binary operator.
long double sinl(long double x);
0 1 0 0 True if ordered and less than
StringRef substr(size_t Start, size_t N=npos) const
static cl::opt< unsigned > TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
We don't vectorize loops with a known constant trip count below this number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
void initializeLoopVectorizePass(PassRegistry &)
const_iterator begin(StringRef path)
Get begin iterator over path.
bool isLoopInvariant(const SCEV *S, const Loop *L)
value_op_iterator value_op_end()
LoopT * getParentLoop() const
const Function * getParent() const
Return the enclosing method, or null if none.
long double expl(long double x);
static bool isInBoundsGep(Value *Ptr)
double nearbyint(double x);
MDNode - a tuple of other values.
bool isNoAliasCall(const Value *V)
MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > m_UnordFMax(const LHS &L, const RHS &R)
Match an 'unordered' floating point maximum function. Floating point has one special value 'NaN'...
const_iterator end() const
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
member_iterator member_begin(iterator I) const
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
BlockT * getHeader() const
LoopInfoBase< BlockT, LoopT > * LI
long double nearbyintl(long double x);
Type * getPointerElementType() const
static MDNode * get(LLVMContext &Context, ArrayRef< Value * > Vals)
const SCEV * getStart() const
StringRef getName() const
BlockT * getLoopLatch() const
long double roundl(long double x);
bool isNegative() const
Determine sign of this APInt.
bool onlyReadsMemory() const
Determine if the call does not access or only reads memory.
long double fabsl(long double x);
bool match(Val *V, const Pattern &P)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
static error_code advance(T &it, size_t Val)
static const unsigned RuntimeMemoryCheckThreshold
static const unsigned MaxUnrollFactor
Maximum vectorization unroll count.
long double cosl(long double x);
bool isIdenticalTo(const Instruction *I) const
Base class of casting instructions.
const APInt & getValue() const
Return the constant's value.
bool getLibFunc(StringRef funcName, LibFunc::Func &F) const
#define llvm_unreachable(msg)
void setHasNoUnsignedWrap(bool b=true)
member_iterator member_end() const
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
double copysign(double x, double y);
unsigned getNumArgOperands() const
Instruction * getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
static Constant * get(ArrayRef< Constant * > V)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=0)
MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > m_UnordFMin(const LHS &L, const RHS &R)
Match an 'unordered' floating point minimum function. Floating point has one special value 'NaN'...
void setName(const Twine &Name)
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
ID
LLVM Calling Convention Representation.
Instruction * clone() const
const std::string & getModuleIdentifier() const
static void cse(BasicBlock *BB)
Perform cse of induction variable instructions.
bool isIdentifiedObject(const Value *V)
uint64_t getZExtValue() const
Return the zero extended value.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
iterator findValue(const ElemTy &V) const
static Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI)
static Type * getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1)
LLVMContext & getContext() const
getContext - Return the LLVMContext in which this type was uniqued.
bool count(PtrType Ptr) const
count - Return true if the specified pointer is in the set.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
bool isAssociative() const
void addChildLoop(LoopT *NewChild)
static bool isValidElementType(Type *ElemTy)
Represents a floating point comparison operator.
BasicBlock * getSuccessor(unsigned i) const
MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty > m_OrdFMin(const LHS &L, const RHS &R)
Match an 'ordered' floating point minimum function. Floating point has one special value 'NaN'...
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
bool isFloatingPointTy() const
std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I, Intrinsic::ID ValidIntrinsicID)
void replaceAllUsesWith(Value *V)
unsigned getNumElements() const
Return the number of elements in the Vector type.
Type * getElementType() const
unsigned getNumIncomingValues() const
Value * createMinMaxOp(IRBuilder<> &Builder, LoopVectorizationLegality::MinMaxReductionKind RK, Value *Left, Value *Right)
const SCEV * getCouldNotCompute()
initializer< Ty > init(const Ty &Val)
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I, Intrinsic::ID ValidIntrinsicID)
unsigned getAlignment() const
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
BlockT * getLoopPreheader() const
LLVM Basic Block Representation.
long double floorl(long double x);
static bool isFunctionScopeIdentifiedObject(Value *Ptr)
LLVM Constant Representation.
const Value * getCondition() const
hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, const T4 &arg4, const T5 &arg5, const T6 &arg6)
int64_t getSExtValue() const
Get sign extended value.
Instr is a loop (backwards branch).
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
static bool isLikelyComplexAddressComputation(Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
Check whether the address computation for a non-consecutive memory access looks like an unlikely cand...
Interval::pred_iterator pred_begin(Interval *I)
const DebugLoc & getDebugLoc() const
getDebugLoc - Return the debug location for this node as a DebugLoc.
static const unsigned MaxVectorWidth
Maximum simd width.
MDNode * getLoopID() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=0)
bool verifyFunction(const Function &F, VerifierFailureAction action=AbortProcessAction)
BasicBlock * getIncomingBlock(unsigned i) const
ItTy next(ItTy it, Dist n)
bool contains(const LoopT *L) const
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
unsigned getBitWidth() const
Return the number of bits in the APInt.
double pow(double x, double y);
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Value * getOperand(unsigned i) const
Interval::pred_iterator pred_end(Interval *I)
Value * getPointerOperand()
bool isCommutative() const
float nearbyintf(float x);
Integer representation type.
Predicate getPredicate() const
Return the predicate for this instruction.
#define INITIALIZE_AG_DEPENDENCY(depName)
void append(in_iter in_start, in_iter in_end)
static UndefValue * get(Type *T)
LLVMContext & getContext() const
All values hold a context through their type.
PointerType * getPointerTo(unsigned AddrSpace=0)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
bool hasNoSignedWrap() const
hasNoSignedWrap - Determine whether the no signed wrap flag is set.
void setIsExact(bool b=true)
bool isConditional() const
void setLoopID(MDNode *LoopID) const
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
0 0 1 0 True if ordered and greater than
BinaryOps getOpcode() const
static Constant * getSplat(unsigned NumElts, Constant *Elt)
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSet< Value *, 4 > &Reductions)
Check that the instruction has outside loop users and is not an identified reduction variable...
Class for constant integers.
long double log10l(long double x);
Value * getIncomingValue(unsigned i) const
uint64_t getTypeAllocSize(Type *Ty) const
AnalysisUsage & addRequiredID(const void *ID)
static unsigned getGEPInductionOperand(DataLayout *DL, const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores. This ignores trailing indi...
bool isAllOnesValue() const
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
SequentialType * getType() const
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
bool equalsInt(uint64_t V) const
Determine if this constant's value is same as an unsigned char.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
Function * getCalledFunction() const
ConstantInt * getValue() const
static Constant * get(Type *Ty, double V)
bool isExact() const
isExact - Determine whether the exact flag is set.
static ConstantInt * getTrue(LLVMContext &Context)
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
double long double log2l(long double x);
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
AttributeSet getAttributes() const
Return the attribute list for this Function.
Value * getArgOperand(unsigned i) const
Class for arbitrary precision integers.
long double logl(long double x);
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="")
float powf(float x, float y);
static Type * convertPointerToIntegerType(DataLayout &DL, Type *Ty)
unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock)
static const char lv_name[]
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
std::vector< BlockT * >::const_iterator block_iterator
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
static cl::opt< unsigned > VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden, cl::desc("Sets the vectorization unroll count. ""Zero is autoselect."))
bool count(const T &V) const
count - Return true if the element is in the set.
block_iterator block_end() const
long double ceill(long double x);
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static CmpInst * Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2, const Twine &Name="", Instruction *InsertBefore=0)
Create a CmpInst.
Value * getCondition() const
void forgetLoop(const Loop *L)
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
bool isAggregateType() const
static IntegerType * getInt32Ty(LLVMContext &C)
unsigned getNumBlocks() const
getNumBlocks - Get the number of blocks in this loop in constant time.
unsigned getAlignment() const
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static const unsigned SmallLoopCost
The cost of a loop that is considered 'small' by the unroller.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
const Type * getScalarType() const
const Loop * getLoop() const
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
bool hasLocalLinkage() const
Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return the attribute object that exists at the given index.
const SCEV * getBackedgeTakenCount(const Loop *L)
StringRef getValueAsString() const
Return the attribute's value as a string. This requires the attribute to be a string attribute...
LLVMContext & getContext() const
Get the context in which this basic block lives.
MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty > m_OrdFMax(const LHS &L, const RHS &R)
Match an 'ordered' floating point maximum function. Floating point has one special value 'NaN'...
LLVM Value Representation.
const ElemTy & getLeaderValue(const ElemTy &V) const
bool hasNoUnsignedWrap() const
hasNoUnsignedWrap - Determine whether the no unsigned wrap flag is set.
ValueT lookup(const KeyT &Val) const
const SCEV * getSCEV(Value *V)
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
A vector that has set insertion semantics.
static VectorType * get(Type *ElementType, unsigned NumElements)
Disable implicit floating point insts.
long double exp2l(long double x);
static const unsigned TinyTripCountUnrollThreshold
We don't unroll loops with a known constant trip count below this number.
static cl::opt< unsigned > VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."))
block_iterator block_begin() const
bool isPowerOf2_32(uint32_t Value)
bool isNoAliasArgument(const Value *V)
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
static bool areAllUsesIn(Instruction *I, SmallPtrSet< Instruction *, 8 > &Set)
void setIncomingValue(unsigned i, Value *V)
long double powl(long double x, long double y);
float copysignf(float x, float y);
Value * getPointerOperand()
int getBasicBlockIndex(const BasicBlock *BB) const
const BasicBlock * getParent() const
static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr)
Check whether a pointer can participate in a runtime bounds check.
bool isVoidTy() const
isVoidTy - Return true if this is 'void'.
gep_type_iterator gep_type_begin(const User *GEP)