18 #define SV_NAME "slp-vectorizer"
19 #define DEBUG_TYPE "SLP"
50 cl::desc(
"Only vectorize if you gain more than this "
55 cl::desc(
"Attempt to vectorize horizontal reductions"));
60 "Attempt to vectorize horizontal reductions feeding into a store"));
64 static const unsigned MinVecRegSize = 128;
66 static const unsigned RecursionMaxDepth = 12;
70 struct BlockNumbering {
74 BlockNumbering() : BB(0), Valid(
false) {}
76 void numberInstructions() {
83 InstrVec.push_back(it);
84 assert(InstrVec[InstrIdx[it]] == it &&
"Invalid allocation");
90 assert(I->
getParent() == BB &&
"Invalid instruction");
93 assert(InstrIdx.count(I) &&
"Unknown instruction");
100 assert(InstrVec.size() > loc &&
"Invalid Index");
101 return InstrVec[loc];
104 void forget() { Valid =
false; }
124 for (
int i = 1, e = VL.
size(); i < e; i++) {
137 for (
unsigned i = 0, e = VL.
size(); i < e; ++i)
138 if (!isa<Constant>(VL[i]))
145 for (
unsigned i = 1, e = VL.
size(); i < e; ++i)
158 for (
int i = 1, e = VL.
size(); i < e; i++) {
172 for (
unsigned i = 0, n = Metadata.size(); i != n; ++i) {
173 unsigned Kind = Metadata[i].first;
174 MDNode *MD = Metadata[i].second;
176 for (
int i = 1, e = VL.
size(); MD && i != e; i++) {
200 Type *Ty = VL[0]->getType();
201 for (
int i = 1, e = VL.
size(); i < e; i++)
221 if (NElts != VL.
size())
229 for (
unsigned i = 1, e = VL.
size(); i < e; ++i) {
233 if (!CI || CI->getZExtValue() != i || E->
getOperand(0) != Vec)
246 bool AllSameOpcodeLeft =
true;
247 bool AllSameOpcodeRight =
true;
248 for (
unsigned i = 0, e = VL.
size(); i != e; ++i) {
262 AllSameOpcodeLeft = I0;
263 AllSameOpcodeRight = I1;
265 if (i && AllSameOpcodeLeft) {
266 if(
Instruction *P0 = dyn_cast<Instruction>(OrigLeft[i-1])) {
268 AllSameOpcodeLeft =
false;
270 AllSameOpcodeLeft =
false;
272 if (i && AllSameOpcodeRight) {
273 if(
Instruction *P1 = dyn_cast<Instruction>(OrigRight[i-1])) {
274 if(P1->getOpcode() != I1->getOpcode())
275 AllSameOpcodeRight =
false;
277 AllSameOpcodeRight =
false;
296 if(!i && I0->
getOpcode() > I1->getOpcode()) {
299 }
else if (i && I0->
getOpcode() > I1->getOpcode() && Right[i-1] != I1) {
303 }
else if (i && I0->
getOpcode() == I1->getOpcode() && Right[i-1] == I0) {
307 }
else if (i && I0->
getOpcode() == I1->getOpcode() && Left[i-1] == I1) {
327 bool LeftBroadcast = isSplat(Left);
328 bool RightBroadcast = isSplat(Right);
331 if (!(LeftBroadcast || RightBroadcast) &&
332 (AllSameOpcodeRight || AllSameOpcodeLeft)) {
349 F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa),
LI(Li), DT(Dt),
350 Builder(Se->getContext()) {
355 BlocksNumbers[BB] = BlockNumbering(BB);
361 Value *vectorizeTree();
374 VectorizableTree.clear();
375 ScalarToTreeEntry.clear();
377 ExternalUses.clear();
378 MemBarrierIgnoreList.clear();
385 void optimizeGatherSequence();
390 int getEntryCost(TreeEntry *E);
396 Value *vectorizeTree(TreeEntry *E);
411 static unsigned getAddressSpaceOperand(
Value *I);
415 int getGatherCost(
Type *Ty);
445 bool isFullyVectorizableTinyTree();
448 TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
453 assert(VL.
size() == Scalars.size() &&
"Invalid size");
461 Value *VectorizedValue;
472 VectorizableTree.push_back(TreeEntry());
473 int idx = VectorizableTree.size() - 1;
474 TreeEntry *Last = &VectorizableTree[idx];
475 Last->Scalars.insert(Last->Scalars.begin(), VL.
begin(), VL.
end());
476 Last->NeedToGather = !Vectorized;
478 Last->LastScalarIndex = getLastIndex(VL);
479 for (
int i = 0, e = VL.
size(); i != e; ++i) {
480 assert(!ScalarToTreeEntry.count(VL[i]) &&
"Scalar already in tree!");
481 ScalarToTreeEntry[VL[i]] = idx;
484 Last->LastScalarIndex = 0;
485 MustGather.insert(VL.
begin(), VL.
end());
492 std::vector<TreeEntry> VectorizableTree;
501 struct ExternalUser {
515 UserList ExternalUses;
519 ValueSet MemBarrierIgnoreList;
545 if (!getSameType(Roots))
547 buildTree_rec(Roots, 0);
550 for (
int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) {
551 TreeEntry *Entry = &VectorizableTree[EIdx];
554 for (
int Lane = 0,
LE = Entry->Scalars.size(); Lane !=
LE; ++Lane) {
558 if (Entry->NeedToGather)
565 bool Gathered = MustGather.count(*
User);
568 if (ScalarToTreeEntry.count(*
User) && !Gathered) {
569 DEBUG(
dbgs() <<
"SLP: \tInternal user will be removed:" <<
571 int Idx = ScalarToTreeEntry[*
User]; (void) Idx;
572 assert(!VectorizableTree[Idx].NeedToGather &&
"Bad state");
580 if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
583 DEBUG(
dbgs() <<
"SLP: Need to extract:" << **User <<
" from lane " <<
584 Lane <<
" from " << *Scalar <<
".\n");
585 ExternalUses.push_back(ExternalUser(Scalar, *User, Lane));
593 bool SameTy = getSameType(VL); (void)SameTy;
594 assert(SameTy &&
"Invalid types!");
596 if (Depth == RecursionMaxDepth) {
597 DEBUG(
dbgs() <<
"SLP: Gathering due to max recursion depth.\n");
598 newTreeEntry(VL,
false);
603 if (VL[0]->
getType()->isVectorTy()) {
604 DEBUG(
dbgs() <<
"SLP: Gathering due to vector type.\n");
605 newTreeEntry(VL,
false);
609 if (
StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
610 if (SI->getValueOperand()->getType()->isVectorTy()) {
611 DEBUG(
dbgs() <<
"SLP: Gathering due to store vector type.\n");
612 newTreeEntry(VL,
false);
617 if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) ||
618 !getSameOpcode(VL)) {
619 DEBUG(
dbgs() <<
"SLP: Gathering due to C,S,B,O. \n");
620 newTreeEntry(VL,
false);
628 if (ScalarToTreeEntry.count(VL[0])) {
629 int Idx = ScalarToTreeEntry[VL[0]];
630 TreeEntry *E = &VectorizableTree[Idx];
631 for (
unsigned i = 0, e = VL.size(); i != e; ++i) {
632 DEBUG(
dbgs() <<
"SLP: \tChecking bundle: " << *VL[i] <<
".\n");
633 if (E->Scalars[i] != VL[i]) {
634 DEBUG(
dbgs() <<
"SLP: Gathering due to partial overlap.\n");
635 newTreeEntry(VL,
false);
639 DEBUG(
dbgs() <<
"SLP: Perfect diamond merge at " << *VL[0] <<
".\n");
644 for (
unsigned i = 0, e = VL.size(); i != e; ++i) {
645 if (ScalarToTreeEntry.count(VL[i])) {
646 DEBUG(
dbgs() <<
"SLP: The instruction (" << *VL[i] <<
647 ") is already in tree.\n");
648 newTreeEntry(VL,
false);
655 for (
unsigned i = 0, e = VL.size(); i != e; ++i) {
656 if (ScalarToTreeEntry.count(VL[i]) || MustGather.count(VL[i])) {
657 DEBUG(
dbgs() <<
"SLP: Gathering due to gathered scalar. \n");
658 newTreeEntry(VL,
false);
666 int MyLastIndex = getLastIndex(VL);
669 for (
unsigned i = 0, e = VL.size(); i != e; ++i) {
671 DEBUG(
dbgs() <<
"SLP: Checking users of " << *Scalar <<
". \n");
674 DEBUG(
dbgs() <<
"SLP: \tUser " << **U <<
". \n");
677 DEBUG(
dbgs() <<
"SLP: Gathering due unknown user. \n");
678 newTreeEntry(VL,
false);
684 if (UserBlock != BB) {
685 DEBUG(
dbgs() <<
"SLP: User from a different basic block "
692 if (isa<PHINode>(*User)) {
693 DEBUG(
dbgs() <<
"SLP: \tWe can schedule PHIs:" << *User <<
". \n");
698 if (ScalarToTreeEntry.count(User)) {
699 int Idx = ScalarToTreeEntry[User];
700 int VecLocation = VectorizableTree[Idx].LastScalarIndex;
701 if (VecLocation <= MyLastIndex) {
702 DEBUG(
dbgs() <<
"SLP: Gathering due to unschedulable vector. \n");
703 newTreeEntry(VL,
false);
706 DEBUG(
dbgs() <<
"SLP: In-tree user (" << *User <<
") at #" <<
707 VecLocation <<
" vector value (" << *Scalar <<
") at #"
708 << MyLastIndex <<
".\n");
713 if (RdxOps && RdxOps->count(User))
717 BlockNumbering &BN = BlocksNumbers[BB];
718 int UserIndex = BN.getIndex(User);
719 if (UserIndex < MyLastIndex) {
721 DEBUG(
dbgs() <<
"SLP: Can't schedule extractelement for "
723 newTreeEntry(VL,
false);
730 for (
unsigned i = 0, e = VL.size(); i < e; ++i)
731 for (
unsigned j = i+1; j < e; ++j)
732 if (VL[i] == VL[j]) {
733 DEBUG(
dbgs() <<
"SLP: Scalar used twice in bundle.\n");
734 newTreeEntry(VL,
false);
740 for (
unsigned i = 0, e = VL.size(); i < e; ++i) {
743 for (
unsigned j = 0; j < e; ++j) {
744 if (i != j && *U == VL[j]) {
745 DEBUG(
dbgs() <<
"SLP: Intra-bundle dependencies!" << **U <<
". \n");
746 newTreeEntry(VL,
false);
753 DEBUG(
dbgs() <<
"SLP: We are able to schedule this bundle.\n");
755 unsigned Opcode = getSameOpcode(VL);
761 for (
unsigned i = 0, e = VL.size(); i < e; ++i) {
764 Value *
Barrier = getSinkBarrier(cast<Instruction>(VL[i]), Last);
766 DEBUG(
dbgs() <<
"SLP: Can't sink " << *VL[i] <<
"\n down to " << *Last
767 <<
"\n because of " << *Barrier <<
". Gathering.\n");
768 newTreeEntry(VL,
false);
779 for (
unsigned j = 0; j < VL.size(); ++j)
783 DEBUG(
dbgs() <<
"SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
784 newTreeEntry(VL,
false);
789 newTreeEntry(VL,
true);
790 DEBUG(
dbgs() <<
"SLP: added a vector of PHINodes.\n");
795 for (
unsigned j = 0; j < VL.size(); ++j)
796 Operands.push_back(cast<PHINode>(VL[j])->getIncomingValue(i));
798 buildTree_rec(Operands, Depth + 1);
803 bool Reuse = CanReuseExtract(VL);
805 DEBUG(
dbgs() <<
"SLP: Reusing extract sequence.\n");
807 newTreeEntry(VL, Reuse);
812 for (
unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
813 LoadInst *L = cast<LoadInst>(VL[i]);
814 if (!L->
isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) {
815 newTreeEntry(VL,
false);
816 DEBUG(
dbgs() <<
"SLP: Need to swizzle loads.\n");
820 newTreeEntry(VL,
true);
821 DEBUG(
dbgs() <<
"SLP: added a vector of loads.\n");
824 case Instruction::ZExt:
825 case Instruction::SExt:
826 case Instruction::FPToUI:
827 case Instruction::FPToSI:
828 case Instruction::FPExt:
829 case Instruction::PtrToInt:
830 case Instruction::IntToPtr:
831 case Instruction::SIToFP:
832 case Instruction::UIToFP:
833 case Instruction::Trunc:
834 case Instruction::FPTrunc:
835 case Instruction::BitCast: {
837 for (
unsigned i = 0; i < VL.size(); ++i) {
838 Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
840 newTreeEntry(VL,
false);
841 DEBUG(
dbgs() <<
"SLP: Gathering casts with different src types.\n");
845 newTreeEntry(VL,
true);
846 DEBUG(
dbgs() <<
"SLP: added a vector of casts.\n");
851 for (
unsigned j = 0; j < VL.size(); ++j)
852 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
854 buildTree_rec(Operands, Depth+1);
858 case Instruction::ICmp:
859 case Instruction::FCmp: {
862 Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
863 for (
unsigned i = 1, e = VL.size(); i < e; ++i) {
864 CmpInst *Cmp = cast<CmpInst>(VL[i]);
867 newTreeEntry(VL,
false);
868 DEBUG(
dbgs() <<
"SLP: Gathering cmp with different predicate.\n");
873 newTreeEntry(VL,
true);
874 DEBUG(
dbgs() <<
"SLP: added a vector of compares.\n");
879 for (
unsigned j = 0; j < VL.size(); ++j)
880 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
882 buildTree_rec(Operands, Depth+1);
887 case Instruction::Add:
888 case Instruction::FAdd:
889 case Instruction::Sub:
890 case Instruction::FSub:
891 case Instruction::Mul:
892 case Instruction::FMul:
893 case Instruction::UDiv:
894 case Instruction::SDiv:
895 case Instruction::FDiv:
896 case Instruction::URem:
897 case Instruction::SRem:
898 case Instruction::FRem:
899 case Instruction::Shl:
900 case Instruction::LShr:
901 case Instruction::AShr:
905 newTreeEntry(VL,
true);
906 DEBUG(
dbgs() <<
"SLP: added a vector of bin op.\n");
911 ValueList Left, Right;
912 reorderInputsAccordingToOpcode(VL, Left, Right);
913 buildTree_rec(Left, Depth + 1);
914 buildTree_rec(Right, Depth + 1);
921 for (
unsigned j = 0; j < VL.size(); ++j)
922 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
924 buildTree_rec(Operands, Depth+1);
930 for (
unsigned i = 0, e = VL.size() - 1; i < e; ++i)
931 if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
932 newTreeEntry(VL,
false);
933 DEBUG(
dbgs() <<
"SLP: Non consecutive store.\n");
937 newTreeEntry(VL,
true);
938 DEBUG(
dbgs() <<
"SLP: added a vector of stores.\n");
941 for (
unsigned j = 0; j < VL.size(); ++j)
942 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
945 MemBarrierIgnoreList.insert(VL.begin(), VL.end());
946 buildTree_rec(Operands, Depth + 1);
950 newTreeEntry(VL,
false);
951 DEBUG(
dbgs() <<
"SLP: Gathering unknown instruction.\n");
956 int BoUpSLP::getEntryCost(TreeEntry *E) {
959 Type *ScalarTy = VL[0]->getType();
960 if (
StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
961 ScalarTy = SI->getValueOperand()->getType();
964 if (E->NeedToGather) {
970 return getGatherCost(E->Scalars);
973 assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) &&
982 if (CanReuseExtract(VL))
984 return getGatherCost(VecTy);
986 case Instruction::ZExt:
987 case Instruction::SExt:
988 case Instruction::FPToUI:
989 case Instruction::FPToSI:
990 case Instruction::FPExt:
991 case Instruction::PtrToInt:
992 case Instruction::IntToPtr:
993 case Instruction::SIToFP:
994 case Instruction::UIToFP:
995 case Instruction::Trunc:
996 case Instruction::FPTrunc:
997 case Instruction::BitCast: {
1001 int ScalarCost = VL.
size() * TTI->getCastInstrCost(VL0->
getOpcode(),
1005 int VecCost = TTI->getCastInstrCost(VL0->
getOpcode(), VecTy, SrcVecTy);
1006 return VecCost - ScalarCost;
1008 case Instruction::FCmp:
1009 case Instruction::ICmp:
1011 case Instruction::Add:
1012 case Instruction::FAdd:
1013 case Instruction::Sub:
1014 case Instruction::FSub:
1015 case Instruction::Mul:
1016 case Instruction::FMul:
1017 case Instruction::UDiv:
1018 case Instruction::SDiv:
1019 case Instruction::FDiv:
1020 case Instruction::URem:
1021 case Instruction::SRem:
1022 case Instruction::FRem:
1023 case Instruction::Shl:
1024 case Instruction::LShr:
1025 case Instruction::AShr:
1032 if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
1036 TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.
getInt1Ty());
1037 VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
1047 for (
unsigned i = 0; i < VL.
size(); ++i)
1048 if (!isa<ConstantInt>(cast<Instruction>(VL[i])->getOperand(1))) {
1055 TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK);
1056 VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK);
1058 return VecCost - ScalarCost;
1065 return VecLdCost - ScalarLdCost;
1072 return VecStCost - ScalarStCost;
1079 bool BoUpSLP::isFullyVectorizableTinyTree() {
1080 DEBUG(
dbgs() <<
"SLP: Check whether the tree with height " <<
1081 VectorizableTree.size() <<
" is fully vectorizable .\n");
1084 if (VectorizableTree.size() != 2)
1088 if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
1094 int BoUpSLP::getTreeCost() {
1096 DEBUG(
dbgs() <<
"SLP: Calculating cost for tree of size " <<
1097 VectorizableTree.size() <<
".\n");
1100 if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
1101 if (!VectorizableTree.size()) {
1102 assert(!ExternalUses.size() &&
"We should not have any external users");
1107 unsigned BundleWidth = VectorizableTree[0].Scalars.size();
1109 for (
unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) {
1110 int C = getEntryCost(&VectorizableTree[i]);
1111 DEBUG(
dbgs() <<
"SLP: Adding cost " << C <<
" for bundle that starts with "
1112 << *VectorizableTree[i].Scalars[0] <<
" .\n");
1116 int ExtractCost = 0;
1117 for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
1126 DEBUG(
dbgs() <<
"SLP: Total Cost " << Cost + ExtractCost<<
".\n");
1127 return Cost + ExtractCost;
1130 int BoUpSLP::getGatherCost(
Type *Ty) {
1132 for (
unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
1133 Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
1139 Type *ScalarTy = VL[0]->getType();
1140 if (
StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
1141 ScalarTy = SI->getValueOperand()->getType();
1144 return getGatherCost(VecTy);
1148 if (
StoreInst *SI = dyn_cast<StoreInst>(I))
1149 return AA->getLocation(SI);
1151 return AA->getLocation(
LI);
1157 return LI->getPointerOperand();
1158 if (
StoreInst *SI = dyn_cast<StoreInst>(I))
1159 return SI->getPointerOperand();
1163 unsigned BoUpSLP::getAddressSpaceOperand(
Value *I) {
1164 if (
LoadInst *L = dyn_cast<LoadInst>(I))
1166 if (
StoreInst *S = dyn_cast<StoreInst>(I))
1167 return S->getPointerAddressSpace();
1171 bool BoUpSLP::isConsecutiveAccess(
Value *
A,
Value *B) {
1174 unsigned ASA = getAddressSpaceOperand(A);
1175 unsigned ASB = getAddressSpaceOperand(B);
1178 if (!PtrA || !PtrB || (ASA != ASB))
1185 unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
1186 Type *Ty = cast<PointerType>(PtrA->
getType())->getElementType();
1187 APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
1189 APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
1193 APInt OffsetDelta = OffsetB - OffsetA;
1198 return OffsetDelta == Size;
1202 APInt BaseDelta = Size - OffsetDelta;
1205 const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
1206 const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
1207 const SCEV *C = SE->getConstant(BaseDelta);
1208 const SCEV *
X = SE->getAddExpr(PtrSCEVA, C);
1209 return X == PtrSCEVB;
1217 for (++I; I != E; ++
I) {
1219 if (MemBarrierIgnoreList.count(I))
1222 if (!I->mayReadOrWriteMemory())
1225 if (!I->mayWriteToMemory())
1231 if (!A.
Ptr || !B.
Ptr || AA->alias(A, B))
1239 assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) &&
"Invalid block");
1240 BlockNumbering &BN = BlocksNumbers[BB];
1243 for (
unsigned i = 0, e = VL.
size(); i < e; ++i)
1244 MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
1250 assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) &&
"Invalid block");
1251 BlockNumbering &BN = BlocksNumbers[BB];
1253 int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));
1254 for (
unsigned i = 1, e = VL.
size(); i < e; ++i)
1255 MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
1257 assert(I &&
"bad location");
1266 Builder.SetInsertPoint(VL0->
getParent(), NextInst);
1267 Builder.SetCurrentDebugLocation(VL0->
getDebugLoc());
1274 Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
1275 if (
Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
1276 GatherSeq.insert(Insrt);
1279 if (ScalarToTreeEntry.count(VL[i])) {
1280 int Idx = ScalarToTreeEntry[VL[i]];
1281 TreeEntry *E = &VectorizableTree[Idx];
1284 for (
unsigned Lane = 0,
LE = VL.size(); Lane !=
LE; ++Lane) {
1286 if (E->Scalars[Lane] == VL[i]) {
1291 assert(FoundLane >= 0 &&
"Could not find the correct lane");
1292 ExternalUses.push_back(ExternalUser(VL[i], Insrt, FoundLane));
1302 = ScalarToTreeEntry.find(VL[0]);
1303 if (Entry != ScalarToTreeEntry.
end()) {
1304 int Idx = Entry->second;
1305 const TreeEntry *En = &VectorizableTree[Idx];
1306 if (En->isSame(VL) && En->VectorizedValue)
1307 return En->VectorizedValue;
1313 if (ScalarToTreeEntry.count(VL[0])) {
1314 int Idx = ScalarToTreeEntry[VL[0]];
1315 TreeEntry *E = &VectorizableTree[Idx];
1317 return vectorizeTree(E);
1320 Type *ScalarTy = VL[0]->getType();
1321 if (
StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
1322 ScalarTy = SI->getValueOperand()->getType();
1325 return Gather(VL, VecTy);
1328 Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1331 if (E->VectorizedValue) {
1332 DEBUG(
dbgs() <<
"SLP: Diamond merged for " << *E->Scalars[0] <<
".\n");
1333 return E->VectorizedValue;
1336 Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
1338 if (
StoreInst *SI = dyn_cast<StoreInst>(VL0))
1339 ScalarTy = SI->getValueOperand()->getType();
1342 if (E->NeedToGather) {
1343 setInsertPointAfterBundle(E->Scalars);
1344 return Gather(E->Scalars, VecTy);
1348 assert(Opcode == getSameOpcode(E->Scalars) &&
"Invalid opcode");
1354 Builder.SetCurrentDebugLocation(PH->
getDebugLoc());
1356 E->VectorizedValue = NewPhi;
1366 if (!VisitedBBs.
insert(IBB)) {
1372 for (
unsigned j = 0; j < E->Scalars.size(); ++j)
1373 Operands.push_back(cast<PHINode>(E->Scalars[j])->
1374 getIncomingValueForBlock(IBB));
1377 Builder.SetCurrentDebugLocation(PH->
getDebugLoc());
1378 Value *Vec = vectorizeTree(Operands);
1383 "Invalid number of incoming values");
1388 if (CanReuseExtract(E->Scalars)) {
1390 E->VectorizedValue = V;
1393 return Gather(E->Scalars, VecTy);
1395 case Instruction::ZExt:
1396 case Instruction::SExt:
1397 case Instruction::FPToUI:
1398 case Instruction::FPToSI:
1399 case Instruction::FPExt:
1400 case Instruction::PtrToInt:
1401 case Instruction::IntToPtr:
1402 case Instruction::SIToFP:
1403 case Instruction::UIToFP:
1404 case Instruction::Trunc:
1405 case Instruction::FPTrunc:
1406 case Instruction::BitCast: {
1408 for (
int i = 0, e = E->Scalars.size(); i < e; ++i)
1409 INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
1411 setInsertPointAfterBundle(E->Scalars);
1413 Value *InVec = vectorizeTree(INVL);
1415 if (
Value *V = alreadyVectorized(E->Scalars))
1420 E->VectorizedValue = V;
1423 case Instruction::FCmp:
1424 case Instruction::ICmp: {
1425 ValueList LHSV, RHSV;
1426 for (
int i = 0, e = E->Scalars.size(); i < e; ++i) {
1427 LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
1428 RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
1431 setInsertPointAfterBundle(E->Scalars);
1433 Value *L = vectorizeTree(LHSV);
1434 Value *R = vectorizeTree(RHSV);
1436 if (
Value *V = alreadyVectorized(E->Scalars))
1441 if (Opcode == Instruction::FCmp)
1442 V = Builder.CreateFCmp(P0, L, R);
1444 V = Builder.CreateICmp(P0, L, R);
1446 E->VectorizedValue = V;
1450 ValueList TrueVec, FalseVec, CondVec;
1451 for (
int i = 0, e = E->Scalars.size(); i < e; ++i) {
1452 CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
1453 TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
1454 FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2));
1457 setInsertPointAfterBundle(E->Scalars);
1459 Value *Cond = vectorizeTree(CondVec);
1460 Value *True = vectorizeTree(TrueVec);
1461 Value *False = vectorizeTree(FalseVec);
1463 if (
Value *V = alreadyVectorized(E->Scalars))
1466 Value *V = Builder.CreateSelect(Cond, True, False);
1467 E->VectorizedValue = V;
1470 case Instruction::Add:
1471 case Instruction::FAdd:
1472 case Instruction::Sub:
1473 case Instruction::FSub:
1474 case Instruction::Mul:
1475 case Instruction::FMul:
1476 case Instruction::UDiv:
1477 case Instruction::SDiv:
1478 case Instruction::FDiv:
1479 case Instruction::URem:
1480 case Instruction::SRem:
1481 case Instruction::FRem:
1482 case Instruction::Shl:
1483 case Instruction::LShr:
1484 case Instruction::AShr:
1488 ValueList LHSVL, RHSVL;
1490 reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
1492 for (
int i = 0, e = E->Scalars.size(); i < e; ++i) {
1493 LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
1494 RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
1497 setInsertPointAfterBundle(E->Scalars);
1499 Value *LHS = vectorizeTree(LHSVL);
1500 Value *RHS = vectorizeTree(RHSVL);
1502 if (LHS == RHS && isa<Instruction>(LHS)) {
1506 if (
Value *V = alreadyVectorized(E->Scalars))
1511 E->VectorizedValue = V;
1514 return propagateMetadata(I, E->Scalars);
1521 setInsertPointAfterBundle(E->Scalars);
1529 LI = Builder.CreateLoad(VecPtr);
1531 E->VectorizedValue =
LI;
1532 return propagateMetadata(LI, E->Scalars);
1540 for (
int i = 0, e = E->Scalars.size(); i < e; ++i)
1541 ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand());
1543 setInsertPointAfterBundle(E->Scalars);
1545 Value *VecValue = vectorizeTree(ValueOp);
1548 StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
1550 E->VectorizedValue = S;
1551 return propagateMetadata(S, E->Scalars);
1559 Value *BoUpSLP::vectorizeTree() {
1560 Builder.SetInsertPoint(
F->getEntryBlock().begin());
1561 vectorizeTree(&VectorizableTree[0]);
1563 DEBUG(
dbgs() <<
"SLP: Extracting " << ExternalUses.size() <<
" values .\n");
1566 for (UserList::iterator it = ExternalUses.begin(), e = ExternalUses.end();
1568 Value *Scalar = it->Scalar;
1576 assert(ScalarToTreeEntry.count(Scalar) &&
"Invalid scalar");
1578 int Idx = ScalarToTreeEntry[
Scalar];
1579 TreeEntry *E = &VectorizableTree[Idx];
1580 assert(!E->NeedToGather &&
"Extracting from a gather list");
1582 Value *Vec = E->VectorizedValue;
1583 assert(Vec &&
"Can't find vectorizable value");
1585 Value *Lane = Builder.getInt32(it->Lane);
1588 if (
PHINode *PN = dyn_cast<PHINode>(Vec)) {
1589 Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
1590 Value *Ex = Builder.CreateExtractElement(Vec, Lane);
1592 }
else if (isa<Instruction>(Vec)){
1593 if (
PHINode *PH = dyn_cast<PHINode>(User)) {
1597 Value *Ex = Builder.CreateExtractElement(Vec, Lane);
1602 Builder.SetInsertPoint(cast<Instruction>(User));
1603 Value *Ex = Builder.CreateExtractElement(Vec, Lane);
1607 Builder.SetInsertPoint(
F->getEntryBlock().begin());
1608 Value *Ex = Builder.CreateExtractElement(Vec, Lane);
1612 DEBUG(
dbgs() <<
"SLP: Replaced:" << *User <<
".\n");
1616 for (
int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) {
1617 TreeEntry *Entry = &VectorizableTree[EIdx];
1620 for (
int Lane = 0,
LE = Entry->Scalars.size(); Lane !=
LE; ++Lane) {
1621 Value *Scalar = Entry->Scalars[Lane];
1624 if (Entry->NeedToGather)
1627 assert(Entry->VectorizedValue &&
"Can't find vectorizable value");
1632 UE = Scalar->
use_end(); User != UE; ++User) {
1633 DEBUG(
dbgs() <<
"SLP: \tvalidating user:" << **User <<
".\n");
1634 assert(!MustGather.count(*User) &&
1635 "Replacing gathered value with undef");
1637 assert((ScalarToTreeEntry.count(*User) ||
1639 (RdxOps && RdxOps->count(*User))) &&
1640 "Replacing out-of-tree value with undef");
1645 DEBUG(
dbgs() <<
"SLP: \tErasing scalar:" << *Scalar <<
".\n");
1646 cast<Instruction>(
Scalar)->eraseFromParent();
1651 BlocksNumbers[it].forget();
1653 Builder.ClearInsertionPoint();
1655 return VectorizableTree[0].VectorizedValue;
1668 void BoUpSLP::optimizeGatherSequence() {
1669 DEBUG(
dbgs() <<
"SLP: Optimizing " << GatherSeq.size()
1670 <<
" gather sequences instructions.\n");
1676 e = GatherSeq.end(); it != e; ++it) {
1682 if (VisitedBBs.insert(Insert->
getParent()))
1700 if (CurrVec && L->
contains(CurrVec))
1702 if (NewElem && L->
contains(NewElem))
1711 std::stable_sort(CSEWorkList.
begin(), CSEWorkList.
end(), DTCmp(DT));
1718 E = CSEWorkList.
end();
1720 assert((I == CSEWorkList.
begin() || !DT->dominates(*I, *
llvm::prior(I))) &&
1721 "Worklist not sorted properly!");
1726 if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) ||
1727 !GatherSeq.count(In))
1736 DT->dominates((*v)->getParent(), In->
getParent())) {
1744 assert(std::find(Visited.begin(), Visited.end(),
In) == Visited.end());
1745 Visited.push_back(In);
1770 virtual bool runOnFunction(
Function &
F) {
1771 SE = &getAnalysis<ScalarEvolution>();
1772 DL = getAnalysisIfAvailable<DataLayout>();
1773 TTI = &getAnalysis<TargetTransformInfo>();
1774 AA = &getAnalysis<AliasAnalysis>();
1775 LI = &getAnalysis<LoopInfo>();
1776 DT = &getAnalysis<DominatorTree>();
1779 bool Changed =
false;
1783 if (!TTI->getNumberOfRegisters(
true))
1799 BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT);
1807 if (
unsigned count = collectStores(BB, R)) {
1809 DEBUG(
dbgs() <<
"SLP: Found " << count <<
" stores to vectorize.\n");
1810 Changed |= vectorizeStoreChains(R);
1814 Changed |= vectorizeChainsInBlock(BB, R);
1818 R.optimizeGatherSequence();
1843 unsigned collectStores(
BasicBlock *BB, BoUpSLP &R);
1846 bool tryToVectorizePair(
Value *A,
Value *B, BoUpSLP &R);
1856 bool vectorizeStoreChains(BoUpSLP &R);
1860 bool vectorizeChainsInBlock(
BasicBlock *BB, BoUpSLP &R);
1868 StoreListMap StoreRefs;
1877 unsigned SliceBegin,
1878 unsigned SliceSize) {
1879 for (
unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
1887 int CostThreshold, BoUpSLP &R) {
1888 unsigned ChainLen = Chain.
size();
1889 DEBUG(
dbgs() <<
"SLP: Analyzing a store chain of length " << ChainLen
1891 Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
1892 unsigned Sz = DL->getTypeSizeInBits(StoreTy);
1893 unsigned VF = MinVecRegSize / Sz;
1901 bool Changed =
false;
1903 for (
unsigned i = 0, e = ChainLen; i < e; ++i) {
1908 if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
1911 DEBUG(
dbgs() <<
"SLP: Analyzing " << VF <<
" stores at offset " << i
1915 R.buildTree(Operands);
1917 int Cost = R.getTreeCost();
1919 DEBUG(
dbgs() <<
"SLP: Found cost=" << Cost <<
" for VF=" << VF <<
"\n");
1920 if (Cost < CostThreshold) {
1921 DEBUG(
dbgs() <<
"SLP: Decided to vectorize cost=" << Cost <<
"\n");
1934 int costThreshold, BoUpSLP &R) {
1941 bool Changed =
false;
1945 for (
unsigned i = 0, e = Stores.
size(); i < e; ++i) {
1946 for (
unsigned j = 0; j < e; ++j) {
1950 if (R.isConsecutiveAccess(Stores[i], Stores[j])) {
1953 ConsecutiveChain[Stores[i]] = Stores[j];
1961 if (Tails.
count(*it))
1970 if (VectorizedStores.
count(I))
1974 I = ConsecutiveChain[
I];
1977 bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R);
1982 Changed |= Vectorized;
1989 unsigned SLPVectorizer::collectStores(
BasicBlock *BB, BoUpSLP &R) {
2010 StoreRefs[Ptr].push_back(SI);
2016 bool SLPVectorizer::tryToVectorizePair(
Value *A,
Value *B, BoUpSLP &R) {
2020 return tryToVectorizeList(VL, R);
2027 DEBUG(
dbgs() <<
"SLP: Vectorizing a list of length = " << VL.
size() <<
".\n");
2036 Type *Ty0 = I0->getType();
2037 unsigned Sz = DL->getTypeSizeInBits(Ty0);
2038 unsigned VF = MinVecRegSize / Sz;
2040 for (
int i = 0, e = VL.
size(); i < e; ++i) {
2041 Type *Ty = VL[i]->getType();
2045 if (!Inst || Inst->
getOpcode() != Opcode0)
2049 bool Changed =
false;
2054 for (
unsigned i = 0, e = VL.
size(); i < e; ++i) {
2055 unsigned OpsWidth = 0;
2066 if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
2069 DEBUG(
dbgs() <<
"SLP: Analyzing " << OpsWidth <<
" operations "
2074 int Cost = R.getTreeCost();
2077 DEBUG(
dbgs() <<
"SLP: Vectorizing pair at cost:" << Cost <<
".\n");
2089 bool SLPVectorizer::tryToVectorize(
BinaryOperator *V, BoUpSLP &R) {
2100 if (B && B->hasOneUse()) {
2103 if (tryToVectorizePair(A, B0, R)) {
2107 if (tryToVectorizePair(A, B1, R)) {
2117 if (tryToVectorizePair(A0, B, R)) {
2121 if (tryToVectorizePair(A1, B, R)) {
2139 static Value *createRdxShuffleMask(
unsigned VecLen,
unsigned NumEltsToRdx,
2140 bool IsPairwise,
bool IsLeft,
2142 assert((IsPairwise || !IsLeft) &&
"Don't support a <0,1,undef,...> mask");
2149 for (
unsigned i = 0; i != NumEltsToRdx; ++i)
2150 ShuffleMask[i] = Builder.
getInt32(2 * i + !IsLeft);
2153 for (
unsigned i = 0; i != NumEltsToRdx; ++i)
2154 ShuffleMask[i] = Builder.
getInt32(NumEltsToRdx + i);
2187 class HorizontalReduction {
2195 unsigned ReductionOpcode;
2197 unsigned ReducedValueOpcode;
2199 unsigned ReduxWidth;
2202 bool IsPairwiseReduction;
2205 HorizontalReduction()
2206 : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
2207 ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(
false) {}
2214 "Thi phi needs to use the binary operator");
2237 ReducedValueOpcode = 0;
2246 if (ReductionOpcode != Instruction::Add &&
2247 ReductionOpcode != Instruction::FAdd)
2254 while (!Stack.
empty()) {
2256 unsigned EdgeToVist = Stack.
back().second++;
2257 bool IsReducedValue = TreeN->
getOpcode() != ReductionOpcode;
2269 if (EdgeToVist == 2 || IsReducedValue) {
2270 if (IsReducedValue) {
2273 if (!ReducedValueOpcode)
2274 ReducedValueOpcode = TreeN->
getOpcode();
2275 else if (ReducedValueOpcode != TreeN->
getOpcode())
2277 ReducedVals.push_back(TreeN);
2282 ReductionOps.insert(TreeN);
2293 Stack.
push_back(std::make_pair(Next, 0));
2294 else if (NextV != Phi)
2303 if (ReducedVals.empty())
2306 unsigned NumReducedVals = ReducedVals.size();
2307 if (NumReducedVals < ReduxWidth)
2310 Value *VectorizedTree = 0;
2317 for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
2319 V.buildTree(ValsToReduce, &ReductionOps);
2322 int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
2326 DEBUG(
dbgs() <<
"SLP: Vectorizing horizontal reduction at cost:" << Cost
2330 DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
2331 Value *VectorizedRoot = V.vectorizeTree();
2334 Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
2335 if (VectorizedTree) {
2337 VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
2338 ReducedSubTree,
"bin.rdx");
2340 VectorizedTree = ReducedSubTree;
2343 if (VectorizedTree) {
2345 for (; i < NumReducedVals; ++i) {
2347 cast<Instruction>(ReducedVals[i])->getDebugLoc());
2348 VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
2353 assert(ReductionRoot != NULL &&
"Need a reduction operation");
2354 ReductionRoot->setOperand(0, VectorizedTree);
2355 ReductionRoot->setOperand(1, ReductionPHI);
2357 ReductionRoot->replaceAllUsesWith(VectorizedTree);
2359 return VectorizedTree != 0;
2370 int SplittingRdxCost = TTI->
getReductionCost(ReductionOpcode, VecTy,
false);
2372 IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
2373 int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
2375 int ScalarReduxCost =
2378 DEBUG(
dbgs() <<
"SLP: Adding cost " << VecReduxCost - ScalarReduxCost
2379 <<
" for reduction that starts with " << *FirstReducedVal
2381 << (IsPairwiseReduction ?
"pairwise" :
"splitting")
2382 <<
" reduction)\n");
2384 return VecReduxCost - ScalarReduxCost;
2389 if (Opcode == Instruction::FAdd)
2396 assert(VectorizedValue &&
"Need to have a vectorized tree node");
2399 "We only handle power-of-two reductions for now");
2401 Value *TmpVec = ValToReduce;
2402 for (
unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
2403 if (IsPairwiseReduction) {
2405 createRdxShuffleMask(ReduxWidth, i,
true,
true, Builder);
2407 createRdxShuffleMask(ReduxWidth, i,
true,
false, Builder);
2414 TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
2418 createRdxShuffleMask(ReduxWidth, i,
false,
false, Builder);
2421 TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf,
"bin.rdx");
2468 bool SLPVectorizer::vectorizeChainsInBlock(
BasicBlock *BB, BoUpSLP &R) {
2469 bool Changed =
false;
2473 bool HaveVectorizedPhiNodes =
true;
2474 while (HaveVectorizedPhiNodes) {
2475 HaveVectorizedPhiNodes =
false;
2485 if (!VisitedInstrs.
count(P))
2490 std::stable_sort(Incoming.
begin(), Incoming.
end(), PhiTypeSorterFunc);
2499 while (SameTypeIt != E &&
2500 (*SameTypeIt)->getType() == (*IncIt)->getType()) {
2501 VisitedInstrs.
insert(*SameTypeIt);
2506 unsigned NumElts = (SameTypeIt - IncIt);
2507 DEBUG(
errs() <<
"SLP: Trying to vectorize starting at PHIs (" << NumElts <<
")\n");
2511 HaveVectorizedPhiNodes =
true;
2521 VisitedInstrs.
clear();
2525 if (!VisitedInstrs.
insert(it))
2528 if (isa<DbgInfoIntrinsic>(it))
2532 if (
PHINode *P = dyn_cast<PHINode>(it)) {
2546 HorizontalReduction HorRdx;
2548 HorRdx.matchAssociativeReduction(P, BI, DL) &&
2549 HorRdx.tryToReduce(R, TTI)) {
2560 if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
2574 if (
StoreInst *SI = dyn_cast<StoreInst>(it))
2577 HorizontalReduction HorRdx;
2578 if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
2579 HorRdx.tryToReduce(R, TTI)) ||
2580 tryToVectorize(BinOp, R))) {
2589 if (
CmpInst *CI = dyn_cast<CmpInst>(it)) {
2599 for (
int i = 0; i < 2; ++i) {
2616 if (!findBuildVector(IE, Ops))
2619 if (tryToVectorizeList(Ops, R)) {
2632 bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
2633 bool Changed =
false;
2635 for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
2637 if (it->second.size() < 2)
2640 DEBUG(
dbgs() <<
"SLP: Analyzing a store chain of length "
2641 << it->second.size() <<
".\n");
2644 for (
unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
2645 unsigned Len = std::min<unsigned>(CE - CI, 16);
Value * getValueOperand()
void push_back(const T &Elt)
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Abstract base class of comparison instructions.
AnalysisUsage & addPreserved()
static IntegerType * getInt1Ty(LLVMContext &C)
Value * CreateFAdd(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=0)
void addIncoming(Value *V, BasicBlock *BB)
static PassRegistry * getPassRegistry()
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getNumOperands() const
virtual void getAnalysisUsage(AnalysisUsage &) const
bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const
static cl::opt< int > SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this ""number "))
MDNode - a tuple of other values.
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
iterator end()
Get an iterator to the end of the SetVector.
LoopInfoBase< BlockT, LoopT > * LI
StringRef getName() const
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * GetUnderlyingObject(Value *V, const DataLayout *TD=0, unsigned MaxLookup=6)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
static Value * getPointerOperand(Instruction &Inst)
bool isIdenticalTo(const Instruction *I) const
Base class of casting instructions.
#define llvm_unreachable(msg)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Instruction * getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
static Constant * get(ArrayRef< Constant * > V)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
po_iterator< T > po_begin(T G)
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
ID
LLVM Calling Convention Representation.
Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset)
Strips like stripInBoundsConstantOffsets but also accumulates the constant offset stripped...
uint64_t getZExtValue() const
Return the zero extended value.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool count(PtrType Ptr) const
count - Return true if the specified pointer is in the set.
void SetFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
bool isAssociative() const
ArrayRef< T > slice(unsigned N) const
slice(n) - Chop off the first N elements of the array.
iterator begin()
Get an iterator to the beginning of the SetVector.
void replaceAllUsesWith(Value *V)
unsigned getNumElements() const
Return the number of elements in the Vector type.
size_t size() const
size - Get the array size.
unsigned getNumIncomingValues() const
void replaceUsesOfWith(Value *From, Value *To)
initializer< Ty > init(const Ty &Val)
static cl::opt< bool > ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
unsigned getAlignment() const
static cl::opt< bool > ShouldStartVectorizeHorAtStore("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store"))
BlockT * getLoopPreheader() const
LLVM Basic Block Representation.
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
const DebugLoc & getDebugLoc() const
getDebugLoc - Return the debug location for this node as a DebugLoc.
bool verifyFunction(const Function &F, VerifierFailureAction action=AbortProcessAction)
BasicBlock * getIncomingBlock(unsigned i) const
bool contains(const LoopT *L) const
Value * getOperand(unsigned i) const
Value * getPointerOperand()
bool isCommutative() const
Predicate getPredicate() const
Return the predicate for this instruction.
Location - A description of a memory location.
void setAlignment(unsigned Align)
#define INITIALIZE_AG_DEPENDENCY(depName)
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Methods for metadata merging.
static UndefValue * get(Type *T)
PointerType * getPointerTo(unsigned AddrSpace=0)
void setMetadata(unsigned KindID, MDNode *Node)
bool mayWriteToMemory() const
BinaryOps getOpcode() const
Class for constant integers.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Value * getIncomingValue(unsigned i) const
unsigned getVectorNumElements() const
MDNode * getMetadata(unsigned KindID) const
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
const BasicBlock & getEntryBlock() const
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Class for arbitrary precision integers.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
bool count(const T &V) const
count - Return true if the element is in the set.
void initializeSLPVectorizerPass(PassRegistry &)
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
bool isAggregateType() const
po_iterator< T > po_end(T G)
unsigned getAlignment() const
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
User(Type *ty, unsigned vty, Use *OpList, unsigned NumOps)
const Value * Ptr
Ptr - The address of the start of the location.
Pass * createSLPVectorizerPass()
static const char lv_name[]
LLVM Value Representation.
void setAlignment(unsigned Align)
vector_type::const_iterator iterator
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
A vector that has set insertion semantics.
static VectorType * get(Type *ElementType, unsigned NumElements)
Disable implicit floating point insts.
static const Function * getParent(const Value *V)
void moveBefore(Instruction *MovePos)
uint64_t getTypeSizeInBits(Type *Ty) const
ItTy prior(ItTy it, Dist n)
bool isPowerOf2_32(uint32_t Value)
Convenience struct for specifying and reasoning about fast-math flags.
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Value * getPointerOperand()
static RegisterPass< NVPTXAllocaHoisting > X("alloca-hoisting","Hoisting alloca instructions in non-entry ""blocks to the entry block")
const BasicBlock * getParent() const
bool isVoidTy() const
isVoidTy - Return true if this is 'void'.