// Short name of this pass. It is reused as DEBUG_TYPE on the next line, and
// the same text appears in the NumFusedOps statistic description.
17 #define BBV_NAME "bb-vectorize"
// DEBUG_TYPE is set to the pass name.
// NOTE(review): presumably this is the tag that selects this file's
// DEBUG(...) output -- confirm against the LLVM debug-output conventions.
18 #define DEBUG_TYPE BBV_NAME
59 cl::desc(
"The required chain depth for vectorization"));
64 " target information"));
68 cl::desc(
"The maximum search distance for instruction pairs"));
72 cl::desc(
"Replicating one element to a pair breaks the chain"));
76 cl::desc(
"The size of the native vector registers"));
80 cl::desc(
"The maximum number of pairing iterations"));
84 cl::desc(
"Don't try to form non-2^n-length vectors"));
88 cl::desc(
"The maximum number of pairable instructions per group"));
92 cl::desc(
"The maximum number of candidate instruction pairs per group"));
97 " a full cycle check"));
101 cl::desc(
"Don't try to vectorize boolean (i1) values"));
105 cl::desc(
"Don't try to vectorize integer values"));
109 cl::desc(
"Don't try to vectorize floating-point values"));
114 cl::desc(
"Don't try to vectorize pointer values"));
118 cl::desc(
"Don't try to vectorize casting (conversion) operations"));
122 cl::desc(
"Don't try to vectorize floating-point math intrinsics"));
126 cl::desc(
"Don't try to vectorize the fused-multiply-add intrinsic"));
130 cl::desc(
"Don't try to vectorize select instructions"));
134 cl::desc(
"Don't try to vectorize comparison instructions"));
138 cl::desc(
"Don't try to vectorize getelementptr instructions"));
142 cl::desc(
"Don't try to vectorize loads and stores"));
146 cl::desc(
"Only generate aligned loads and stores"));
151 cl::desc(
"Don't boost the chain-depth contribution of loads and stores"));
155 cl::desc(
"Use a fast instruction dependency analysis"));
161 cl::desc(
"When debugging is enabled, output information on the"
162 " instruction-examination process"));
166 cl::desc(
"When debugging is enabled, output information on the"
167 " candidate-selection process"));
171 cl::desc(
"When debugging is enabled, output information on the"
172 " pair-selection process"));
176 cl::desc(
"When debugging is enabled, output information on the"
177 " cycle-checking process"));
182 cl::desc(
"When debugging is enabled, dump the basic block after"
183 " every pair is fused"));
// Statistic counter for fused operations. vectorizePairs() adds the number of
// chosen pairs to it (NumFusedOps += AllChosenPairs.size()) before calling
// fuseChosenPairs().
186 STATISTIC(NumFusedOps,
"Number of operations fused by bb-vectorize");
// Two IR values taken together. Built as ValuePair(I, J) when a candidate
// pair is recorded; order matters, since both (I, J) and (J, I) are looked up
// separately (for example in FixedOrderPairs).
208 typedef std::pair<Value *, Value *> ValuePair;
// A pair plus a signed integer cost. Entries of this type are inserted into
// CandidatePairCostSavings and later read back as an int contribution to the
// effective DAG size.
209 typedef std::pair<ValuePair, int> ValuePairWithCost;
// A pair plus a size_t depth. Used for the work-queue entries and DAG entries
// built from getDepthFactor() values in the DAG construction/pruning code.
210 typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
// A pair of pairs: VPPair(P, Q) links pair P to a pair Q connected to it.
211 typedef std::pair<ValuePair, ValuePair> VPPair;
// A pair-of-pairs plus an unsigned tag. Inserted into PairConnectionTypes with
// a PairConnection* enumerator (Direct, Swap or Splat) as the tag.
212 typedef std::pair<VPPair, unsigned> VPPairWithType;
// Runs one pairing-and-fusing round over basic block BB.
//
// As far as the visible definition shows, it repeatedly collects candidate
// pairs (getCandidatePairs), computes their connections
// (computeConnectedPairs), builds the dependency map (buildDepMap) and selects
// pairs (choosePairs) for as long as getCandidatePairs asks to continue, then
// fuses everything that was chosen (fuseChosenPairs).
//
// BB          - the basic block to process.
// NonPow2Len  - forwarded to getCandidatePairs; defaults to false. The caller
//               passes true in its "non-2^n-length vectors" loop.
//
// Returns false when no pairs were chosen.
// NOTE(review): the final return statement is not visible here; presumably
// it returns true after fusing -- confirm against the full definition.
222 bool vectorizePairs(
BasicBlock &BB,
bool NonPow2Len =
false);
229 std::vector<Value *> &PairableInsts,
bool NonPow2Len);
234 enum PairConnectionType {
235 PairConnectionDirect,
240 void computeConnectedPairs(
243 std::vector<Value *> &PairableInsts,
244 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
249 std::vector<Value *> &PairableInsts,
252 void choosePairs(
DenseMap<
Value *, std::vector<Value *> > &CandidatePairs,
255 std::vector<Value *> &PairableInsts,
258 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
259 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
264 std::vector<Value *> &PairableInsts,
268 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
269 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
// Decides whether instruction I may take part in a candidate pair;
// getCandidatePairs skips I when this returns false.
//
// I                 - the instruction to classify.
// IsSimpleLoadStore - output. Reset to false on entry; for a load or store it
//                     is set to the instruction's isSimple() result.
//
// The visible definition consults the Config.Vectorize* switches (MemOps,
// Casts, Select, Cmp, GEP, Bools, Floats, Pointers) and whether TD is
// available when making the decision.
272 bool isInstVectorizable(
Instruction *
I,
bool &IsSimpleLoadStore);
275 bool IsSimpleLoadStore,
bool NonPow2Len,
276 int &CostSavings,
int &FixedOrder);
283 void computePairsConnectedTo(
286 std::vector<Value *> &PairableInsts,
287 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
291 bool pairsConflict(ValuePair
P, ValuePair Q,
293 DenseMap<ValuePair, std::vector<ValuePair> >
294 *PairableInstUserMap = 0,
297 bool pairWillFormCycle(ValuePair
P,
298 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
303 std::vector<Value *> &PairableInsts,
304 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
306 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
313 void buildInitialDAGFor(
316 std::vector<Value *> &PairableInsts,
317 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
326 std::vector<Value *> &PairableInsts,
329 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
330 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
332 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
336 int &BestEffSize,
Value *II, std::vector<Value *>&JJ,
343 unsigned MaskOffset,
unsigned NumInElem,
344 unsigned NumInElem1,
unsigned IdxOffset,
345 std::vector<Constant*> &Mask);
351 unsigned o,
Value *&LOp,
unsigned numElemL,
352 Type *ArgTypeL,
Type *ArgTypeR,
bool IBeforeJ,
353 unsigned IdxOff = 0);
374 std::vector<Value *> &PairableInsts,
391 if (!DT->isReachableFromEntry(&BB)) {
397 DEBUG(
if (TTI)
dbgs() <<
"BBV: using target information\n");
399 bool changed =
false;
405 (TTI || v <= Config.VectorBits) &&
406 (!Config.MaxIter || n <= Config.MaxIter);
408 DEBUG(
dbgs() <<
"BBV: fusing loop #" << n <<
409 " for " << BB.
getName() <<
" in " <<
411 if (vectorizePairs(BB))
419 for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
420 DEBUG(
dbgs() <<
"BBV: fusing for non-2^n-length vectors loop #: " <<
421 n <<
" for " << BB.
getName() <<
" in " <<
423 if (!vectorizePairs(BB,
true))
break;
431 virtual bool runOnBasicBlock(
BasicBlock &BB) {
432 AA = &getAnalysis<AliasAnalysis>();
433 DT = &getAnalysis<DominatorTree>();
434 SE = &getAnalysis<ScalarEvolution>();
435 TD = getAnalysisIfAvailable<DataLayout>();
438 return vectorizeBB(BB);
455 "Cannot form vector from incompatible scalar types");
459 if (
VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
460 numElem = VTy->getNumElements();
465 if (
VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
466 numElem += VTy->getNumElements();
476 if (
StoreInst *SI = dyn_cast<StoreInst>(I)) {
480 Value *IVal = SI->getValueOperand();
486 if (
CastInst *CI = dyn_cast<CastInst>(I))
491 if (
SelectInst *SI = dyn_cast<SelectInst>(I)) {
492 T2 = SI->getCondition()->getType();
494 T2 = SI->getOperand(0)->getType();
495 }
else if (
CmpInst *CI = dyn_cast<CmpInst>(I)) {
496 T2 = CI->getOperand(0)->getType();
509 inline size_t getDepthFactor(
Value *V) {
518 if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
523 if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
524 return Config.ReqChainDepth/2;
531 unsigned getInstrCost(
unsigned Opcode,
Type *T1,
Type *T2) {
534 case Instruction::GetElementPtr:
539 case Instruction::Br:
540 return TTI->getCFInstrCost(Opcode);
543 case Instruction::Add:
544 case Instruction::FAdd:
545 case Instruction::Sub:
546 case Instruction::FSub:
547 case Instruction::Mul:
548 case Instruction::FMul:
549 case Instruction::UDiv:
550 case Instruction::SDiv:
551 case Instruction::FDiv:
552 case Instruction::URem:
553 case Instruction::SRem:
554 case Instruction::FRem:
555 case Instruction::Shl:
556 case Instruction::LShr:
557 case Instruction::AShr:
561 return TTI->getArithmeticInstrCost(Opcode, T1);
563 case Instruction::ICmp:
564 case Instruction::FCmp:
565 return TTI->getCmpSelInstrCost(Opcode, T1, T2);
566 case Instruction::ZExt:
567 case Instruction::SExt:
568 case Instruction::FPToUI:
569 case Instruction::FPToSI:
570 case Instruction::FPExt:
571 case Instruction::PtrToInt:
572 case Instruction::IntToPtr:
573 case Instruction::SIToFP:
574 case Instruction::UIToFP:
575 case Instruction::Trunc:
576 case Instruction::FPTrunc:
577 case Instruction::BitCast:
578 case Instruction::ShuffleVector:
579 return TTI->getCastInstrCost(Opcode, T1, T2);
591 Value *&IPtr,
Value *&JPtr,
unsigned &IAlignment,
unsigned &JAlignment,
592 unsigned &IAddressSpace,
unsigned &JAddressSpace,
593 int64_t &OffsetInElmts,
bool ComputeOffset =
true) {
597 IPtr =
LI->getPointerOperand();
599 IAlignment =
LI->getAlignment();
601 IAddressSpace =
LI->getPointerAddressSpace();
604 StoreInst *SI = cast<StoreInst>(
I), *SJ = cast<StoreInst>(J);
606 JPtr = SJ->getPointerOperand();
608 JAlignment = SJ->getAlignment();
610 JAddressSpace = SJ->getPointerAddressSpace();
616 const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
617 const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
622 const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
624 dyn_cast<SCEVConstant>(OffsetSCEV)) {
629 int64_t VTyTSS = (int64_t)
TD->getTypeStoreSize(VTy);
632 if (VTy != VTy2 && Offset < 0) {
633 int64_t VTy2TSS = (int64_t)
TD->getTypeStoreSize(VTy2);
634 OffsetInElmts = Offset/VTy2TSS;
635 return (
abs64(Offset) % VTy2TSS) == 0;
638 OffsetInElmts = Offset/VTyTSS;
639 return (
abs64(Offset) % VTyTSS) == 0;
647 bool isVectorizableIntrinsic(
CallInst* I) {
649 if (!F)
return false;
652 if (!IID)
return false;
667 return Config.VectorizeMath;
670 return Config.VectorizeFMA;
677 if (!isa<UndefValue>(IENext->
getOperand(0)) &&
678 !isa<InsertElementInst>(IENext->
getOperand(0))) {
682 dyn_cast<InsertElementInst>(IENext->
getOperand(0))));
690 bool BBVectorize::vectorizePairs(
BasicBlock &BB,
bool NonPow2Len) {
694 std::vector<Value *> AllPairableInsts;
699 AllConnectedPairDeps;
702 std::vector<Value *> PairableInsts;
706 ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
708 CandidatePairCostSavings,
709 PairableInsts, NonPow2Len);
710 if (PairableInsts.empty())
continue;
715 CandidatePairs.
begin(), E = CandidatePairs.
end(); I != E; ++
I)
716 for (std::vector<Value *>::iterator J = I->second.begin(),
717 JE = I->second.end(); J != JE; ++J)
718 CandidatePairsSet.
insert(ValuePair(I->first, *J));
732 computeConnectedPairs(CandidatePairs, CandidatePairsSet,
733 PairableInsts, ConnectedPairs, PairConnectionTypes);
734 if (ConnectedPairs.
empty())
continue;
736 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator
737 I = ConnectedPairs.
begin(), IE = ConnectedPairs.
end();
739 for (std::vector<ValuePair>::iterator J = I->second.begin(),
740 JE = I->second.end(); J != JE; ++J)
741 ConnectedPairDeps[*J].push_back(I->first);
745 buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
754 choosePairs(CandidatePairs, CandidatePairsSet,
755 CandidatePairCostSavings,
756 PairableInsts, FixedOrderPairs, PairConnectionTypes,
757 ConnectedPairs, ConnectedPairDeps,
758 PairableInstUsers, ChosenPairs);
760 if (ChosenPairs.
empty())
continue;
761 AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
762 PairableInsts.end());
769 IE = ChosenPairs.
end(); I !=
IE; ++
I) {
770 if (FixedOrderPairs.
count(*I))
771 AllFixedOrderPairs.
insert(*I);
772 else if (FixedOrderPairs.
count(ValuePair(I->second, I->first)))
773 AllFixedOrderPairs.
insert(ValuePair(I->second, I->first));
778 PairConnectionTypes.
find(VPPair(*I, *J));
779 if (K != PairConnectionTypes.
end()) {
780 AllPairConnectionTypes.
insert(*K);
782 K = PairConnectionTypes.
find(VPPair(*J, *I));
783 if (K != PairConnectionTypes.
end())
784 AllPairConnectionTypes.
insert(*K);
789 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator
790 I = ConnectedPairs.
begin(), IE = ConnectedPairs.
end();
792 for (std::vector<ValuePair>::iterator J = I->second.begin(),
793 JE = I->second.end(); J != JE; ++J)
794 if (AllPairConnectionTypes.
count(VPPair(I->first, *J))) {
795 AllConnectedPairs[I->first].push_back(*J);
796 AllConnectedPairDeps[*J].push_back(I->first);
798 }
while (ShouldContinue);
800 if (AllChosenPairs.
empty())
return false;
801 NumFusedOps += AllChosenPairs.
size();
810 fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
811 AllPairConnectionTypes,
812 AllConnectedPairs, AllConnectedPairDeps);
823 bool BBVectorize::isInstVectorizable(
Instruction *I,
824 bool &IsSimpleLoadStore) {
825 IsSimpleLoadStore =
false;
827 if (
CallInst *
C = dyn_cast<CallInst>(I)) {
828 if (!isVectorizableIntrinsic(
C))
830 }
else if (
LoadInst *L = dyn_cast<LoadInst>(I)) {
832 IsSimpleLoadStore = L->isSimple();
833 if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
835 }
else if (
StoreInst *S = dyn_cast<StoreInst>(I)) {
837 IsSimpleLoadStore = S->isSimple();
838 if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
840 }
else if (
CastInst *
C = dyn_cast<CastInst>(I)) {
842 if (!Config.VectorizeCasts)
845 Type *SrcTy =
C->getSrcTy();
849 Type *DestTy =
C->getDestTy();
852 }
else if (isa<SelectInst>(I)) {
853 if (!Config.VectorizeSelect)
855 }
else if (isa<CmpInst>(I)) {
856 if (!Config.VectorizeCmp)
859 if (!Config.VectorizeGEP)
863 if (
G->getNumIndices() != 1)
865 }
else if (!(I->
isBinaryOp() || isa<ShuffleVectorInst>(
I) ||
866 isa<ExtractElementInst>(I) || isa<InsertElementInst>(
I))) {
871 if (
TD == 0 && IsSimpleLoadStore)
875 getInstructionTypes(I, T1, T2);
883 if (!Config.VectorizeBools)
891 if (!Config.VectorizeBools)
898 if (!Config.VectorizeFloats
908 if ((!Config.VectorizePointers ||
TD == 0) &&
925 bool IsSimpleLoadStore,
bool NonPow2Len,
926 int &CostSavings,
int &FixedOrder) {
928 " <-> " << *J <<
"\n");
939 Type *IT1, *IT2, *JT1, *JT2;
940 getInstructionTypes(I, IT1, IT2);
941 getInstructionTypes(J, JT1, JT2);
942 unsigned MaxTypeBits = std::max(
945 if (!TTI && MaxTypeBits > Config.VectorBits)
950 if (IsSimpleLoadStore) {
952 unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
953 int64_t OffsetInElmts = 0;
954 if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
955 IAddressSpace, JAddressSpace,
956 OffsetInElmts) &&
abs64(OffsetInElmts) == 1) {
957 FixedOrder = (int) OffsetInElmts;
958 unsigned BottomAlignment = IAlignment;
959 if (OffsetInElmts < 0) BottomAlignment = JAlignment;
961 Type *aTypeI = isa<StoreInst>(
I) ?
962 cast<StoreInst>(I)->getValueOperand()->getType() : I->
getType();
963 Type *aTypeJ = isa<StoreInst>(J) ?
964 cast<StoreInst>(J)->getValueOperand()->getType() : J->
getType();
965 Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
967 if (Config.AlignedOnly) {
972 unsigned VecAlignment =
TD->getPrefTypeAlignment(VType);
973 if (BottomAlignment < VecAlignment)
978 unsigned ICost = TTI->getMemoryOpCost(I->
getOpcode(), aTypeI,
979 IAlignment, IAddressSpace);
980 unsigned JCost = TTI->getMemoryOpCost(J->
getOpcode(), aTypeJ,
981 JAlignment, JAddressSpace);
982 unsigned VCost = TTI->getMemoryOpCost(I->
getOpcode(), VType,
986 ICost += TTI->getAddressComputationCost(aTypeI);
987 JCost += TTI->getAddressComputationCost(aTypeJ);
988 VCost += TTI->getAddressComputationCost(VType);
990 if (VCost > ICost + JCost)
996 unsigned VParts = TTI->getNumberOfParts(VType);
999 else if (!VParts && VCost == ICost + JCost)
1002 CostSavings = ICost + JCost - VCost;
1008 unsigned ICost = getInstrCost(I->
getOpcode(), IT1, IT2);
1009 unsigned JCost = getInstrCost(J->
getOpcode(), JT1, JT2);
1010 Type *VT1 = getVecTypeForPair(IT1, JT1),
1011 *VT2 = getVecTypeForPair(IT2, JT2);
1018 unsigned VCost = getInstrCost(I->
getOpcode(), VT1, VT2);
1020 if (VCost > ICost + JCost)
1026 unsigned VParts1 = TTI->getNumberOfParts(VT1),
1027 VParts2 = TTI->getNumberOfParts(VT2);
1028 if (VParts1 > 1 || VParts2 > 1)
1030 else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
1033 CostSavings = ICost + JCost - VCost;
1044 *A1J = cast<CallInst>(J)->getArgOperand(1);
1045 const SCEV *A1ISCEV = SE->getSCEV(A1I),
1046 *A1JSCEV = SE->getSCEV(A1J);
1047 return (A1ISCEV == A1JSCEV);
1054 unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
1060 unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
1064 "Intrinsic argument counts differ");
1073 Type *RetTy = getVecTypeForPair(IT1, JT1);
1074 unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
1076 if (VCost > ICost + JCost)
1082 unsigned RetParts = TTI->getNumberOfParts(RetTy);
1085 else if (!RetParts && VCost == ICost + JCost)
1089 if (!Tys[i]->isVectorTy())
1092 unsigned NumParts = TTI->getNumberOfParts(Tys[i]);
1095 else if (!NumParts && VCost == ICost + JCost)
1099 CostSavings = ICost + JCost - VCost;
1136 if (I == V || Users.
count(V)) {
1142 if (LoadMoveSetPairs) {
1143 UsesI = LoadMoveSetPairs->
count(ValuePair(J, I));
1146 WE = WriteSet.
end(); W != WE; ++W) {
1147 if (W->aliasesUnknownInst(J, *AA)) {
1155 if (UsesI && UpdateUsers) {
1165 bool BBVectorize::getCandidatePairs(
BasicBlock &BB,
1170 std::vector<Value *> &PairableInsts,
bool NonPow2Len) {
1171 size_t TotalPairs = 0;
1173 if (Start == E)
return false;
1175 bool ShouldContinue =
false, IAfterStart =
false;
1177 if (I == Start) IAfterStart =
true;
1179 bool IsSimpleLoadStore;
1180 if (!isInstVectorizable(I, IsSimpleLoadStore))
continue;
1187 bool JAfterStart = IAfterStart;
1189 for (
unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
1190 if (J == Start) JAfterStart =
true;
1193 bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
1194 if (Config.FastDep) {
1204 if (UsesI)
continue;
1209 int CostSavings, FixedOrder;
1210 if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
1211 CostSavings, FixedOrder))
continue;
1214 if (!PairableInsts.size() ||
1215 PairableInsts[PairableInsts.size()-1] !=
I) {
1216 PairableInsts.push_back(I);
1219 CandidatePairs[
I].push_back(J);
1222 CandidatePairCostSavings.
insert(ValuePairWithCost(ValuePair(I, J),
1225 if (FixedOrder == 1)
1226 FixedOrderPairs.
insert(ValuePair(I, J));
1227 else if (FixedOrder == -1)
1228 FixedOrderPairs.
insert(ValuePair(J, I));
1234 IAfterStart = JAfterStart =
false;
1238 << *I <<
" <-> " << *J <<
" (cost savings: " <<
1239 CostSavings <<
")\n");
1244 if (PairableInsts.size() >= Config.MaxInsts ||
1245 TotalPairs >= Config.MaxPairs) {
1246 ShouldContinue =
true;
1255 DEBUG(
dbgs() <<
"BBV: found " << PairableInsts.size()
1256 <<
" instructions with candidate pairs\n");
1258 return ShouldContinue;
1264 void BBVectorize::computePairsConnectedTo(
1267 std::vector<Value *> &PairableInsts,
1268 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1276 E = P.first->use_end(); I != E; ++
I) {
1277 if (isa<LoadInst>(*I)) {
1281 }
else if ((SI = dyn_cast<StoreInst>(*I)) &&
1291 E2 = P.second->use_end(); J != E2; ++J) {
1292 if ((SJ = dyn_cast<StoreInst>(*J)) &&
1297 if (CandidatePairsSet.
count(ValuePair(*I, *J))) {
1298 VPPair VP(P, ValuePair(*I, *J));
1299 ConnectedPairs[VP.first].push_back(VP.second);
1300 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionDirect));
1304 if (CandidatePairsSet.
count(ValuePair(*J, *I))) {
1305 VPPair VP(P, ValuePair(*J, *I));
1306 ConnectedPairs[VP.first].push_back(VP.second);
1307 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSwap));
1311 if (Config.SplatBreaksChain)
continue;
1315 if ((SJ = dyn_cast<StoreInst>(*J)) &&
1319 if (CandidatePairsSet.
count(ValuePair(*I, *J))) {
1320 VPPair VP(P, ValuePair(*I, *J));
1321 ConnectedPairs[VP.first].push_back(VP.second);
1322 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSplat));
1327 if (Config.SplatBreaksChain)
return;
1331 E = P.second->use_end(); I != E; ++
I) {
1332 if (isa<LoadInst>(*I))
1334 else if ((SI = dyn_cast<StoreInst>(*I)) &&
1339 if ((SJ = dyn_cast<StoreInst>(*J)) &&
1343 if (CandidatePairsSet.
count(ValuePair(*I, *J))) {
1344 VPPair VP(P, ValuePair(*I, *J));
1345 ConnectedPairs[VP.first].push_back(VP.second);
1346 PairConnectionTypes.
insert(VPPairWithType(VP, PairConnectionSplat));
1355 void BBVectorize::computeConnectedPairs(
1358 std::vector<Value *> &PairableInsts,
1359 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1361 for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
1362 PE = PairableInsts.end(); PI != PE; ++PI) {
1364 CandidatePairs.
find(*PI);
1365 if (PP == CandidatePairs.
end())
1368 for (std::vector<Value *>::iterator P = PP->second.
begin(),
1369 E = PP->second.
end(); P != E; ++
P)
1370 computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
1371 PairableInsts, ConnectedPairs,
1372 PairConnectionTypes, ValuePair(*PI, *P));
1375 DEBUG(
size_t TotalPairs = 0;
1376 for (
DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
1377 ConnectedPairs.
begin(), IE = ConnectedPairs.
end(); I !=
IE; ++
I)
1378 TotalPairs += I->second.size();
1379 dbgs() <<
"BBV: found " << TotalPairs
1380 <<
" pair connections.\n");
1386 void BBVectorize::buildDepMap(
1389 std::vector<Value *> &PairableInsts,
1393 CandidatePairs.
begin(), E = CandidatePairs.
end();
C != E; ++
C) {
1395 IsInPair.
insert(
C->second.begin(),
C->second.end());
1404 if (IsInPair.
find(I) == IsInPair.
end())
continue;
1411 (void) trackUsesOfI(Users, WriteSet, I, J);
1419 if (IsInPair.
find(*U) == IsInPair.
end())
continue;
1420 PairableInstUsers.
insert(ValuePair(I, *U));
1431 bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
1433 DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
1436 bool QUsesP = PairableInstUsers.
count(ValuePair(P.first, Q.first)) ||
1437 PairableInstUsers.
count(ValuePair(P.first, Q.second)) ||
1438 PairableInstUsers.
count(ValuePair(P.second, Q.first)) ||
1439 PairableInstUsers.
count(ValuePair(P.second, Q.second));
1440 bool PUsesQ = PairableInstUsers.
count(ValuePair(Q.first, P.first)) ||
1441 PairableInstUsers.
count(ValuePair(Q.first, P.second)) ||
1442 PairableInstUsers.
count(ValuePair(Q.second, P.first)) ||
1443 PairableInstUsers.
count(ValuePair(Q.second, P.second));
1444 if (PairableInstUserMap) {
1449 if (PairableInstUserPairSet->
insert(VPPair(Q, P)).second)
1450 (*PairableInstUserMap)[Q].push_back(P);
1453 if (PairableInstUserPairSet->
insert(VPPair(P, Q)).second)
1454 (*PairableInstUserMap)[
P].push_back(Q);
1458 return (QUsesP && PUsesQ);
1463 bool BBVectorize::pairWillFormCycle(ValuePair P,
1464 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1467 dbgs() <<
"BBV: starting cycle check for : " << *P.first <<
" <-> "
1468 << *P.second <<
"\n");
1480 dbgs() <<
"BBV: cycle check visiting: " << *QTop.first <<
" <-> "
1481 << *QTop.second <<
"\n");
1483 PairableInstUserMap.
find(QTop);
1484 if (QQ == PairableInstUserMap.
end())
1487 for (std::vector<ValuePair>::iterator
C = QQ->second.
begin(),
1488 CE = QQ->second.
end();
C != CE; ++
C) {
1491 <<
"BBV: rejected to prevent non-trivial cycle formation: "
1492 << QTop.first <<
" <-> " <<
C->second <<
"\n");
1499 }
while (!Q.
empty());
1506 void BBVectorize::buildInitialDAGFor(
1509 std::vector<Value *> &PairableInsts,
1510 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1519 Q.
push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
1521 ValuePairWithDepth QTop = Q.
back();
1524 bool MoreChildren =
false;
1525 size_t MaxChildDepth = QTop.second;
1527 ConnectedPairs.
find(QTop.first);
1528 if (QQ != ConnectedPairs.
end())
1529 for (std::vector<ValuePair>::iterator k = QQ->second.
begin(),
1530 ke = QQ->second.
end(); k != ke; ++k) {
1532 if (CandidatePairsSet.
count(*k)) {
1534 if (C == DAG.
end()) {
1535 size_t d = getDepthFactor(k->first);
1536 Q.
push_back(ValuePairWithDepth(*k, QTop.second+d));
1537 MoreChildren =
true;
1539 MaxChildDepth = std::max(MaxChildDepth, C->second);
1544 if (!MoreChildren) {
1546 DAG.
insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
1549 }
while (!Q.
empty());
1554 void BBVectorize::pruneDAGFor(
1556 std::vector<Value *> &PairableInsts,
1557 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1559 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1564 bool UseCycleCheck) {
1567 Q.
push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
1570 PrunedDAG.
insert(QTop.first);
1575 ConnectedPairs.
find(QTop.first);
1576 if (QQ == ConnectedPairs.
end())
1579 for (std::vector<ValuePair>::iterator K = QQ->second.
begin(),
1580 KE = QQ->second.
end(); K != KE; ++K) {
1582 if (C == DAG.
end())
continue;
1610 = BestChildren.
begin(), E2 = BestChildren.
end();
1612 if (C2->first.first == C->first.first ||
1613 C2->first.first == C->first.second ||
1614 C2->first.second == C->first.first ||
1615 C2->first.second == C->first.second ||
1616 pairsConflict(C2->first, C->first, PairableInstUsers,
1617 UseCycleCheck ? &PairableInstUserMap : 0,
1618 UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1619 if (C2->second >= C->second) {
1624 CurrentPairs.
insert(C2->first);
1627 if (!CanAdd)
continue;
1632 E2 = PrunedDAG.
end();
T != E2; ++
T) {
1633 if (
T->first == C->first.first ||
1634 T->first == C->first.second ||
1635 T->second == C->first.first ||
1636 T->second == C->first.second ||
1637 pairsConflict(*
T, C->first, PairableInstUsers,
1638 UseCycleCheck ? &PairableInstUserMap : 0,
1639 UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1646 if (!CanAdd)
continue;
1650 E2 = Q.
end(); C2 != E2; ++C2) {
1651 if (C2->first.first == C->first.first ||
1652 C2->first.first == C->first.second ||
1653 C2->first.second == C->first.first ||
1654 C2->first.second == C->first.second ||
1655 pairsConflict(C2->first, C->first, PairableInstUsers,
1656 UseCycleCheck ? &PairableInstUserMap : 0,
1657 UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1662 CurrentPairs.
insert(C2->first);
1664 if (!CanAdd)
continue;
1669 ChosenPairs.
begin(), E2 = ChosenPairs.
end();
1671 if (pairsConflict(*C2, C->first, PairableInstUsers,
1672 UseCycleCheck ? &PairableInstUserMap : 0,
1673 UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1678 CurrentPairs.
insert(*C2);
1680 if (!CanAdd)
continue;
1690 if (UseCycleCheck &&
1691 pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
1699 = BestChildren.
begin(); C2 != BestChildren.
end();) {
1700 if (C2->first.first == C->first.first ||
1701 C2->first.first == C->first.second ||
1702 C2->first.second == C->first.first ||
1703 C2->first.second == C->first.second ||
1704 pairsConflict(C2->first, C->first, PairableInstUsers))
1705 C2 = BestChildren.
erase(C2);
1710 BestChildren.
push_back(ValuePairWithDepth(C->first, C->second));
1714 = BestChildren.
begin(), E2 = BestChildren.
end();
1716 size_t DepthF = getDepthFactor(C->first.first);
1717 Q.
push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
1719 }
while (!Q.
empty());
1724 void BBVectorize::findBestDAGFor(
1728 std::vector<Value *> &PairableInsts,
1731 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
1732 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
1734 DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
1738 int &BestEffSize,
Value *II, std::vector<Value *>&JJ,
1739 bool UseCycleCheck) {
1740 for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
1742 ValuePair IJ(II, *J);
1743 if (!CandidatePairsSet.
count(IJ))
1750 bool DoesConflict =
false;
1752 E = ChosenPairs.
end(); C != E; ++
C) {
1753 if (pairsConflict(*C, IJ, PairableInstUsers,
1754 UseCycleCheck ? &PairableInstUserMap : 0,
1755 UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1756 DoesConflict =
true;
1760 ChosenPairSet.
insert(*C);
1762 if (DoesConflict)
continue;
1764 if (UseCycleCheck &&
1765 pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
1769 buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
1770 PairableInsts, ConnectedPairs,
1771 PairableInstUsers, ChosenPairs, DAG, IJ);
1778 << *IJ.first <<
" <-> " << *IJ.second <<
"} of depth " <<
1779 MaxDepth <<
" and size " << DAG.
size() <<
"\n");
1789 pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
1790 PairableInstUsers, PairableInstUserMap,
1791 PairableInstUserPairSet,
1792 ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
1798 E = PrunedDAG.end(); S != E; ++S) {
1799 PrunedDAGInstrs.
insert(S->first);
1800 PrunedDAGInstrs.
insert(S->second);
1809 bool HasNontrivialInsts =
false;
1814 E = PrunedDAG.end(); S != E; ++S) {
1815 if (!isa<ShuffleVectorInst>(S->first) &&
1816 !isa<InsertElementInst>(S->first) &&
1817 !isa<ExtractElementInst>(S->first))
1818 HasNontrivialInsts =
true;
1820 bool FlipOrder =
false;
1822 if (getDepthFactor(S->first)) {
1823 int ESContrib = CandidatePairCostSavings.
find(*S)->second;
1825 << *S->first <<
" <-> " << *S->second <<
"} = " <<
1827 EffSize += ESContrib;
1833 ConnectedPairDeps.
find(*S);
1834 if (SS != ConnectedPairDeps.
end()) {
1835 unsigned NumDepsDirect = 0, NumDepsSwap = 0;
1836 for (std::vector<ValuePair>::iterator
T = SS->second.
begin(),
1837 TE = SS->second.
end();
T != TE; ++
T) {
1839 if (!PrunedDAG.count(Q.second))
1842 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
1843 assert(R != PairConnectionTypes.
end() &&
1844 "Cannot find pair connection type");
1845 if (R->second == PairConnectionDirect)
1847 else if (R->second == PairConnectionSwap)
1854 FlipOrder = !FixedOrderPairs.
count(*S) &&
1855 ((NumDepsSwap > NumDepsDirect) ||
1856 FixedOrderPairs.
count(ValuePair(S->second, S->first)));
1858 for (std::vector<ValuePair>::iterator
T = SS->second.
begin(),
1859 TE = SS->second.
end();
T != TE; ++
T) {
1861 if (!PrunedDAG.count(Q.second))
1864 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
1865 assert(R != PairConnectionTypes.
end() &&
1866 "Cannot find pair connection type");
1867 Type *Ty1 = Q.second.first->getType(),
1868 *Ty2 = Q.second.second->getType();
1869 Type *VTy = getVecTypeForPair(Ty1, Ty2);
1870 if ((R->second == PairConnectionDirect && FlipOrder) ||
1871 (R->second == PairConnectionSwap && !FlipOrder) ||
1872 R->second == PairConnectionSplat) {
1873 int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
1877 if (R->second == PairConnectionSplat)
1878 ESContrib = std::min(ESContrib, (
int) TTI->getShuffleCost(
1881 ESContrib = std::min(ESContrib, (
int) TTI->getShuffleCost(
1886 *Q.second.first <<
" <-> " << *Q.second.second <<
1888 *S->first <<
" <-> " << *S->second <<
"} = " <<
1890 EffSize -= ESContrib;
1898 if (!S->first->getType()->isVoidTy()) {
1899 Type *Ty1 = S->first->getType(),
1900 *Ty2 = S->second->getType();
1901 Type *VTy = getVecTypeForPair(Ty1, Ty2);
1903 bool NeedsExtraction =
false;
1911 if (isa<ExtractElementInst>(*I))
1913 if (PrunedDAGInstrs.
count(*I))
1915 NeedsExtraction =
true;
1919 if (NeedsExtraction) {
1922 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
1924 ESContrib = std::min(ESContrib, (
int) TTI->getShuffleCost(
1927 ESContrib = (int) TTI->getVectorInstrCost(
1931 *S->first <<
"} = " << ESContrib <<
"\n");
1932 EffSize -= ESContrib;
1935 NeedsExtraction =
false;
1943 if (isa<ExtractElementInst>(*I))
1945 if (PrunedDAGInstrs.
count(*I))
1947 NeedsExtraction =
true;
1951 if (NeedsExtraction) {
1953 if (Ty2->isVectorTy()) {
1954 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
1956 ESContrib = std::min(ESContrib, (
int) TTI->getShuffleCost(
1960 ESContrib = (int) TTI->getVectorInstrCost(
1963 *S->second <<
"} = " << ESContrib <<
"\n");
1964 EffSize -= ESContrib;
1969 if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
1971 *S2 = cast<Instruction>(S->second);
1977 if (isa<Constant>(O1) && isa<Constant>(O2))
1983 ValuePair VP = ValuePair(O1, O2);
1984 ValuePair VPR = ValuePair(O2, O1);
1987 if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
1991 *Ty2 = O2->getType();
1992 Type *VTy = getVecTypeForPair(Ty1, Ty2);
1999 *IEO2 = dyn_cast<InsertElementInst>(O2);
2000 if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
2005 *EIO2 = dyn_cast<ExtractElementInst>(O2);
2008 EIO2->getOperand(0)->getType())
2014 *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
2017 SIO2->getOperand(0)->getType()) {
2021 SIOps.
insert(SIO2->getOperand(0));
2022 SIOps.
insert(SIO2->getOperand(1));
2023 if (SIOps.
size() <= 2)
2030 if (IncomingPairs.
count(VP)) {
2032 }
else if (IncomingPairs.
count(VPR)) {
2033 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
2037 ESContrib = std::min(ESContrib, (
int) TTI->getShuffleCost(
2039 }
else if (!Ty1->
isVectorTy() && !Ty2->isVectorTy()) {
2040 ESContrib = (int) TTI->getVectorInstrCost(
2041 Instruction::InsertElement, VTy, 0);
2042 ESContrib += (int) TTI->getVectorInstrCost(
2043 Instruction::InsertElement, VTy, 1);
2047 ESContrib = (int) TTI->getVectorInstrCost(
2048 Instruction::InsertElement, Ty2, 0);
2049 ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
2051 }
else if (!Ty2->isVectorTy()) {
2054 ESContrib = (int) TTI->getVectorInstrCost(
2055 Instruction::InsertElement, Ty1, 0);
2056 ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
2059 Type *TyBig = Ty1, *TySmall = Ty2;
2063 ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
2065 if (TyBig != TySmall)
2066 ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
2071 << *O1 <<
" <-> " << *O2 <<
"} = " <<
2073 EffSize -= ESContrib;
2074 IncomingPairs.
insert(VP);
2079 if (!HasNontrivialInsts) {
2081 "\tNo non-trivial instructions in DAG;"
2082 " override to zero effective size\n");
2087 E = PrunedDAG.end(); S != E; ++S)
2088 EffSize += (
int) getDepthFactor(S->first);
2092 dbgs() <<
"BBV: found pruned DAG for pair {"
2093 << *IJ.first <<
" <-> " << *IJ.second <<
"} of depth " <<
2094 MaxDepth <<
" and size " << PrunedDAG.size() <<
2095 " (effective size: " << EffSize <<
")\n");
2097 MaxDepth >= Config.ReqChainDepth) &&
2098 EffSize > 0 && EffSize > BestEffSize) {
2100 BestEffSize = EffSize;
2101 BestDAG = PrunedDAG;
2108 void BBVectorize::choosePairs(
2112 std::vector<Value *> &PairableInsts,
2115 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
2116 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
2119 bool UseCycleCheck =
2120 CandidatePairsSet.
size() <= Config.MaxCandPairsForCycleCheck;
2124 E = CandidatePairsSet.
end(); I != E; ++
I) {
2125 std::vector<Value *> &JJ = CandidatePairs2[I->second];
2126 if (JJ.empty()) JJ.reserve(32);
2127 JJ.push_back(I->first);
2132 for (std::vector<Value *>::iterator I = PairableInsts.begin(),
2133 E = PairableInsts.end(); I != E; ++
I) {
2135 size_t NumChoices = CandidatePairs.
lookup(*I).size();
2136 if (!NumChoices)
continue;
2138 std::vector<Value *> &JJ = CandidatePairs[*
I];
2141 size_t BestMaxDepth = 0;
2142 int BestEffSize = 0;
2144 findBestDAGFor(CandidatePairs, CandidatePairsSet,
2145 CandidatePairCostSavings,
2146 PairableInsts, FixedOrderPairs, PairConnectionTypes,
2147 ConnectedPairs, ConnectedPairDeps,
2148 PairableInstUsers, PairableInstUserMap,
2149 PairableInstUserPairSet, ChosenPairs,
2150 BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
2153 if (BestDAG.
empty())
2160 DEBUG(
dbgs() <<
"BBV: selected pairs in the best DAG for: "
2161 << *cast<Instruction>(*I) <<
"\n");
2164 SE2 = BestDAG.
end(); S != SE2; ++S) {
2166 ChosenPairs.
insert(ValuePair(S->first, S->second));
2167 DEBUG(
dbgs() <<
"BBV: selected pair: " << *S->first <<
" <-> " <<
2168 *S->second <<
"\n");
2171 std::vector<Value *> &KK = CandidatePairs[S->first];
2172 for (std::vector<Value *>::iterator K = KK.
begin(), KE = KK.end();
2174 if (*K == S->second)
2177 CandidatePairsSet.
erase(ValuePair(S->first, *K));
2180 std::vector<Value *> &LL = CandidatePairs2[S->second];
2181 for (std::vector<Value *>::iterator L = LL.begin(),
LE = LL.end();
2186 CandidatePairsSet.
erase(ValuePair(*L, S->second));
2189 std::vector<Value *> &MM = CandidatePairs[S->second];
2190 for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
2192 assert(*M != S->first &&
"Flipped pair in candidate list?");
2193 CandidatePairsSet.
erase(ValuePair(S->second, *M));
2196 std::vector<Value *> &NN = CandidatePairs2[S->first];
2197 for (std::vector<Value *>::iterator
N = NN.begin(),
NE = NN.end();
2199 assert(*
N != S->second &&
"Flipped pair in candidate list?");
2200 CandidatePairsSet.
erase(ValuePair(*
N, S->first));
2205 DEBUG(
dbgs() <<
"BBV: selected " << ChosenPairs.
size() <<
" pairs.\n");
// Builds the name given to a replacement value derived from instruction I.
// The visible return expression concatenates, in order: I's own name, the
// suffix ".v.i" when IsInput is true or ".v.r" otherwise, the decimal form of
// o, and -- only when n is greater than zero -- a "." followed by the decimal
// form of n.
// NOTE(review): the lines between 2208 and 2213 are absent from this listing,
// so the declaration of n and any statements preceding the return cannot be
// seen here; confirm against the full source.
2208 std::string getReplacementName(
Instruction *I,
bool IsInput,
unsigned o,
2213 return (I->
getName() + (IsInput ?
".v.i" :
".v.r") +
utostr(o) +
// The ".<n>" suffix is omitted entirely when n == 0.
2214 (n > 0 ?
"." +
utostr(n) :
"")).str();
// NOTE(review): the signature of this routine is missing from the listing.
// The call getReplacementPointerInput(Context, I, J, o) made for loads and
// stores elsewhere in this file suggests these lines are its body -- confirm
// against the full source. Several interior lines are absent as well.
2222 unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
2223 int64_t OffsetInElmts;
// These locals are passed uninitialized, so getPairPtrInfo presumably fills
// them in; its own return value is deliberately discarded via the (void) cast.
2227 (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
2228 IAddressSpace, JAddressSpace,
2229 OffsetInElmts,
false);
// Combined type for the two arguments, as computed by getVecTypeForPair.
2236 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
// The result is a new bitcast of VPtr to VArgPtrType, named as an input
// replacement (IsInput == true) of I for operand o.
2240 return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I,
true, o),
// NOTE(review): the first line of this signature is missing from the listing;
// the calls fillNewShuffleMask(Context, I, 0, ...) elsewhere in this file
// suggest this is the parameter list and body of fillNewShuffleMask --
// confirm against the full source.
// Visible parameters: MaskOffset (added to v when indexing Mask), NumInElem
// and NumInElem1 (used in the index adjustment below), IdxOffset (added to
// every mask value), and Mask (the output vector, written in place).
2245 unsigned MaskOffset,
unsigned NumInElem,
2246 unsigned NumInElem1,
unsigned IdxOffset,
2247 std::vector<Constant*> &Mask) {
// NumElem1 is not declared in the visible lines; it bounds the per-element
// walk over J's shuffle mask.
2249 for (
unsigned v = 0; v < NumElem1; ++v) {
// J is treated as a shufflevector; m is its mask value at position v.
2250 int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
// NOTE(review): original lines 2251-2253 are missing; whatever handling
// precedes this adjustment (e.g. for special mask values) is not shown.
2254 unsigned mm = m + (int) IdxOffset;
// Mask values at or beyond NumInElem1 are shifted up by a further NumInElem.
2255 if (m >= (
int) NumInElem1)
2256 mm += (
int) NumInElem;
// The right-hand side of this assignment is not present in the listing.
2258 Mask[v+MaskOffset] =
// NOTE(review): the signature is missing from the listing; the call
// getReplacementShuffleMask(Context, I, J) elsewhere in this file suggests
// these lines belong to that routine -- confirm against the full source.
2273 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
// One mask entry per element; NumElem is defined on a line not shown here.
2281 std::vector<Constant*> Mask(NumElem);
// The mask is filled in two passes: first from I with third argument 0, then
// from J with third argument NumElemI; the NumInElemI/NumInElemJ arguments
// are swapped between the two calls. The trailing arguments of both calls are
// cut off in this listing.
2297 fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI,
2301 fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
// NOTE(review): the start of this signature is missing from the listing; the
// calls expandIEChain(Context, I, J, o, ...) elsewhere in this file suggest
// this is the tail of its parameter list plus its body -- confirm against the
// full source. Many interior lines are absent.
2311 bool IBeforeJ,
unsigned IdxOff) {
// Result flag: stays false unless the expansion below is carried out.
2312 bool ExpandedIEChain =
false;
// Work is only attempted when isPureIEChain accepts LIE.
2316 if (isPureIEChain(LIE)) {
// Operand 2 of LIENext is read as a constant integer (sign-extended) ...
2322 cast<ConstantInt>(LIENext->
getOperand(2))->getSExtValue();
// ... and operand 0 is followed only if it is itself an InsertElementInst
// (dyn_cast yields null otherwise).
2325 dyn_cast<InsertElementInst>(LIENext->
getOperand(0))));
// Walk the numElemL collected elements, skipping undef entries.
2329 for (
unsigned i = 0; i < numElemL; ++i) {
2330 if (isa<UndefValue>(VectElemts[i]))
continue;
// New values are named after whichever of I/J comes first (IBeforeJ).
2334 getReplacementName(IBeforeJ ? I : J,
2341 ExpandedIEChain =
true;
2345 return ExpandedIEChain;
// Returns the number of elements of Ty when Ty is a VectorType.
// NOTE(review): the path taken when Ty is not a vector type (original lines
// after 2350) is not present in this listing.
2348 static unsigned getNumScalarElements(
Type *Ty) {
// dyn_cast yields null for non-vector types, so this branch is vector-only.
2349 if (
VectorType *VecTy = dyn_cast<VectorType>(Ty))
2350 return VecTy->getNumElements();
// NOTE(review): the signature of this routine is missing from the listing; the
// call getReplacementInput(Context, I, J, o, IBeforeJ) elsewhere in this file
// suggests these lines are its body -- confirm against the full source. Many
// interior lines are absent, so the comments describe only what is visible.
2364 VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
// The "L"/"H" names start out as the I-side and J-side argument types
// (presumably low half / high half of the fused operand).
2367 Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
2369 unsigned numElemL = getNumScalarElements(ArgTypeL);
2370 unsigned numElemH = getNumScalarElements(ArgTypeH);
2388 bool IsSizeChangeShuffle =
2389 isa<ShuffleVectorInst>(L) &&
// Shortcut attempted only when each half has one of LEE/LSV (resp. HEE/HSV)
// set and the low side is not a size-changing shuffle. LSV/HSV are used as
// shufflevector instructions below (getMaskValue); LEE/HEE have operand 1 read
// as a constant index, so they are presumably extractelement instructions.
2392 if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
// Cleared below as soon as a source vector other than I1/I2 is encountered.
2394 bool CanUseInputs =
true;
2399 I1 = LSV->getOperand(0);
2400 I2 = LSV->getOperand(1);
2401 if (I2 == I1 || isa<UndefValue>(I2))
// High side: each source vector must match I1 or I2; when I2 is still unset a
// differing source takes a branch whose body is not shown in this listing.
2406 Value *I3 = HEE->getOperand(0);
2407 if (!I2 && I3 != I1)
2409 else if (I3 != I1 && I3 != I2)
2410 CanUseInputs =
false;
2412 Value *I3 = HSV->getOperand(0);
2413 if (!I2 && I3 != I1)
2415 else if (I3 != I1 && I3 != I2)
2416 CanUseInputs =
false;
// The second shuffle operand is only checked when it is not undef.
2419 Value *I4 = HSV->getOperand(1);
2420 if (!isa<UndefValue>(I4)) {
2421 if (!I2 && I4 != I1)
2423 else if (I4 != I1 && I4 != I2)
2424 CanUseInputs =
false;
// Element counts of the vectors feeding LOp and HOp (their operand 0).
2431 cast<Instruction>(LOp)->getOperand(0)->getType()
2432 ->getVectorNumElements();
2435 cast<Instruction>(
HOp)->getOperand(0)->getType()
2436 ->getVectorNumElements();
// Record, for every low-side element, a (source index, input number) pair in
// II; input number is 0 when the source is I1 and 1 otherwise.
2441 for (
unsigned i = 0; i < numElemL; ++i) {
2445 cast<ConstantInt>(LEE->
getOperand(1))->getSExtValue();
2448 Idx = LSV->getMaskValue(i);
// A mask value below LOpElem is attributed to the shuffle's first operand,
// otherwise to its second.
2449 if (Idx < (
int) LOpElem) {
2450 INum = LSV->getOperand(0) == I1 ? 0 : 1;
2453 INum = LSV->getOperand(1) == I1 ? 0 : 1;
2457 II[i] = std::pair<int, int>(Idx, INum);
// Same bookkeeping for the high side; its entries follow the low-side ones
// (stored at i + numElemL).
2459 for (
unsigned i = 0; i < numElemH; ++i) {
2463 cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
2464 INum = HEE->getOperand(0) == I1 ? 0 : 1;
2466 Idx = HSV->getMaskValue(i);
2467 if (Idx < (
int) HOpElem) {
2468 INum = HSV->getOperand(0) == I1 ? 0 : 1;
2471 INum = HSV->getOperand(1) == I1 ? 0 : 1;
2475 II[i + numElemL] = std::pair<int, int>(Idx, INum);
// When I1 already has numElem elements, test whether every entry is either -1
// or sits at its own position (II[i].first == i).
2486 if (I1Elem == numElem) {
2487 bool ElemInOrder =
true;
2488 for (
unsigned i = 0; i < numElem; ++i) {
2489 if (II[i].first != (
int) i && II[i].first != -1) {
2490 ElemInOrder =
false;
2500 std::vector<Constant *> Mask(numElem);
2501 for (
unsigned i = 0; i < numElem; ++i) {
2502 int Idx = II[i].first;
2512 getReplacementName(IBeforeJ ? I : J,
// The two inputs differ in length: a mask sized to the longer input is built
// in two index ranges (the loop bodies are not shown in this listing).
2524 if (I1Elem < I2Elem) {
2525 std::vector<Constant *> Mask(I2Elem);
2527 for (; v < I1Elem; ++v)
2529 for (; v < I2Elem; ++v)
2535 getReplacementName(IBeforeJ ? I : J,
2541 }
else if (I1Elem > I2Elem) {
2542 std::vector<Constant *> Mask(I1Elem);
2544 for (; v < I2Elem; ++v)
2546 for (; v < I1Elem; ++v)
2552 getReplacementName(IBeforeJ ? I : J,
// Combined mask over both inputs: entries whose index is -1 take a separate
// branch (body not shown); otherwise elements drawn from input number 1 are
// offset by I1Elem.
2562 std::vector<Constant *> Mask(numElem);
2563 for (
unsigned v = 0; v < numElem; ++v) {
2564 if (II[v].first == -1) {
2567 int Idx = II[v].first + II[v].second * I1Elem;
2574 getReplacementName(IBeforeJ ? I : J,
true, o));
// From here on the halves are combined directly, with separate handling for
// a shorter low side, a shorter high side, and (presumably) equal lengths.
2580 Type *ArgType = ArgTypeL;
2581 if (numElemL < numElemH) {
// Single-element low side: try expanding HOp's insertelement chain, passing 1
// as the final (IdxOff) argument.
2582 if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
2583 ArgTypeL, VArgType, IBeforeJ, 1)) {
2589 getReplacementName(IBeforeJ ? I : J,
true, o));
// Otherwise try expanding LOp's chain to ArgTypeH; the block below runs only
// if that expansion did not happen.
2592 }
else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
2593 ArgTypeH, IBeforeJ)) {
2599 std::vector<Constant *> Mask(numElemH);
2601 for (; v < numElemL; ++v)
2603 for (; v < numElemH; ++v)
2608 getReplacementName(IBeforeJ ? I : J,
2612 getReplacementName(IBeforeJ ? I : J,
// Mirror image of the branch above, for a shorter high side.
2621 }
else if (numElemL > numElemH) {
2622 if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
2623 ArgTypeH, VArgType, IBeforeJ)) {
2628 getReplacementName(IBeforeJ ? I : J,
2632 }
else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
2633 ArgTypeL, IBeforeJ)) {
2636 std::vector<Constant *> Mask(numElemL);
2638 for (; v < numElemH; ++v)
2640 for (; v < numElemL; ++v)
2645 getReplacementName(IBeforeJ ? I : J,
2649 getReplacementName(IBeforeJ ? I : J,
// Final mask over numElem entries: positions at or past numElemL are shifted
// up by the length difference when the high side is the longer one.
2660 std::vector<Constant*> Mask(numElem);
2661 for (
unsigned v = 0; v < numElem; ++v) {
2665 if (v >= numElemL && numElemH > numElemL)
2666 Idx += (numElemH - numElemL);
2672 getReplacementName(IBeforeJ ? I : J,
true, o));
2679 getReplacementName(IBeforeJ ? I : J,
2683 getReplacementName(IBeforeJ ? I : J,
// Fills ReplacedOperands[o] for each operand position o of the pair (I, J),
// choosing the helper by instruction kind.
// NOTE(review): most of the parameter list and several interior lines are
// absent from this listing; only the visible dispatch is described.
2691 void BBVectorize::getReplacementInputsForPair(
LLVMContext& Context,
// o runs from the last operand down to 0; p only counts iterations and is what
// the loop condition tests.
2697 for (
unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
// Every operand of a load, and operand 1 of a store, gets a pointer
// replacement.
2701 if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
2703 ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
2705 }
else if (isa<CallInst>(I)) {
// The last operand of a call is treated specially (details not shown).
2708 if (o == NumOperands-1) {
2714 Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2725 }
// The last operand of a shufflevector is its mask, which is rebuilt rather
// than fused like an ordinary input.
else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
2726 ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
// Generic operand handling.
2730 ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
// NOTE(review): the signature is missing from the listing; the call
// replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2) elsewhere in
// this file suggests these lines are its body -- confirm against the full
// source. Many interior lines are absent.
// For a store pair, alias analysis is told that both I and J are replaced by
// K.
2742 if (isa<StoreInst>(I)) {
2743 AA->replaceWithNewValue(I, K);
2744 AA->replaceWithNewValue(J, K);
2749 VectorType *VType = getVecTypeForPair(IType, JType);
2752 unsigned numElemI = getNumScalarElements(IType);
2753 unsigned numElemJ = getNumScalarElements(JType);
// Two masks of numElemI entries each are built here; the loop body and the
// instruction created from them are not shown. The resulting value is named
// as an output replacement (IsInput == false) of K with index 1.
2756 std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
2757 for (
unsigned v = 0; v < numElemI; ++v) {
2764 getReplacementName(K,
false, 1));
2768 getReplacementName(K,
false, 1));
// Same construction sized by numElemJ, with the result named using index 2.
2772 std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ);
2773 for (
unsigned v = 0; v < numElemJ; ++v) {
2780 getReplacementName(K,
false, 2));
2784 getReplacementName(K,
false, 2));
// Returns true when trackUsesOfI reports no dependence of J on I after the
// instructions between them have been tracked.
// NOTE(review): most of the parameter list and the initialisation of L, Users
// and WriteSet are absent from this listing.
2794 bool BBVectorize::canMoveUsesOfIAfterJ(
BasicBlock &BB,
// Advance L up to J, accumulating tracking state in Users/WriteSet; the
// per-instruction result is intentionally ignored here.
2804 for (; cast<Instruction>(L) != J; ++L)
2805 (
void) trackUsesOfI(Users, WriteSet, I, L,
true, &LoadMoveSetPairs);
2807 assert(cast<Instruction>(L) == J &&
2808 "Tracking has not proceeded far enough to check for dependencies");
// Only the answer for J itself decides the result (negated).
2811 return !trackUsesOfI(Users, WriteSet, I, J,
true, &LoadMoveSetPairs);
// Walks the instructions up to J and relocates those for which trackUsesOfI
// returns true.
// NOTE(review): most of the parameter list and the statements that perform
// the move and advance L are absent from this listing.
2815 void BBVectorize::moveUsesOfIAfterJ(
BasicBlock &BB,
// No increment in the loop header: L is advanced inside the body (not shown).
2826 for (; cast<Instruction>(L) != J;) {
2827 if (trackUsesOfI(Users, WriteSet, I, L,
true, &LoadMoveSetPairs)) {
2831 DEBUG(
dbgs() <<
"BBV: moving: " << *InstToMove <<
2832 " to after " << *InsertionPt <<
"\n");
// The moved instruction becomes the new insertion point, so successive moves
// are placed one after another.
2835 InsertionPt = InstToMove;
// Records, for instruction I, which tracked instructions L may read memory.
// NOTE(review): most of the parameter list and the enclosing loop over L are
// absent from this listing.
2845 void BBVectorize::collectPairLoadMoveSet(
BasicBlock &BB,
// Only instructions flagged by trackUsesOfI that may read from memory are
// recorded -- both in the per-instruction list LoadMoveSet[L] and as an
// (L, I) pair in LoadMoveSetPairs.
2861 if (trackUsesOfI(Users, WriteSet, I, L)) {
2862 if (L->mayReadFromMemory()) {
2863 LoadMoveSet[L].push_back(I);
2864 LoadMoveSetPairs.
insert(ValuePair(L, I));
// Runs collectPairLoadMoveSet for the pairable instructions that have an
// entry in ChosenPairs.
// NOTE(review): part of the parameter list and the lookup that defines P and
// I are absent from this listing.
2877 void BBVectorize::collectLoadMoveSet(
BasicBlock &BB,
2878 std::vector<Value *> &PairableInsts,
2882 for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
2883 PIE = PairableInsts.end(); PI != PIE; ++PI) {
// Instructions without a chosen pair are skipped.
2885 if (P == ChosenPairs.
end())
continue;
2888 collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
2889 LoadMoveSetPairs, I);
// NOTE(review): the signature is missing from the listing; the call
// combineMetadata(K, H) elsewhere in this file suggests these lines are its
// body -- confirm against the full source.
// Iterates over the (kind, node) entries of Metadata; what is done with each
// entry is on lines not present here.
2899 for (
unsigned i = 0, n = Metadata.
size(); i < n; ++i) {
2900 unsigned Kind = Metadata[i].first;
2902 MDNode *KMD = Metadata[i].second;
// Fuses each pair recorded in ChosenPairs into a single instruction K inside
// BB, deciding the operand order of the pair, rewiring uses, and keeping the
// load-move bookkeeping in step.
// NOTE(review): part of the parameter list and many interior lines (loop
// headers, declarations of P, I, L, H, K, K1, K2, R, ...) are absent from this
// listing; the comments describe only the visible statements.
2924 void BBVectorize::fuseChosenPairs(
BasicBlock &BB,
2925 std::vector<Value *> &PairableInsts,
2929 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
2930 DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
// Every chosen pair is also recorded with its members swapped.
2938 E = ChosenPairs.
end(); P != E; ++
P)
2939 FlippedPairs.
insert(ValuePair(P->second, P->first));
2941 E = FlippedPairs.
end(); P != E; ++
P)
2946 collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
2947 LoadMoveSet, LoadMoveSetPairs);
2949 DEBUG(
dbgs() <<
"BBV: initial: \n" << BB <<
"\n");
// Instructions with no chosen pair, or whose depth factor is zero, take
// branches whose bodies are not shown here.
2953 if (P == ChosenPairs.
end()) {
2958 if (getDepthFactor(P->first) == 0) {
2968 *J = cast<Instruction>(P->second);
2970 DEBUG(
dbgs() <<
"BBV: fusing: " << *I <<
2971 " <-> " << *J <<
"\n");
// Both the pair and its flipped counterpart are removed from ChosenPairs; the
// flipped entry is asserted to exist.
2975 assert(FP != ChosenPairs.
end() &&
"Flipped pair not found in list");
2976 ChosenPairs.
erase(FP);
2977 ChosenPairs.
erase(P);
// Fusion is abandoned when the uses of I cannot be moved after J.
2979 if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
2980 DEBUG(
dbgs() <<
"BBV: fusion of: " << *I <<
2982 " aborted because of non-trivial dependency cycle\n");
// The order is flipped outright when (J, I) is a fixed-order pair. When
// neither orientation is fixed, the order is decided from the connection
// types counted below.
2989 bool FlipPairOrder = FixedOrderPairs.
count(ValuePair(J, I));
2990 if (!FlipPairOrder && !FixedOrderPairs.
count(ValuePair(I, J))) {
2995 bool OrigOrder =
true;
// Look the pair up in ConnectedPairDeps, trying (I, J) first and (J, I) as a
// fallback.
2997 ConnectedPairDeps.
find(ValuePair(I, J));
2998 if (IJ == ConnectedPairDeps.
end()) {
2999 IJ = ConnectedPairDeps.
find(ValuePair(J, I));
3003 if (IJ != ConnectedPairDeps.
end()) {
// Tally how many recorded dependencies are direct versus swapped connections.
3004 unsigned NumDepsDirect = 0, NumDepsSwap = 0;
3005 for (std::vector<ValuePair>::iterator
T = IJ->second.
begin(),
3006 TE = IJ->second.
end();
T != TE; ++
T) {
3007 VPPair Q(IJ->first, *
T);
// The connection type is keyed by the reversed pair-of-pairs
// (Q.second, Q.first) and is asserted to be present.
3009 PairConnectionTypes.
find(VPPair(Q.second, Q.first));
3010 assert(R != PairConnectionTypes.
end() &&
3011 "Cannot find pair connection type");
3012 if (R->second == PairConnectionDirect)
3014 else if (R->second == PairConnectionSwap)
// Flip only when swapped connections strictly outnumber direct ones.
3021 if (NumDepsSwap > NumDepsDirect) {
3022 FlipPairOrder =
true;
3023 DEBUG(
dbgs() <<
"BBV: reordering pair: " << *I <<
3024 " <-> " << *J <<
"\n");
// For each connection recorded for (H, L) in ConnectedPairs, toggle its type
// between direct and swap; other types are left as they are.
3036 ConnectedPairs.
find(ValuePair(H, L));
3037 if (HL != ConnectedPairs.
end())
3038 for (std::vector<ValuePair>::iterator
T = HL->second.
begin(),
3039 TE = HL->second.
end();
T != TE; ++
T) {
3040 VPPair Q(HL->first, *
T);
3042 assert(R != PairConnectionTypes.
end() &&
3043 "Cannot find pair connection type");
3044 if (R->second == PairConnectionDirect)
3045 R->second = PairConnectionSwap;
3046 else if (R->second == PairConnectionSwap)
3047 R->second = PairConnectionDirect;
// L precedes H exactly when the pair order was not flipped.
3050 bool LBeforeH = !FlipPairOrder;
3053 getReplacementInputsForPair(Context, L, H, ReplacedOperands,
3061 else if (H->hasName())
3064 if (!isa<StoreInst>(K))
3067 combineMetadata(K, H);
3070 for (
unsigned o = 0; o < NumOperands; ++o)
3078 replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
3085 moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
// For non-store pairs, uses of H are redirected to K2 and alias analysis is
// told that L and H are replaced by K1 and K2 respectively.
3087 if (!isa<StoreInst>(I)) {
3089 H->replaceAllUsesWith(K2);
3090 AA->replaceWithNewValue(L, K1);
3091 AA->replaceWithNewValue(H, K2);
// Carry the LoadMoveSet entries of I and J over to K. The new (K, member)
// pairs are first gathered in NewSetMembers and only then inserted --
// presumably so LoadMoveSet is not modified while II/JJ still point into it.
3101 std::vector<ValuePair> NewSetMembers;
3103 LoadMoveSet.
find(I);
3104 if (II != LoadMoveSet.
end())
3105 for (std::vector<Value *>::iterator
N = II->second.
begin(),
3107 NewSetMembers.push_back(ValuePair(K, *
N));
3109 LoadMoveSet.
find(J);
3110 if (JJ != LoadMoveSet.
end())
3111 for (std::vector<Value *>::iterator
N = JJ->second.
begin(),
3113 NewSetMembers.push_back(ValuePair(K, *
N));
3114 for (std::vector<ValuePair>::iterator
A = NewSetMembers.begin(),
3115 AE = NewSetMembers.end();
A != AE; ++
A) {
3116 LoadMoveSet[
A->first].push_back(
A->second);
3123 if (cast<Instruction>(PI) == J)
3135 DEBUG(
dbgs() <<
"BBV: final: \n" << BB <<
"\n");
// NOTE(review): the enclosing signature is missing from the listing;
// presumably this is the body of createBBVectorizePass -- confirm against the
// full source. It returns a newly allocated BBVectorize built from C.
3149 return new BBVectorize(C);
// NOTE(review): the enclosing signature is missing from the listing;
// presumably this is the body of vectorizeBasicBlock -- confirm against the
// full source. A local BBVectorize is built from P and C, and the result of
// its vectorizeBB(BB) call is returned.
3154 BBVectorize BBVectorizer(P, C);
3155 return BBVectorizer.vectorizeBB(BB);
bool VectorizeFMA
Vectorize the fused-multiply-add intrinsic.
void push_back(const T &Elt)
static cl::opt< unsigned > MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, cl::desc("The maximum number of candidate instruction pairs per group"))
Abstract base class of comparison instructions.
STATISTIC(NumFusedOps,"Number of operations fused by bb-vectorize")
AnalysisUsage & addPreserved()
static PassRegistry * getPassRegistry()
unsigned getScalarSizeInBits()
The main container class for the LLVM Intermediate Representation.
static cl::opt< bool > DebugPairSelection("bb-vectorize-debug-pair-selection", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" pair-selection process"))
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getNumOperands() const
virtual void getAnalysisUsage(AnalysisUsage &) const
void initializeBBVectorizePass(PassRegistry &)
static cl::opt< bool > NoMemOpBoost("bb-vectorize-no-mem-op-boost", cl::init(false), cl::Hidden, cl::desc("Don't boost the chain-depth contribution of loads and stores"))
static PointerType * get(Type *ElementType, unsigned AddressSpace)
static cl::opt< bool > IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), cl::Hidden, cl::desc("Ignore target information"))
bool VectorizeMath
Vectorize floating-point math intrinsics.
const Function * getParent() const
Return the enclosing method, or null if none.
MDNode - a tuple of other values.
iv Induction Variable Users
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LoopInfoBase< BlockT, LoopT > * LI
static cl::opt< unsigned > VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, cl::desc("The size of the native vector registers"))
Type * getPointerElementType() const
StringRef getName() const
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
bool isSingleValueType() const
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
static cl::opt< bool > AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, cl::desc("Only generate aligned loads and stores"))
unsigned MaxCandPairsForCycleCheck
The maximum number of candidate pairs with which to use a full cycle check.
bool erase(const ValueT &V)
Base class of casting instructions.
const APInt & getValue() const
Return the constant's value.
const_iterator end() const
AnalysisType * getAnalysisIfAvailable() const
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
unsigned getNumArgOperands() const
static Constant * get(ArrayRef< Constant * > V)
Check for equivalence ignoring load/store alignment.
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
ID
LLVM Calling Convention Representation.
Instruction * clone() const
static cl::opt< unsigned > MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group"))
static const char bb_vectorize_name[]
bool Pow2LenOnly
Don't try to form odd-length vectors.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
const_iterator begin() const
static cl::opt< bool > NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point math intrinsics"))
bool mayReadFromMemory() const
static cl::opt< bool > UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), cl::Hidden, cl::desc("Use the chain depth requirement with"" target information"))
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
static cl::opt< bool > NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"))
static bool isValidElementType(Type *ElemTy)
This class represents a no-op cast from one type to another.
static std::string utostr(uint64_t X, bool isNeg=false)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
void replaceAllUsesWith(Value *V)
unsigned getNumElements() const
Return the number of elements in the Vector type.
bool VectorizePointers
Vectorize pointer values.
bool isPPC_FP128Ty() const
isPPC_FP128Ty - Return true if this is powerpc long double.
bool VectorizeCmp
Vectorize comparison instructions.
unsigned MaxIter
The maximum number of pairing iterations.
bool VectorizeMemOps
Vectorize loads and stores.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD=0, const TargetLibraryInfo *TLI=0)
static cl::opt< bool > FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, cl::desc("Use a fast instruction dependency analysis"))
static cl::opt< bool > NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize comparison instructions"))
bool isX86_MMXTy() const
isX86_MMXTy - Return true if this is X86 MMX.
bool isIntOrIntVectorTy() const
void intersectOptionalDataWith(const Value *V)
initializer< Ty > init(const Ty &Val)
unsigned getAlignment() const
iterator find(const ValueT &V)
bool AlignedOnly
Only generate aligned loads and stores.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=0)
static cl::opt< bool > NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point values"))
void insertBefore(Instruction *InsertPos)
LLVM Basic Block Representation.
static cl::opt< unsigned > SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, cl::desc("The maximum search distance for instruction pairs"))
unsigned getIntrinsicID() const LLVM_READONLY
bool SplatBreaksChain
Replicating one element to a pair breaks the chain.
static cl::opt< bool > DebugInstructionExamination("bb-vectorize-debug-instruction-examination", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" instruction-examination process"))
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
ItTy next(ItTy it, Dist n)
Value * getOperand(unsigned i) const
Value * getPointerOperand()
unsigned SearchLimit
The maximum search distance for instruction pairs.
static cl::opt< bool > NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize casting (conversion) operations"))
static cl::opt< unsigned > MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"" a full cycle check"))
bool count(const KeyT &Val) const
count - Return true if the specified key is in the map.
#define INITIALIZE_AG_DEPENDENCY(depName)
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Methods for metadata merging.
static UndefValue * get(Type *T)
iterator erase(iterator I)
static cl::opt< unsigned > MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, cl::desc("The maximum number of pairing iterations"))
bool isFPOrFPVectorTy() const
void setMetadata(unsigned KindID, MDNode *Node)
bool mayWriteToMemory() const
unsigned MaxPairs
The maximum number of candidate instruction pairs per group.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
AnalysisType & getAnalysis() const
bool count(const ValueT &V) const
std::pair< iterator, bool > insert(const ValueT &V)
Class for constant integers.
bool VectorizeFloats
Vectorize floating-point values.
unsigned getVectorNumElements() const
static cl::opt< bool > SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, cl::desc("Replicating one element to a pair breaks the chain"))
MDNode * getMetadata(unsigned KindID) const
bool vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C=VectorizeConfig())
Vectorize the BasicBlock.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
bool erase(const KeyT &Val)
static cl::opt< bool > NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize loads and stores"))
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
Function * getCalledFunction() const
static cl::opt< bool > NoPointers("bb-vectorize-no-pointers", cl::init(true), cl::Hidden, cl::desc("Don't try to vectorize pointer values"))
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
bool FastDep
Use a fast instruction dependency analysis.
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Value * getArgOperand(unsigned i) const
static cl::opt< bool > NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize getelementptr instructions"))
static cl::opt< bool > NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize boolean (i1) values"))
bool VectorizeCasts
Vectorize casting (conversion) operations.
BasicBlockPass * createBBVectorizePass(const VectorizeConfig &C=VectorizeConfig())
bool VectorizeBools
Vectorize boolean values.
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
static cl::opt< unsigned > ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, cl::desc("The required chain depth for vectorization"))
unsigned ReqChainDepth
The required chain depth for vectorization.
bool isX86_FP80Ty() const
isX86_FP80Ty - Return true if this is x86 long double.
unsigned MaxInsts
The maximum number of pairable instructions per group.
static IntegerType * getInt32Ty(LLVMContext &C)
static cl::opt< bool > NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize integer values"))
unsigned getAlignment() const
void insertAfter(Instruction *InsertPos)
bool NoMemOpBoost
Don't boost the chain-depth contribution of loads and stores.
bool VectorizeGEP
Vectorize getelementptr instructions.
static Function * getCalledFunction(const Value *V, bool LookThroughBitCast)
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
const Type * getScalarType() const
static cl::opt< bool > PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, dump the basic block after"" every pair is fused"))
static cl::opt< bool > DebugCycleCheck("bb-vectorize-debug-cycle-check", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" cycle-checking process"))
static cl::opt< bool > DebugCandidateSelection("bb-vectorize-debug-candidate-selection", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the"" candidate-selection process"))
unsigned getPrimitiveSizeInBits() const
void mutateType(Type *Ty)
VectorizeConfig()
Initialize the VectorizeConfig from command line options.
LLVMContext & getContext() const
Get the context in which this basic block lives.
bool add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo)
LLVM Value Representation.
ValueT lookup(const KeyT &Val) const
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
static VectorType * get(Type *ElementType, unsigned NumElements)
bool VectorizeInts
Vectorize integer values.
static cl::opt< bool > Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, cl::desc("Don't try to form non-2^n-length vectors"))
static cl::opt< HelpPrinterWrapper, true, parser< bool > > HOp("help", cl::desc("Display available options (-help-hidden for more)"), cl::location(WrappedNormalPrinter), cl::ValueDisallowed)
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const
Determine if one instruction is the same operation as another.
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
bool VectorizeSelect
Vectorize select instructions.
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
int64_t getSExtValue() const
Return the sign extended value.
iterator find(const KeyT &Val)
Value * getPointerOperand()
const BasicBlock * getParent() const
InstListType::iterator iterator
Instruction iterators...
INITIALIZE_PASS(GlobalMerge,"global-merge","Global Merge", false, false) bool GlobalMerge const DataLayout * TD
static cl::opt< bool > NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize select instructions"))
unsigned VectorBits
The size of the native vector registers.