17 #define DEBUG_TYPE "x86tti"
41 unsigned getScalarizationOverhead(
Type *Ty,
bool Insert,
bool Extract)
const;
50 TLI(TM->getTargetLowering()) {
54 virtual void initializePass() {
58 virtual void finalizePass() {
70 virtual void *getAdjustedAnalysisPointer(
const void *
ID) {
78 virtual PopcntSupportKind getPopcntSupport(
unsigned TyWidth)
const;
85 virtual unsigned getNumberOfRegisters(
bool Vector)
const;
86 virtual unsigned getRegisterBitWidth(
bool Vector)
const;
87 virtual unsigned getMaximumUnrollFactor()
const;
88 virtual unsigned getArithmeticInstrCost(
unsigned Opcode,
Type *Ty,
90 OperandValueKind)
const;
91 virtual unsigned getShuffleCost(ShuffleKind
Kind,
Type *Tp,
92 int Index,
Type *SubTp)
const;
93 virtual unsigned getCastInstrCost(
unsigned Opcode,
Type *Dst,
95 virtual unsigned getCmpSelInstrCost(
unsigned Opcode,
Type *ValTy,
97 virtual unsigned getVectorInstrCost(
unsigned Opcode,
Type *Val,
98 unsigned Index)
const;
99 virtual unsigned getMemoryOpCost(
unsigned Opcode,
Type *Src,
103 virtual unsigned getAddressComputationCost(
Type *PtrTy,
bool IsComplex)
const;
105 virtual unsigned getReductionCost(
unsigned Opcode,
Type *Ty,
106 bool IsPairwiseForm)
const;
114 "X86 Target Transform Info",
true,
true,
false)
119 return new X86TTI(TM);
129 X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(
unsigned TyWidth)
const {
130 assert(
isPowerOf2_32(TyWidth) &&
"Ty width must be power of 2");
134 return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
137 unsigned X86TTI::getNumberOfRegisters(
bool Vector)
const {
138 if (Vector && !
ST->hasSSE1())
146 unsigned X86TTI::getRegisterBitWidth(
bool Vector)
const {
148 if (
ST->hasAVX())
return 256;
149 if (
ST->hasSSE1())
return 128;
159 unsigned X86TTI::getMaximumUnrollFactor()
const {
171 unsigned X86TTI::getArithmeticInstrCost(
unsigned Opcode,
Type *Ty,
172 OperandValueKind Op1Info,
173 OperandValueKind Op2Info)
const {
175 std::pair<unsigned, MVT>
LT = TLI->getTypeLegalizationCost(Ty);
177 int ISD = TLI->InstructionOpcodeToISD(Opcode);
178 assert(ISD &&
"Invalid opcode");
219 return LT.first * AVX2CostTable[Idx].
Cost;
223 SSE2UniformConstCostTable[] = {
246 return LT.first * SSE2UniformConstCostTable[Idx].
Cost;
293 return LT.first * SSE2CostTable[Idx].
Cost;
314 if (
ST->hasAVX() && !
ST->hasAVX2()) {
317 return LT.first * AVX1CostTable[Idx].
Cost;
329 return LT.first * CustomLowered[Idx].
Cost;
342 unsigned X86TTI::getShuffleCost(ShuffleKind
Kind,
Type *Tp,
int Index,
345 if (Kind != SK_Reverse)
348 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
350 if (LT.second.getSizeInBits() > 128)
354 return Cost * LT.first;
357 unsigned X86TTI::getCastInstrCost(
unsigned Opcode,
Type *Dst,
Type *Src)
const {
358 int ISD = TLI->InstructionOpcodeToISD(Opcode);
359 assert(ISD &&
"Invalid opcode");
361 std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
362 std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
388 if (
ST->hasSSE2() && !
ST->hasAVX()) {
392 return LTSrc.first * SSE2ConvTbl[Idx].
Cost;
395 EVT SrcTy = TLI->getValueType(Src);
396 EVT DstTy = TLI->getValueType(Dst);
403 AVXConversionTbl[] = {
454 return AVXConversionTbl[Idx].
Cost;
460 unsigned X86TTI::getCmpSelInstrCost(
unsigned Opcode,
Type *ValTy,
461 Type *CondTy)
const {
463 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
467 int ISD = TLI->InstructionOpcodeToISD(Opcode);
468 assert(ISD &&
"Invalid opcode");
499 return LT.first * AVX2CostTbl[Idx].
Cost;
505 return LT.first * AVX1CostTbl[Idx].
Cost;
508 if (
ST->hasSSE42()) {
511 return LT.first * SSE42CostTbl[Idx].
Cost;
517 unsigned X86TTI::getVectorInstrCost(
unsigned Opcode,
Type *Val,
518 unsigned Index)
const {
519 assert(Val->
isVectorTy() &&
"This must be a vector type");
523 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
526 if (!LT.second.isVector())
530 unsigned Width = LT.second.getVectorNumElements();
531 Index = Index % Width;
541 unsigned X86TTI::getScalarizationOverhead(
Type *Ty,
bool Insert,
542 bool Extract)
const {
543 assert (Ty->
isVectorTy() &&
"Can only scalarize vectors");
548 Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
556 unsigned X86TTI::getMemoryOpCost(
unsigned Opcode,
Type *Src,
unsigned Alignment,
559 if (
VectorType *VTy = dyn_cast<VectorType>(Src)) {
560 unsigned NumElem = VTy->getVectorNumElements();
564 if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
569 if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
576 VTy->getScalarType(),
579 unsigned SplitCost = getScalarizationOverhead(Src,
582 return NumElem * Cost + SplitCost;
587 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
592 unsigned Cost = LT.first * 1;
596 if (LT.second.getSizeInBits() > 128 && !
ST->hasAVX2())
602 unsigned X86TTI::getAddressComputationCost(
Type *Ty,
bool IsComplex)
const {
607 unsigned NumVectorInstToHideOverhead = 10;
610 return NumVectorInstToHideOverhead;
615 unsigned X86TTI::getReductionCost(
unsigned Opcode,
Type *ValTy,
616 bool IsPairwise)
const {
618 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
622 int ISD = TLI->InstructionOpcodeToISD(Opcode);
623 assert(ISD &&
"Invalid opcode");
670 return LT.first * AVX1CostTblPairWise[Idx].
Cost;
673 if (
ST->hasSSE42()) {
676 return LT.first * SSE42CostTblPairWise[Idx].
Cost;
682 return LT.first * AVX1CostTblNoPairWise[Idx].
Cost;
685 if (
ST->hasSSE42()) {
688 return LT.first * SSE42CostTblNoPairWise[Idx].
Cost;
static PassRegistry * getPassRegistry()
Cost tables and simple lookup functions.
int CostTableLookup(const CostTblEntry< TypeTy > *Tbl, unsigned len, int ISD, CompareTy Ty)
Find in cost table, TypeTy must be comparable to CompareTy by ==.
Type Conversion Cost Table.
#define llvm_unreachable(msg)
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
ID
LLVM Calling Convention Representation.
Simple integer binary arithmetic operators.
bool isFloatingPointTy() const
Simple binary floating point operators.
void initializeX86TTIPass(PassRegistry &)
#define INITIALIZE_AG_PASS(passName, agName, arg, name, cfg, analysis, def)
unsigned getVectorNumElements() const
ZERO_EXTEND - Used for integer types, zeroing the new bits.
const Type * getScalarType() const
ImmutablePass * createX86TargetTransformInfoPass(const X86TargetMachine *TM)
Creates an X86-specific Target Transformation Info pass.
int ConvertCostTableLookup(const TypeConversionCostTblEntry< TypeTy > *Tbl, unsigned len, int ISD, CompareTy Dst, CompareTy Src)
bool isPowerOf2_32(uint32_t Value)
TRUNCATE - Completely drop the high bits.