LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
X86TargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This file implements a TargetTransformInfo analysis pass specific to the
11 /// X86 target machine. It uses the target's detailed information to provide
12 /// more precise answers to certain TTI queries, while letting the target
13 /// independent and default TTI implementations handle the rest.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #define DEBUG_TYPE "x86tti"
18 #include "X86.h"
19 #include "X86TargetMachine.h"
21 #include "llvm/Support/Debug.h"
23 #include "llvm/Target/CostTable.h"
24 using namespace llvm;
25 
26 // Declare the pass initialization routine locally as target-specific passes
27 // don't have a target-wide initialization entry point, and so we rely on the
28 // pass constructor initialization.
29 namespace llvm {
31 }
32 
33 namespace {
34 
/// X86 implementation of the TargetTransformInfo interface, packaged as an
/// ImmutablePass. Cost queries are answered from the subtarget and target
/// lowering objects captured at construction time.
35 class X86TTI : public ImmutablePass, public TargetTransformInfo {
 /// Subtarget used for feature queries; taken from the target machine.
36  const X86Subtarget *ST;
 /// Target lowering used for type legalization and opcode translation.
37  const X86TargetLowering *TLI;
38 
39  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
40  /// are set if the result needs to be inserted and/or extracted from vectors.
41  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
42 
43 public:
 /// Default construction is not supported: the body unconditionally hits
 /// llvm_unreachable. Use the X86TargetMachine constructor instead.
44  X86TTI() : ImmutablePass(ID), ST(0), TLI(0) {
45  llvm_unreachable("This pass cannot be directly constructed");
46  }
47 
 /// Build the pass for \p TM, caching its subtarget and target lowering.
 /// NOTE(review): the embedded listing numbers jump from 50 to 52, so one line
 /// of the constructor body is not visible here - confirm against the real
 /// source before relying on this body being empty.
48  X86TTI(const X86TargetMachine *TM)
49  : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
50  TLI(TM->getTargetLowering()) {
52  }
53 
 /// Push this object onto the TTI stack when the pass is initialized.
54  virtual void initializePass() {
55  pushTTIStack(this);
56  }
57 
 /// Pop the TTI stack when the pass is finalized.
58  virtual void finalizePass() {
59  popTTIStack();
60  }
61 
 /// NOTE(review): the embedded listing numbers jump from 62 to 64, so the body
 /// line of this override is not visible here - presumably it forwards to
 /// TargetTransformInfo::getAnalysisUsage; confirm against the real source.
62  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
64  }
65 
66  /// Pass identification.
67  static char ID;
68 
69  /// Provide necessary pointer adjustments for the two base classes.
 /// Returns the TargetTransformInfo base subobject when \p ID is the address
 /// of TargetTransformInfo::ID, and this object otherwise.
70  virtual void *getAdjustedAnalysisPointer(const void *ID) {
71  if (ID == &TargetTransformInfo::ID)
72  return (TargetTransformInfo*)this;
73  return this;
74  }
75 
76  /// \name Scalar TTI Implementations
77  /// @{
78  virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
79 
80  /// @}
81 
82  /// \name Vector TTI Implementations
83  /// @{
84 
85  virtual unsigned getNumberOfRegisters(bool Vector) const;
86  virtual unsigned getRegisterBitWidth(bool Vector) const;
87  virtual unsigned getMaximumUnrollFactor() const;
88  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
89  OperandValueKind,
90  OperandValueKind) const;
91  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
92  int Index, Type *SubTp) const;
93  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
94  Type *Src) const;
95  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
96  Type *CondTy) const;
97  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
98  unsigned Index) const;
99  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
100  unsigned Alignment,
101  unsigned AddressSpace) const;
102 
103  virtual unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex) const;
104 
105  virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
106  bool IsPairwiseForm) const;
107 
108  /// @}
109 };
110 
111 } // end anonymous namespace
112 
// Register X86TTI with the TargetTransformInfo analysis group. Following the
// macro's parameter order (passName, agName, arg, name, cfg, analysis, def):
// arg is "x86tti", cfg and analysis are true, and def is false.
113 INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
114  "X86 Target Transform Info", true, true, false)
// Definition of the static pass identifier declared inside the class.
115 char X86TTI::ID = 0;
116 
119  return new X86TTI(TM);
120 }
121 
122 
123 //===----------------------------------------------------------------------===//
124 //
125 // X86 cost model.
126 //
127 //===----------------------------------------------------------------------===//
128 
129 X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
130  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
131  // TODO: Currently the __builtin_popcount() implementation using SSE3
132  // instructions is inefficient. Once the problem is fixed, we should
133  // call ST->hasSSE3() instead of ST->hasPOPCNT().
134  return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
135 }
136 
137 unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
138  if (Vector && !ST->hasSSE1())
139  return 0;
140 
141  if (ST->is64Bit())
142  return 16;
143  return 8;
144 }
145 
146 unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
147  if (Vector) {
148  if (ST->hasAVX()) return 256;
149  if (ST->hasSSE1()) return 128;
150  return 0;
151  }
152 
153  if (ST->is64Bit())
154  return 64;
155  return 32;
156 
157 }
158 
159 unsigned X86TTI::getMaximumUnrollFactor() const {
160  if (ST->isAtom())
161  return 1;
162 
163  // Sandybridge and Haswell have multiple execution ports and pipelined
164  // vector units.
165  if (ST->hasAVX())
166  return 4;
167 
168  return 2;
169 }
170 
171 unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
172  OperandValueKind Op1Info,
173  OperandValueKind Op2Info) const {
174  // Legalize the type.
175  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
176 
177  int ISD = TLI->InstructionOpcodeToISD(Opcode);
178  assert(ISD && "Invalid opcode");
179 
180  static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
181 // Shifts on v4i64/v8i32 on AVX2 are legal even though we declare to
182  // customize them to detect the cases where shift amount is a scalar one.
183  { ISD::SHL, MVT::v4i32, 1 },
184  { ISD::SRL, MVT::v4i32, 1 },
185  { ISD::SRA, MVT::v4i32, 1 },
186  { ISD::SHL, MVT::v8i32, 1 },
187  { ISD::SRL, MVT::v8i32, 1 },
188  { ISD::SRA, MVT::v8i32, 1 },
189  { ISD::SHL, MVT::v2i64, 1 },
190  { ISD::SRL, MVT::v2i64, 1 },
191  { ISD::SHL, MVT::v4i64, 1 },
192  { ISD::SRL, MVT::v4i64, 1 },
193 
194  { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
195  { ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
196 
197  { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
198  { ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
199 
200  { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
201  { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
202  { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
203 
204  // Vectorizing division is a bad idea. See the SSE2 table for more comments.
205  { ISD::SDIV, MVT::v32i8, 32*20 },
206  { ISD::SDIV, MVT::v16i16, 16*20 },
207  { ISD::SDIV, MVT::v8i32, 8*20 },
208  { ISD::SDIV, MVT::v4i64, 4*20 },
209  { ISD::UDIV, MVT::v32i8, 32*20 },
210  { ISD::UDIV, MVT::v16i16, 16*20 },
211  { ISD::UDIV, MVT::v8i32, 8*20 },
212  { ISD::UDIV, MVT::v4i64, 4*20 },
213  };
214 
215  // Look for AVX2 lowering tricks.
216  if (ST->hasAVX2()) {
217  int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
218  if (Idx != -1)
219  return LT.first * AVX2CostTable[Idx].Cost;
220  }
221 
223  SSE2UniformConstCostTable[] = {
224  // We don't correctly identify costs of casts because they are marked as
225  // custom.
226  // Constant splats are cheaper for the following instructions.
227  { ISD::SHL, MVT::v16i8, 1 }, // psllw.
228  { ISD::SHL, MVT::v8i16, 1 }, // psllw.
229  { ISD::SHL, MVT::v4i32, 1 }, // pslld
230  { ISD::SHL, MVT::v2i64, 1 }, // psllq.
231 
232  { ISD::SRL, MVT::v16i8, 1 }, // psrlw.
233  { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
234  { ISD::SRL, MVT::v4i32, 1 }, // psrld.
235  { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
236 
237  { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
238  { ISD::SRA, MVT::v8i16, 1 }, // psraw.
239  { ISD::SRA, MVT::v4i32, 1 }, // psrad.
240  };
241 
243  ST->hasSSE2()) {
244  int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
245  if (Idx != -1)
246  return LT.first * SSE2UniformConstCostTable[Idx].Cost;
247  }
248 
249 
250  static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
251  // We don't correctly identify costs of casts because they are marked as
252  // custom.
253  // For some cases, where the shift amount is a scalar we would be able
254  // to generate better code. Unfortunately, when this is the case the value
255  // (the splat) will get hoisted out of the loop, thereby making it invisible
256  // to ISel. The cost model must return worst case assumptions because it is
257  // used for vectorization and we don't want to make vectorized code worse
258  // than scalar code.
259  { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
260  { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
261  { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
262  { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
263 
264  { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
265  { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
266  { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
267  { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
268 
269  { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
270  { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
271  { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
272  { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
273 
274  // It is not a good idea to vectorize division. We have to scalarize it and
275 // in the process we will often end up having to spill regular
276  // registers. The overhead of division is going to dominate most kernels
277  // anyways so try hard to prevent vectorization of division - it is
278  // generally a bad idea. Assume somewhat arbitrarily that we have to be able
279  // to hide "20 cycles" for each lane.
280  { ISD::SDIV, MVT::v16i8, 16*20 },
281  { ISD::SDIV, MVT::v8i16, 8*20 },
282  { ISD::SDIV, MVT::v4i32, 4*20 },
283  { ISD::SDIV, MVT::v2i64, 2*20 },
284  { ISD::UDIV, MVT::v16i8, 16*20 },
285  { ISD::UDIV, MVT::v8i16, 8*20 },
286  { ISD::UDIV, MVT::v4i32, 4*20 },
287  { ISD::UDIV, MVT::v2i64, 2*20 },
288  };
289 
290  if (ST->hasSSE2()) {
291  int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
292  if (Idx != -1)
293  return LT.first * SSE2CostTable[Idx].Cost;
294  }
295 
296  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
297  // We don't have to scalarize unsupported ops. We can issue two half-sized
298  // operations and we only need to extract the upper YMM half.
299  // Two ops + 1 extract + 1 insert = 4.
300  { ISD::MUL, MVT::v8i32, 4 },
301  { ISD::SUB, MVT::v8i32, 4 },
302  { ISD::ADD, MVT::v8i32, 4 },
303  { ISD::SUB, MVT::v4i64, 4 },
304  { ISD::ADD, MVT::v4i64, 4 },
305  // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
306  // are lowered as a series of long multiplies(3), shifts(4) and adds(2)
307  // Because we believe v4i64 to be a legal type, we must also include the
308  // split factor of two in the cost table. Therefore, the cost here is 18
309  // instead of 9.
310  { ISD::MUL, MVT::v4i64, 18 },
311  };
312 
313  // Look for AVX1 lowering tricks.
314  if (ST->hasAVX() && !ST->hasAVX2()) {
315  int Idx = CostTableLookup(AVX1CostTable, ISD, LT.second);
316  if (Idx != -1)
317  return LT.first * AVX1CostTable[Idx].Cost;
318  }
319 
320  // Custom lowering of vectors.
321  static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
322 // A v2i64/v4i64 multiply is custom lowered as a series of long
323  // multiplies(3), shifts(4) and adds(2).
324  { ISD::MUL, MVT::v2i64, 9 },
325  { ISD::MUL, MVT::v4i64, 9 },
326  };
327  int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
328  if (Idx != -1)
329  return LT.first * CustomLowered[Idx].Cost;
330 
331  // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
332  // 2x pmuludq, 2x shuffle.
333  if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
334  !ST->hasSSE41())
335  return 6;
336 
337  // Fallback to the default implementation.
338  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
339  Op2Info);
340 }
341 
342 unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
343  Type *SubTp) const {
344  // We only estimate the cost of reverse shuffles.
345  if (Kind != SK_Reverse)
346  return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
347 
348  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
349  unsigned Cost = 1;
350  if (LT.second.getSizeInBits() > 128)
351  Cost = 3; // Extract + insert + copy.
352 
353  // Multiple by the number of parts.
354  return Cost * LT.first;
355 }
356 
357 unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
358  int ISD = TLI->InstructionOpcodeToISD(Opcode);
359  assert(ISD && "Invalid opcode");
360 
361  std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
362  std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
363 
365  SSE2ConvTbl[] = {
366  // These are somewhat magic numbers justified by looking at the output of
367  // Intel's IACA, running some kernels and making sure when we take
368  // legalization into account the throughput will be overestimated.
377  // There are faster sequences for float conversions.
386  };
387 
388  if (ST->hasSSE2() && !ST->hasAVX()) {
389  int Idx =
390  ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
391  if (Idx != -1)
392  return LTSrc.first * SSE2ConvTbl[Idx].Cost;
393  }
394 
395  EVT SrcTy = TLI->getValueType(Src);
396  EVT DstTy = TLI->getValueType(Dst);
397 
398  // The function getSimpleVT only handles simple value types.
399  if (!SrcTy.isSimple() || !DstTy.isSimple())
400  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
401 
403  AVXConversionTbl[] = {
413 
426 
439 
448  };
449 
450  if (ST->hasAVX()) {
451  int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
452  SrcTy.getSimpleVT());
453  if (Idx != -1)
454  return AVXConversionTbl[Idx].Cost;
455  }
456 
457  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
458 }
459 
460 unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
461  Type *CondTy) const {
462  // Legalize the type.
463  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
464 
465  MVT MTy = LT.second;
466 
467  int ISD = TLI->InstructionOpcodeToISD(Opcode);
468  assert(ISD && "Invalid opcode");
469 
470  static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
471  { ISD::SETCC, MVT::v2f64, 1 },
472  { ISD::SETCC, MVT::v4f32, 1 },
473  { ISD::SETCC, MVT::v2i64, 1 },
474  { ISD::SETCC, MVT::v4i32, 1 },
475  { ISD::SETCC, MVT::v8i16, 1 },
476  { ISD::SETCC, MVT::v16i8, 1 },
477  };
478 
479  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
480  { ISD::SETCC, MVT::v4f64, 1 },
481  { ISD::SETCC, MVT::v8f32, 1 },
482  // AVX1 does not support 8-wide integer compare.
483  { ISD::SETCC, MVT::v4i64, 4 },
484  { ISD::SETCC, MVT::v8i32, 4 },
485  { ISD::SETCC, MVT::v16i16, 4 },
486  { ISD::SETCC, MVT::v32i8, 4 },
487  };
488 
489  static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
490  { ISD::SETCC, MVT::v4i64, 1 },
491  { ISD::SETCC, MVT::v8i32, 1 },
492  { ISD::SETCC, MVT::v16i16, 1 },
493  { ISD::SETCC, MVT::v32i8, 1 },
494  };
495 
496  if (ST->hasAVX2()) {
497  int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
498  if (Idx != -1)
499  return LT.first * AVX2CostTbl[Idx].Cost;
500  }
501 
502  if (ST->hasAVX()) {
503  int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
504  if (Idx != -1)
505  return LT.first * AVX1CostTbl[Idx].Cost;
506  }
507 
508  if (ST->hasSSE42()) {
509  int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
510  if (Idx != -1)
511  return LT.first * SSE42CostTbl[Idx].Cost;
512  }
513 
514  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
515 }
516 
517 unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
518  unsigned Index) const {
519  assert(Val->isVectorTy() && "This must be a vector type");
520 
521  if (Index != -1U) {
522  // Legalize the type.
523  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
524 
525  // This type is legalized to a scalar type.
526  if (!LT.second.isVector())
527  return 0;
528 
529  // The type may be split. Normalize the index to the new type.
530  unsigned Width = LT.second.getVectorNumElements();
531  Index = Index % Width;
532 
533  // Floating point scalars are already located in index #0.
534  if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
535  return 0;
536  }
537 
538  return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
539 }
540 
541 unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert,
542  bool Extract) const {
543  assert (Ty->isVectorTy() && "Can only scalarize vectors");
544  unsigned Cost = 0;
545 
546  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
547  if (Insert)
548  Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
549  if (Extract)
550  Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
551  }
552 
553  return Cost;
554 }
555 
556 unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
557  unsigned AddressSpace) const {
558  // Handle non power of two vectors such as <3 x float>
559  if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
560  unsigned NumElem = VTy->getVectorNumElements();
561 
562  // Handle a few common cases:
563  // <3 x float>
564  if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
565  // Cost = 64 bit store + extract + 32 bit store.
566  return 3;
567 
568  // <3 x double>
569  if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
570  // Cost = 128 bit store + unpack + 64 bit store.
571  return 3;
572 
573  // Assume that all other non power-of-two numbers are scalarized.
574  if (!isPowerOf2_32(NumElem)) {
575  unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode,
576  VTy->getScalarType(),
577  Alignment,
578  AddressSpace);
579  unsigned SplitCost = getScalarizationOverhead(Src,
580  Opcode == Instruction::Load,
581  Opcode==Instruction::Store);
582  return NumElem * Cost + SplitCost;
583  }
584  }
585 
586  // Legalize the type.
587  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
588  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
589  "Invalid Opcode");
590 
591  // Each load/store unit costs 1.
592  unsigned Cost = LT.first * 1;
593 
594  // On Sandybridge 256bit load/stores are double pumped
595  // (but not on Haswell).
596  if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
597  Cost*=2;
598 
599  return Cost;
600 }
601 
602 unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
603  // Address computations in vectorized code with non-consecutive addresses will
604  // likely result in more instructions compared to scalar code where the
605  // computation can more often be merged into the index mode. The resulting
606  // extra micro-ops can significantly decrease throughput.
607  unsigned NumVectorInstToHideOverhead = 10;
608 
609  if (Ty->isVectorTy() && IsComplex)
610  return NumVectorInstToHideOverhead;
611 
613 }
614 
615 unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
616  bool IsPairwise) const {
617 
618  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
619 
620  MVT MTy = LT.second;
621 
622  int ISD = TLI->InstructionOpcodeToISD(Opcode);
623  assert(ISD && "Invalid opcode");
624 
625  // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
626  // and make it as the cost.
627 
628  static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
629  { ISD::FADD, MVT::v2f64, 2 },
630  { ISD::FADD, MVT::v4f32, 4 },
631  { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
632  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
633  { ISD::ADD, MVT::v8i16, 5 },
634  };
635 
636  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
637  { ISD::FADD, MVT::v4f32, 4 },
638  { ISD::FADD, MVT::v4f64, 5 },
639  { ISD::FADD, MVT::v8f32, 7 },
640  { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
641  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
642  { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
643  { ISD::ADD, MVT::v8i16, 5 },
644  { ISD::ADD, MVT::v8i32, 5 },
645  };
646 
647  static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
648  { ISD::FADD, MVT::v2f64, 2 },
649  { ISD::FADD, MVT::v4f32, 4 },
650  { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
651  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
652  { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
653  };
654 
655  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
656  { ISD::FADD, MVT::v4f32, 3 },
657  { ISD::FADD, MVT::v4f64, 3 },
658  { ISD::FADD, MVT::v8f32, 4 },
659  { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
660  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
661  { ISD::ADD, MVT::v4i64, 3 },
662  { ISD::ADD, MVT::v8i16, 4 },
663  { ISD::ADD, MVT::v8i32, 5 },
664  };
665 
666  if (IsPairwise) {
667  if (ST->hasAVX()) {
668  int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
669  if (Idx != -1)
670  return LT.first * AVX1CostTblPairWise[Idx].Cost;
671  }
672 
673  if (ST->hasSSE42()) {
674  int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
675  if (Idx != -1)
676  return LT.first * SSE42CostTblPairWise[Idx].Cost;
677  }
678  } else {
679  if (ST->hasAVX()) {
680  int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
681  if (Idx != -1)
682  return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
683  }
684 
685  if (ST->hasSSE42()) {
686  int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
687  if (Idx != -1)
688  return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
689  }
690  }
691 
692  return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
693 }
694 
static PassRegistry * getPassRegistry()
Cost tables and simple lookup functions.
int CostTableLookup(const CostTblEntry< TypeTy > *Tbl, unsigned len, int ISD, CompareTy Ty)
Find in cost table, TypeTy must be comparable to CompareTy by ==.
Definition: CostTable.h:30
Type Conversion Cost Table.
Definition: CostTable.h:49
Cost Table Entry.
Definition: CostTable.h:22
#define llvm_unreachable(msg)
virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex=false) const
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:176
bool isFloatingPointTy() const
Definition: Type.h:162
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index=0, Type *SubTp=0) const
Simple binary floating point operators.
Definition: ISDOpcodes.h:222
bool isVectorTy() const
Definition: Type.h:229
virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=0) const
void initializeX86TTIPass(PassRegistry &)
static char ID
Analysis group identification.
virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
#define INITIALIZE_AG_PASS(passName, agName, arg, name, cfg, analysis, def)
Definition: PassSupport.h:268
virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const
unsigned getVectorNumElements() const
Definition: Type.cpp:214
AddressSpace
Definition: NVPTXBaseInfo.h:22
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:357
virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const
Calculate the cost of performing a vector reduction.
virtual void getAnalysisUsage(AnalysisUsage &AU) const
All pass subclasses must call TargetTransformInfo::getAnalysisUsage.
const Type * getScalarType() const
Definition: Type.cpp:51
ImmutablePass * createX86TargetTransformInfoPass(const X86TargetMachine *TM)
Creates an X86-specific Target Transformation Info pass.
bool isSimple() const
Definition: ValueTypes.h:640
int ConvertCostTableLookup(const TypeConversionCostTblEntry< TypeTy > *Tbl, unsigned len, int ISD, CompareTy Dst, CompareTy Src)
Definition: CostTable.h:59
virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
bool isPowerOf2_32(uint32_t Value)
Definition: MathExtras.h:354
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue) const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:363
MVT getSimpleVT() const
Definition: ValueTypes.h:749