LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AMDGPUISelLowering.cpp
Go to the documentation of this file.
1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief This is the parent TargetLowering class for hardware code gen
12 /// targets.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUISelLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUFrameLowering.h"
19 #include "AMDGPURegisterInfo.h"
20 #include "AMDGPUSubtarget.h"
21 #include "AMDILIntrinsicInfo.h"
23 #include "SIMachineFunctionInfo.h"
29 #include "llvm/IR/DataLayout.h"
30 
31 using namespace llvm;
32 static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
33  CCValAssign::LocInfo LocInfo,
34  ISD::ArgFlagsTy ArgFlags, CCState &State) {
35  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
36  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
37 
38  return true;
39 }
40 
41 #include "AMDGPUGenCallingConv.inc"
42 
45 
46  // Initialize target lowering borrowed from AMDIL
47  InitAMDILLowering();
48 
49  // We need to custom lower some of the intrinsics
51 
52  // Library functions. These default to Expand, but we have instructions
53  // for them.
61 
62  // The hardware supports ROTR, but not ROTL
64 
65  // Lower floating point store/load to integer store/load to reduce the number
66  // of patterns in tablegen.
69 
72 
75 
78 
81 
84 
85  // Custom lowering of vector stores is required for local address space
86  // stores.
88  // XXX: Native v2i32 local address space stores are possible, but not
89  // currently implemented.
91 
95  // XXX: This can be change to Custom, once ExpandVectorStores can
96  // handle 64-bit stores.
98 
101 
104 
107 
110 
113 
116 
121 
134 
137 
139 
141 
147 
148  static const MVT::SimpleValueType IntTypes[] = {
150  };
151  const size_t NumIntTypes = array_lengthof(IntTypes);
152 
153  for (unsigned int x = 0; x < NumIntTypes; ++x) {
154  MVT::SimpleValueType VT = IntTypes[x];
155  //Expand the following operations for the current type by default
172  }
173 
174  static const MVT::SimpleValueType FloatTypes[] = {
176  };
177  const size_t NumFloatTypes = array_lengthof(FloatTypes);
178 
179  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
180  MVT::SimpleValueType VT = FloatTypes[x];
188  }
189 }
190 
191 //===----------------------------------------------------------------------===//
192 // Target Information
193 //===----------------------------------------------------------------------===//
194 
196  return MVT::i32;
197 }
198 
200  EVT CastTy) const {
201  if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
202  return true;
203 
204  unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
205  unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
206 
207  return ((LScalarSize <= CastScalarSize) ||
208  (CastScalarSize >= 32) ||
209  (LScalarSize < 32));
210 }
211 
212 //===---------------------------------------------------------------------===//
213 // Target Properties
214 //===---------------------------------------------------------------------===//
215 
217  assert(VT.isFloatingPoint());
218  return VT == MVT::f32;
219 }
220 
222  assert(VT.isFloatingPoint());
223  return VT == MVT::f32;
224 }
225 
226 //===---------------------------------------------------------------------===//
227 // TargetLowering Callbacks
228 //===---------------------------------------------------------------------===//
229 
231  const SmallVectorImpl<ISD::InputArg> &Ins) const {
232 
233  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
234 }
235 
237  SDValue Chain,
238  CallingConv::ID CallConv,
239  bool isVarArg,
241  const SmallVectorImpl<SDValue> &OutVals,
242  SDLoc DL, SelectionDAG &DAG) const {
243  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
244 }
245 
246 //===---------------------------------------------------------------------===//
247 // Target specific lowering
248 //===---------------------------------------------------------------------===//
249 
251  const {
252  switch (Op.getOpcode()) {
253  default:
254  Op.getNode()->dump();
255  assert(0 && "Custom lowering code for this"
256  "instruction is not implemented yet!");
257  break;
258  // AMDIL DAG lowering
259  case ISD::SDIV: return LowerSDIV(Op, DAG);
260  case ISD::SREM: return LowerSREM(Op, DAG);
261  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
262  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
263  // AMDGPU DAG lowering
264  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
265  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
266  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
267  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
268  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
269  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
270  }
271  return Op;
272 }
273 
275  SDValue Op,
276  SelectionDAG &DAG) const {
277 
279  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
280 
282  // XXX: What does the value of G->getOffset() mean?
283  assert(G->getOffset() == 0 &&
284  "Do not know what to do with an non-zero offset");
285 
286  const GlobalValue *GV = G->getGlobal();
287 
288  unsigned Offset;
289  if (MFI->LocalMemoryObjects.count(GV) == 0) {
290  uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
291  Offset = MFI->LDSSize;
292  MFI->LocalMemoryObjects[GV] = Offset;
293  // XXX: Account for alignment?
294  MFI->LDSSize += Size;
295  } else {
296  Offset = MFI->LocalMemoryObjects[GV];
297  }
298 
299  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
300 }
301 
302 void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
304  unsigned Start,
305  unsigned Count) const {
306  EVT VT = Op.getValueType();
307  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
310  Op, DAG.getConstant(i, MVT::i32)));
311  }
312 }
313 
314 SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
315  SelectionDAG &DAG) const {
317  SDValue A = Op.getOperand(0);
318  SDValue B = Op.getOperand(1);
319 
320  ExtractVectorElements(A, DAG, Args, 0,
322  ExtractVectorElements(B, DAG, Args, 0,
324 
325  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
326  &Args[0], Args.size());
327 }
328 
329 SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
330  SelectionDAG &DAG) const {
331 
333  EVT VT = Op.getValueType();
334  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
335  ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
336  VT.getVectorNumElements());
337 
338  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
339  &Args[0], Args.size());
340 }
341 
342 SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
343  SelectionDAG &DAG) const {
344 
346  const AMDGPUFrameLowering *TFL =
347  static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
348 
350  assert(FIN);
351 
352  unsigned FrameIndex = FIN->getIndex();
353  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
354  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
355  Op.getValueType());
356 }
357 
358 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
359  SelectionDAG &DAG) const {
360  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
361  SDLoc DL(Op);
362  EVT VT = Op.getValueType();
363 
364  switch (IntrinsicID) {
365  default: return Op;
366  case AMDGPUIntrinsic::AMDIL_abs:
367  return LowerIntrinsicIABS(Op, DAG);
368  case AMDGPUIntrinsic::AMDIL_exp:
369  return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
370  case AMDGPUIntrinsic::AMDGPU_lrp:
371  return LowerIntrinsicLRP(Op, DAG);
372  case AMDGPUIntrinsic::AMDIL_fraction:
373  return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
374  case AMDGPUIntrinsic::AMDIL_max:
375  return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
376  Op.getOperand(2));
377  case AMDGPUIntrinsic::AMDGPU_imax:
378  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
379  Op.getOperand(2));
380  case AMDGPUIntrinsic::AMDGPU_umax:
381  return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
382  Op.getOperand(2));
383  case AMDGPUIntrinsic::AMDIL_min:
384  return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
385  Op.getOperand(2));
386  case AMDGPUIntrinsic::AMDGPU_imin:
387  return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
388  Op.getOperand(2));
389  case AMDGPUIntrinsic::AMDGPU_umin:
390  return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
391  Op.getOperand(2));
392  case AMDGPUIntrinsic::AMDIL_round_nearest:
393  return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
394  }
395 }
396 
397 ///IABS(a) = SMAX(sub(0, a), a)
399  SelectionDAG &DAG) const {
400 
401  SDLoc DL(Op);
402  EVT VT = Op.getValueType();
403  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
404  Op.getOperand(1));
405 
406  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
407 }
408 
409 /// Linear Interpolation
410 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
412  SelectionDAG &DAG) const {
413  SDLoc DL(Op);
414  EVT VT = Op.getValueType();
415  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
416  DAG.getConstantFP(1.0f, MVT::f32),
417  Op.getOperand(1));
418  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
419  Op.getOperand(3));
420  return DAG.getNode(ISD::FADD, DL, VT,
421  DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
422  OneSubAC);
423 }
424 
425 /// \brief Generate Min/Max node
427  SelectionDAG &DAG) const {
428  SDLoc DL(Op);
429  EVT VT = Op.getValueType();
430 
431  SDValue LHS = Op.getOperand(0);
432  SDValue RHS = Op.getOperand(1);
433  SDValue True = Op.getOperand(2);
434  SDValue False = Op.getOperand(3);
435  SDValue CC = Op.getOperand(4);
436 
437  if (VT != MVT::f32 ||
438  !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
439  return SDValue();
440  }
441 
442  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
443  switch (CCOpcode) {
444  case ISD::SETOEQ:
445  case ISD::SETONE:
446  case ISD::SETUNE:
447  case ISD::SETNE:
448  case ISD::SETUEQ:
449  case ISD::SETEQ:
450  case ISD::SETFALSE:
451  case ISD::SETFALSE2:
452  case ISD::SETTRUE:
453  case ISD::SETTRUE2:
454  case ISD::SETUO:
455  case ISD::SETO:
456  assert(0 && "Operation should already be optimised !");
457  case ISD::SETULE:
458  case ISD::SETULT:
459  case ISD::SETOLE:
460  case ISD::SETOLT:
461  case ISD::SETLE:
462  case ISD::SETLT: {
463  if (LHS == True)
464  return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
465  else
466  return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
467  }
468  case ISD::SETGT:
469  case ISD::SETGE:
470  case ISD::SETUGE:
471  case ISD::SETOGE:
472  case ISD::SETUGT:
473  case ISD::SETOGT: {
474  if (LHS == True)
475  return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
476  else
477  return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
478  }
479  case ISD::SETCC_INVALID:
480  assert(0 && "Invalid setcc condcode !");
481  }
482  return Op;
483 }
484 
486  SelectionDAG &DAG) const {
488  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
489  EVT EltVT = Op.getValueType().getVectorElementType();
490  EVT PtrVT = Load->getBasePtr().getValueType();
491  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
493  SDLoc SL(Op);
494 
495  for (unsigned i = 0, e = NumElts; i != e; ++i) {
496  SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
497  DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
498  Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
499  Load->getChain(), Ptr,
501  MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
502  Load->getAlignment()));
503  }
504  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
505  Loads.size());
506 }
507 
/// Try to collapse a truncating store of a small vector (total size <= 32
/// bits, 8- or 16-bit elements) into a single integer store, since
/// byte-wise stores are very expensive.  Returns SDValue() when the store
/// does not match and must be handled by another path.
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
                                               SelectionDAG &DAG) const {
  // NOTE(review): the declaration of 'Store' (presumably
  // cast<StoreSDNode>(Op)) was lost from this extract -- confirm against
  // the original file.
  EVT MemVT = Store->getMemoryVT();
  unsigned MemBits = MemVT.getSizeInBits();

  // Byte stores are really expensive, so if possible, try to pack
  // a 32-bit vector truncating store into an i32 store.
  // XXX: We could also handle other vector bitwidths.
  if (!MemVT.isVector() || MemBits > 32) {
    return SDValue();
  }

  SDLoc DL(Op);
  const SDValue &Value = Store->getValue();
  EVT VT = Value.getValueType();
  const SDValue &Ptr = Store->getBasePtr();
  EVT MemEltVT = MemVT.getVectorElementType();
  unsigned MemEltBits = MemEltVT.getSizeInBits();
  unsigned MemNumElements = MemVT.getVectorNumElements();
  // Integer type wide enough to hold the whole packed vector.
  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
  // Mask that isolates a single truncated element.
  SDValue Mask;
  switch(MemEltBits) {
  case 8:
    Mask = DAG.getConstant(0xFF, PackedVT);
    break;
  case 16:
    Mask = DAG.getConstant(0xFFFF, PackedVT);
    break;
  default:
    llvm_unreachable("Cannot lower this vector store");
  }
  // Extract each element, truncate/mask it to its memory width, and OR it
  // into the packed scalar at bit offset i * MemEltBits.
  SDValue PackedValue;
  for (unsigned i = 0; i < MemNumElements; ++i) {
    EVT ElemVT = VT.getVectorElementType();
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
                              DAG.getConstant(i, MVT::i32));
    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
    if (i == 0) {
      PackedValue = Elt;
    } else {
      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
    }
  }
  // Emit a single scalar store of the packed value.
  // NOTE(review): one getStore argument line (likely the
  // MachinePointerInfo) was lost from this extract between Ptr and the
  // isVolatile flag -- verify against the original file.
  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
                      Store->isVolatile(), Store->isNonTemporal(),
                      Store->getAlignment());
}
560 
562  SelectionDAG &DAG) const {
563  StoreSDNode *Store = cast<StoreSDNode>(Op);
564  EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
565  EVT EltVT = Store->getValue().getValueType().getVectorElementType();
566  EVT PtrVT = Store->getBasePtr().getValueType();
567  unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
568  SDLoc SL(Op);
569 
571 
572  for (unsigned i = 0, e = NumElts; i != e; ++i) {
573  SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
574  Store->getValue(), DAG.getConstant(i, MVT::i32));
575  SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
576  Store->getBasePtr(),
577  DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
578  PtrVT));
579  Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
581  MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
582  Store->getAlignment()));
583  }
584  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
585 }
586 
588  SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
589  if (Result.getNode()) {
590  return Result;
591  }
592 
593  StoreSDNode *Store = cast<StoreSDNode>(Op);
594  if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
596  Store->getValue().getValueType().isVector()) {
597  return SplitVectorStore(Op, DAG);
598  }
599  return SDValue();
600 }
601 
602 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
603  SelectionDAG &DAG) const {
604  SDLoc DL(Op);
605  EVT VT = Op.getValueType();
606 
607  SDValue Num = Op.getOperand(0);
608  SDValue Den = Op.getOperand(1);
609 
610  SmallVector<SDValue, 8> Results;
611 
612  // RCP = URECIP(Den) = 2^32 / Den + e
613  // e is rounding error.
614  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
615 
616  // RCP_LO = umulo(RCP, Den) */
617  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
618 
619  // RCP_HI = mulhu (RCP, Den) */
620  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
621 
622  // NEG_RCP_LO = -RCP_LO
623  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
624  RCP_LO);
625 
626  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
627  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
628  NEG_RCP_LO, RCP_LO,
629  ISD::SETEQ);
630  // Calculate the rounding error from the URECIP instruction
631  // E = mulhu(ABS_RCP_LO, RCP)
632  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
633 
634  // RCP_A_E = RCP + E
635  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
636 
637  // RCP_S_E = RCP - E
638  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
639 
640  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
641  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
642  RCP_A_E, RCP_S_E,
643  ISD::SETEQ);
644  // Quotient = mulhu(Tmp0, Num)
645  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
646 
647  // Num_S_Remainder = Quotient * Den
648  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
649 
650  // Remainder = Num - Num_S_Remainder
651  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
652 
653  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
654  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
655  DAG.getConstant(-1, VT),
656  DAG.getConstant(0, VT),
657  ISD::SETUGE);
658  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
659  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
660  Num_S_Remainder,
661  DAG.getConstant(-1, VT),
662  DAG.getConstant(0, VT),
663  ISD::SETUGE);
664  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
665  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
666  Remainder_GE_Zero);
667 
668  // Calculate Division result:
669 
670  // Quotient_A_One = Quotient + 1
671  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
672  DAG.getConstant(1, VT));
673 
674  // Quotient_S_One = Quotient - 1
675  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
676  DAG.getConstant(1, VT));
677 
678  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
679  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
680  Quotient, Quotient_A_One, ISD::SETEQ);
681 
682  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
683  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
684  Quotient_S_One, Div, ISD::SETEQ);
685 
686  // Calculate Rem result:
687 
688  // Remainder_S_Den = Remainder - Den
689  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
690 
691  // Remainder_A_Den = Remainder + Den
692  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
693 
694  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
695  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
696  Remainder, Remainder_S_Den, ISD::SETEQ);
697 
698  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
699  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
700  Remainder_A_Den, Rem, ISD::SETEQ);
701  SDValue Ops[2];
702  Ops[0] = Div;
703  Ops[1] = Rem;
704  return DAG.getMergeValues(Ops, 2, DL);
705 }
706 
707 SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
708  SelectionDAG &DAG) const {
709  SDValue S0 = Op.getOperand(0);
710  SDLoc DL(Op);
711  if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
712  return SDValue();
713 
714  // f32 uint_to_fp i64
716  DAG.getConstant(0, MVT::i32));
717  SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
719  DAG.getConstant(1, MVT::i32));
720  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
721  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
722  DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
723  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
724 
725 }
726 
727 //===----------------------------------------------------------------------===//
728 // Helper functions
729 //===----------------------------------------------------------------------===//
730 
732  SelectionDAG &DAG,
733  const Function *F,
735  SmallVectorImpl<ISD::InputArg> &OrigIns) const {
736 
737  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
738  if (Ins[i].ArgVT == Ins[i].VT) {
739  OrigIns.push_back(Ins[i]);
740  continue;
741  }
742 
743  EVT VT;
744  if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
745  // Vector has been split into scalars.
746  VT = Ins[i].ArgVT.getVectorElementType();
747  } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
748  Ins[i].ArgVT.getVectorElementType() !=
749  Ins[i].VT.getVectorElementType()) {
750  // Vector elements have been promoted
751  VT = Ins[i].ArgVT;
752  } else {
753  // Vector has been spilt into smaller vectors.
754  VT = Ins[i].VT;
755  }
756 
757  ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
758  Ins[i].OrigArgIndex, Ins[i].PartOffset);
759  OrigIns.push_back(Arg);
760  }
761 }
762 
764  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
765  return CFP->isExactlyValue(1.0);
766  }
767  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
768  return C->isAllOnesValue();
769  }
770  return false;
771 }
772 
774  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
775  return CFP->getValueAPF().isZero();
776  }
777  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
778  return C->isNullValue();
779  }
780  return false;
781 }
782 
784  const TargetRegisterClass *RC,
785  unsigned Reg, EVT VT) const {
788  unsigned VirtualRegister;
789  if (!MRI.isLiveIn(Reg)) {
790  VirtualRegister = MRI.createVirtualRegister(RC);
791  MRI.addLiveIn(Reg, VirtualRegister);
792  } else {
793  VirtualRegister = MRI.getLiveInVirtReg(Reg);
794  }
795  return DAG.getRegister(VirtualRegister, VT);
796 }
797 
798 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
799 
800 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
801  switch (Opcode) {
802  default: return 0;
803  // AMDIL DAG nodes
809 
810  // AMDGPU DAG nodes
832  }
833 }
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false)
virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const
LLVMContext * getContext() const
Definition: SelectionDAG.h:285
AMDGPU specific subclass of TargetSubtarget.
void dump() const
dump - Dump this node, for debugging.
AMDGPUTargetLowering(TargetMachine &TM)
Address space for local memory.
Definition: AMDGPU.h:78
void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
const TargetMachine & getTargetMachine() const
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
void addLiveIn(unsigned Reg, unsigned vreg=0)
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:266
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
const GlobalValue * getGlobal() const
SDValue getSelectCC(SDLoc DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Definition: SelectionDAG.h:679
unsigned getSizeInBits() const
Definition: ValueTypes.h:359
F(f)
const SDValue & getBasePtr() const
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, SDLoc DL, SelectionDAG &DAG) const
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:661
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo=0)
virtual bool isFNegFree(EVT VT) const
#define llvm_unreachable(msg)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:280
virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
void addLoc(const CCValAssign &V)
unsigned getAddressSpace() const
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const
IABS(a) = SMAX(sub(0, a), a)
EVT getScalarType() const
Definition: ValueTypes.h:756
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
virtual MVT getPointerTy(uint32_t=0) const
ID
LLVM Calling Convention Representation.
Definition: CallingConv.h:26
Interface to describe a layout of a stack frame on a AMDIL target machine.
#define G(x, y, z)
Definition: MD5.cpp:52
SDValue getConstantFP(double Val, EVT VT, bool isTarget=false)
EVT getVectorElementType() const
Definition: ValueTypes.h:762
TargetRegisterInfo interface that is implemented by all hw codegen targets.
size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:250
bool isLiveIn(unsigned Reg) const
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:176
const SDValue & getBasePtr() const
EVT getMemoryVT() const
getMemoryVT - Return the type of the in-memory value.
bool isHWTrueValue(SDValue Op) const
SDNode * getNode() const
get the SDNode which holds the desired result
virtual bool isFAbsFree(EVT VT) const
const SDValue & getOperand(unsigned i) const
Simple binary floating point operators.
Definition: ISDOpcodes.h:222
bool isNonTemporal() const
unsigned getLiveInVirtReg(unsigned PReg) const
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
unsigned getOpcode() const
virtual bool isLoadBitCastBeneficial(EVT, EVT) const LLVM_OVERRIDE
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const
virtual const TargetFrameLowering * getFrameLowering() const
bool isHWFalseValue(SDValue Op) const
void setLoadExtAction(unsigned ExtType, MVT VT, LegalizeAction Action)
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
unsigned LDSSize
Number of bytes in the LDS that are being used.
uint64_t getTypeAllocSize(Type *Ty) const
Definition: DataLayout.h:326
Interface definition of the TargetLowering class that is common to all AMD GPUs.
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo=0)
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const
virtual unsigned getStackWidth(const MachineFunction &MF) const
virtual MVT getVectorIdxTy() const
ISD::LoadExtType getExtensionType() const
const Value * getValue() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
Address space for private memory.
Definition: AMDGPU.h:75
std::map< const GlobalValue *, unsigned > LocalMemoryObjects
unsigned getAddressSpace() const
getAddressSpace - Return the address space for the associated pointer
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:295
MachineRegisterInfo & getRegInfo()
Information about the stack frame layout on the AMDGPU targets.
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:779
virtual const DataLayout * getDataLayout() const
SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, bool isNonTemporal, bool isVolatile, unsigned Alignment, const MDNode *TBAAInfo=0)
#define NODE_NAME_CASE(node)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const
Split a vector load into multiple scalar loads.
EVT getValueType() const
void getOriginalFunctionArgs(SelectionDAG &DAG, const Function *F, const SmallVectorImpl< ISD::InputArg > &Ins, SmallVectorImpl< ISD::InputArg > &OrigIns) const
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:651
SDValue getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
LLVM Value Representation.
Definition: Value.h:66
SDValue getRegister(unsigned Reg, EVT VT)
Interface for the AMDGPU Implementation of the Intrinsic Info class.
SDValue getMergeValues(const SDValue *Ops, unsigned NumOps, SDLoc dl)
getMergeValues - Create a MERGE_VALUES node from the given operands.
unsigned getOrigAlign() const
const MCRegisterInfo & MRI
unsigned getAlignment() const
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Definition: ValueTypes.h:607
unsigned AllocateStack(unsigned Size, unsigned Align)
INITIALIZE_PASS(GlobalMerge,"global-merge","Global Merge", false, false) bool GlobalMerge const DataLayout * TD
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const
Generate Min/Max node.
unsigned getVectorNumElements() const
Definition: ValueTypes.h:771