LLVM API Documentation

SIISelLowering.cpp
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for SI
//
//===----------------------------------------------------------------------===//

#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"

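// Data-format field of the buffer resource descriptor built by the
// SI_ADDR64_RSRC pseudo below; EmitInstrWithCustomInserter shifts it right by
// 32 and writes it into the high dword of the descriptor.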
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;

using namespace llvm;

SITargetLowering::SITargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM) {
  addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
  addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);

  addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
  addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);

  addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);

  addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);

  addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
  addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);

  addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
  addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);

  addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
  addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);

  computeRegisterProperties();

  // Condition Codes
  // (the setCondCodeAction calls are elided in this listing)

  // We need to custom lower vector stores from local memory
  // (setOperationAction calls elided in this listing)

  // We need to custom lower loads/stores from private memory
  // (the remaining setOperationAction, setLoadExtAction, setTruncStoreAction,
  //  setTargetDAGCombine and setSchedulingPreference calls are elided in this
  //  listing)
}

//===----------------------------------------------------------------------===//
// TargetLowering queries
//===----------------------------------------------------------------------===//

154  bool *IsFast) const {
155  // XXX: This depends on the address space and also we may want to revist
156  // the alignment values we specify in the DataLayout.
157  if (!VT.isSimple() || VT == MVT::Other)
158  return false;
159  return VT.bitsGT(MVT::i32);
160 }
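// For example, a misaligned i64 or v2i32 access is reported as allowed here,
// while i32 and smaller types are not.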

bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
  return VT.bitsLE(MVT::i16);
}

SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                         SDLoc DL, SDValue Chain,
                                         unsigned Offset) const {
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                        AMDGPUAS::CONSTANT_ADDRESS);
  SDValue BasePtr = DAG.getCopyFromReg(Chain, DL,
                           MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
  SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
                            DAG.getConstant(Offset, MVT::i64));
  return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr,
                        MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
                        false, false, MemVT.getSizeInBits() >> 3);
}
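// Illustration: LowerFormalArguments below fetches each kernel argument with
//   LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), 36 + VA.getLocMemOffset());
// i.e. a sign-extending load relative to the argument pointer that the
// hardware passes in SGPR0_SGPR1.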

SDValue SITargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {

  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();

  MachineFunction &MF = DAG.getMachineFunction();
  FunctionType *FType = MF.getFunction()->getFunctionType();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  assert(CallConv == CallingConv::C);

  SmallVector<ISD::InputArg, 16> Splits;
  uint32_t Skipped = 0;

  for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
    const ISD::InputArg &Arg = Ins[i];

    // First check if it's a PS input addr
    if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
        !Arg.Flags.isByVal()) {

      assert((PSInputNum <= 15) && "Too many PS inputs!");

      if (!Arg.Used) {
        // We can safely skip PS inputs
        Skipped |= 1 << i;
        ++PSInputNum;
        continue;
      }

      Info->PSInputAddr |= 1 << PSInputNum++;
    }

    // Second split vertices into their elements
    if (Info->ShaderType != ShaderType::COMPUTE && Arg.VT.isVector()) {
      ISD::InputArg NewArg = Arg;
      NewArg.Flags.setSplit();
      NewArg.VT = Arg.VT.getVectorElementType();

      // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
      // three or five element vertex only needs three or five registers,
      // NOT four or eight.
      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
      unsigned NumElements = ParamType->getVectorNumElements();

      for (unsigned j = 0; j != NumElements; ++j) {
        Splits.push_back(NewArg);
        NewArg.PartOffset += NewArg.VT.getStoreSize();
      }

    } else if (Info->ShaderType != ShaderType::COMPUTE) {
      Splits.push_back(Arg);
    }
  }

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // At least one interpolation mode must be enabled or else the GPU will hang.
  if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
    Info->PSInputAddr |= 1;
    CCInfo.AllocateReg(AMDGPU::VGPR0);
    CCInfo.AllocateReg(AMDGPU::VGPR1);
  }

  // The pointer to the list of arguments is stored in SGPR0, SGPR1
  if (Info->ShaderType == ShaderType::COMPUTE) {
    CCInfo.AllocateReg(AMDGPU::SGPR0);
    CCInfo.AllocateReg(AMDGPU::SGPR1);
    MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
  }

  if (Info->ShaderType == ShaderType::COMPUTE) {
    getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
                            Splits);
  }

  AnalyzeFormalArguments(CCInfo, Splits);

  for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {

    const ISD::InputArg &Arg = Ins[i];
    if (Skipped & (1 << i)) {
      InVals.push_back(DAG.getUNDEF(Arg.VT));
      continue;
    }

    CCValAssign &VA = ArgLocs[ArgIdx++];
    EVT VT = VA.getLocVT();

    if (VA.isMemLoc()) {
      VT = Ins[i].VT;
      EVT MemVT = Splits[i].VT;
      // The first 36 bytes of the input buffer contain information about
      // thread group and global sizes.
      SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
                                   36 + VA.getLocMemOffset());
      InVals.push_back(Arg);
      continue;
    }
    assert(VA.isRegLoc() && "Parameter must be in a register!");

    unsigned Reg = VA.getLocReg();

    if (VT == MVT::i64) {
      // For now assume it is a pointer
      Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
                                     &AMDGPU::SReg_64RegClass);
      Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
      InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
      continue;
    }

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);

    Reg = MF.addLiveIn(Reg, RC);
    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);

    if (Arg.VT.isVector()) {

      // Build a vector from the registers
      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
      unsigned NumElements = ParamType->getVectorNumElements();

      SmallVector<SDValue, 4> Regs;
      Regs.push_back(Val);
      for (unsigned j = 1; j != NumElements; ++j) {
        Reg = ArgLocs[ArgIdx++].getLocReg();
        Reg = MF.addLiveIn(Reg, RC);
        Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
      }

      // Fill up the missing vector elements
      NumElements = Arg.VT.getVectorNumElements() - NumElements;
      for (unsigned j = 0; j != NumElements; ++j)
        Regs.push_back(DAG.getUNDEF(VT));

      InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
                                   Regs.data(), Regs.size()));
      continue;
    }

    InVals.push_back(Val);
  }
  return Chain;
}

MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {

  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default:
    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::BRANCH: return BB;
  case AMDGPU::SI_ADDR64_RSRC: {
    const SIInstrInfo *TII =
      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
    unsigned SuperReg = MI->getOperand(0).getReg();
    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
            .addOperand(MI->getOperand(1));
    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
            .addImm(0);
    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
            .addImm(RSRC_DATA_FORMAT >> 32);
    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
            .addReg(SubRegHiLo)
            .addImm(AMDGPU::sub0)
            .addReg(SubRegHiHi)
            .addImm(AMDGPU::sub1);
    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
            .addReg(SubRegLo)
            .addImm(AMDGPU::sub0_sub1)
            .addReg(SubRegHi)
            .addImm(AMDGPU::sub2_sub3);
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::V_SUB_F64: {
    const SIInstrInfo *TII =
      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
            MI->getOperand(0).getReg())
            .addReg(MI->getOperand(1).getReg())
            .addReg(MI->getOperand(2).getReg())
            .addImm(0)  /* src2 */
            .addImm(0)  /* ABS */
            .addImm(0)  /* CLAMP */
            .addImm(0)  /* OMOD */
            .addImm(2); /* NEG */
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SI_RegisterStorePseudo: {
    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
    const SIInstrInfo *TII =
      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    MachineInstrBuilder MIB =
        BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
                Reg);
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
      MIB.addOperand(MI->getOperand(i));

    MI->eraseFromParent();
  }
  }
  return BB;
}

EVT SITargetLowering::getSetCCResultType(LLVMContext &Context, EVT VT) const {
  if (!VT.isVector()) {
    return MVT::i1;
  }
  return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
}

MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
  return MVT::i32;
}

bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
    return false; /* There is V_MAD_F32 for f32 */
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::ADD: return LowerADD(Op, DAG);
  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::LOAD: {
    LoadSDNode *Load = cast<LoadSDNode>(Op);
    if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
         Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
        Op.getValueType().isVector()) {
      SDValue MergedValues[2] = {
        SplitVectorLoad(Op, DAG),
        Load->getChain()
      };
      return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
    } else {
      return LowerLOAD(Op, DAG);
    }
  }

  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::ANY_EXTEND: // Fall-through
  case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    //XXX: Hardcoded we only use two to store the pointer to the parameters.
    unsigned NumUserSGPRs = 2;
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case Intrinsic::r600_read_ngroups_x:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4);
    case Intrinsic::r600_read_ngroups_z:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8);
    case Intrinsic::r600_read_global_size_x:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12);
    case Intrinsic::r600_read_global_size_y:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16);
    case Intrinsic::r600_read_global_size_z:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20);
    case Intrinsic::r600_read_local_size_x:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24);
    case Intrinsic::r600_read_local_size_y:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28);
    case Intrinsic::r600_read_local_size_z:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32);
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
                     AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
                     AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 1), VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
                     AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 2), VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
                                  AMDGPU::VGPR0, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
                                  AMDGPU::VGPR1, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
                                  AMDGPU::VGPR2, VT);
    case AMDGPUIntrinsic::SI_load_const: {
      SDValue Ops [] = {
        ResourceDescriptorToi128(Op.getOperand(1), DAG),
        Op.getOperand(2)
      };

      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo(),
          MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
          VT.getSizeInBits() / 8, 4);
      return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
                                     Op->getVTList(), Ops, 2, VT, MMO);
    }
    case AMDGPUIntrinsic::SI_sample:
      return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
    case AMDGPUIntrinsic::SI_sampleb:
      return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
    case AMDGPUIntrinsic::SI_sampled:
      return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
    case AMDGPUIntrinsic::SI_samplel:
      return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
    case AMDGPUIntrinsic::SI_vs_load_input:
      return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
                         ResourceDescriptorToi128(Op.getOperand(1), DAG),
                         Op.getOperand(2),
                         Op.getOperand(3));
    }
  }

  case ISD::INTRINSIC_VOID:
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();

    switch (IntrinsicID) {
      case AMDGPUIntrinsic::SI_tbuffer_store: {
        SDLoc DL(Op);
        SDValue Ops [] = {
          Chain,
          ResourceDescriptorToi128(Op.getOperand(2), DAG),
          Op.getOperand(3),
          Op.getOperand(4),
          Op.getOperand(5),
          Op.getOperand(6),
          Op.getOperand(7),
          Op.getOperand(8),
          Op.getOperand(9),
          Op.getOperand(10),
          Op.getOperand(11),
          Op.getOperand(12),
          Op.getOperand(13),
          Op.getOperand(14)
        };
        EVT VT = Op.getOperand(3).getValueType();

        MachineMemOperand *MMO = MF.getMachineMemOperand(
            MachinePointerInfo(),
            MachineMemOperand::MOStore,
            VT.getSizeInBits() / 8, 4);
        return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
                                       Op->getVTList(), Ops,
                                       sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
      }
      default:
        break;
    }
  }
  return SDValue();
}

SDValue SITargetLowering::LowerADD(SDValue Op,
                                   SelectionDAG &DAG) const {
  if (Op.getValueType() != MVT::i64)
    return SDValue();

  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  SDValue Zero = DAG.getConstant(0, MVT::i32);
  SDValue One = DAG.getConstant(1, MVT::i32);

  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);

  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
  SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One);

  SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue);

  SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1);
  SDValue Carry = AddLo.getValue(1);
  SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry);

  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, AddHi.getValue(0));
}
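// Worked example: 0x00000001FFFFFFFF + 1. The low halves give AddLo = 0 with
// the carry bit set, ADDE then computes AddHi = 1 + 0 + carry = 2, and the
// BUILD_PAIR reassembles the i64 result 0x0000000200000000.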

/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {

  SDNode *Parent = Value.getNode();
  for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
       I != E; ++I) {

    if (I.getUse().get() != Value)
      continue;

    if (I->getOpcode() == Opcode)
      return *I;
  }
  return 0;
}

/// This transforms the control flow intrinsics to get the branch destination
/// as last parameter, also switches branch target with BR if the need arises
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
                                      SelectionDAG &DAG) const {

  SDLoc DL(BRCOND);

  SDNode *Intr = BRCOND.getOperand(1).getNode();
  SDValue Target = BRCOND.getOperand(2);
  SDNode *BR = 0;

  if (Intr->getOpcode() == ISD::SETCC) {
    // As long as we negate the condition everything is fine
    SDNode *SetCC = Intr;
    assert(SetCC->getConstantOperandVal(1) == 1);
    assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
           ISD::SETNE);
    Intr = SetCC->getOperand(0).getNode();

  } else {
    // Get the target from BR if we don't negate the condition
    BR = findUser(BRCOND, ISD::BR);
    Target = BR->getOperand(1);
  }

  assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);

  // Build the list of result value types
  SmallVector<EVT, 4> Res;
  for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
    Res.push_back(Intr->getValueType(i));

  // operands of the new intrinsic call
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(BRCOND.getOperand(0));
  for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
    Ops.push_back(Intr->getOperand(i));
  Ops.push_back(Target);

  // build the new intrinsic call
  SDNode *Result = DAG.getNode(
    Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
    DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();

  if (BR) {
    // Give the branch instruction our target
    SDValue Ops[] = {
      BR->getOperand(0),
      BRCOND.getOperand(2)
    };
    DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
  }

  SDValue Chain = SDValue(Result, Result->getNumValues() - 1);

  // Copy the intrinsic results to registers
  for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
    SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
    if (!CopyToReg)
      continue;

    Chain = DAG.getCopyToReg(
      Chain, DL,
      CopyToReg->getOperand(1),
      SDValue(Result, i - 1),
      SDValue());

    DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
  }

  // Remove the old intrinsic from the chain
  DAG.ReplaceAllUsesOfValueWith(
    SDValue(Intr, Intr->getNumValues() - 1),
    Intr->getOperand(0));

  return Chain;
}
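// Net effect: instead of branching on the boolean result of the control-flow
// intrinsic, the re-emitted intrinsic receives the branch target as its final
// operand, and any BR user is morphed to jump through it directly.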

SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);

  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                                 Load->getBasePtr(),
                                 DAG.getConstant(0, MVT::i32));
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
                            DAG.getConstant(2, MVT::i32));

  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Load->getChain(), Ptr,
                            DAG.getTargetConstant(0, MVT::i32),
                            Op.getOperand(2));
  SDValue MergedValues[2] = {
    Ret,
    Load->getChain()
  };
  return DAG.getMergeValues(MergedValues, 2, DL);
}

SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
                                                   SelectionDAG &DAG) const {

  if (Op.getValueType() == MVT::i128) {
    return Op;
  }

  assert(Op.getOpcode() == ISD::UNDEF);

  return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
                     DAG.getConstant(0, MVT::i64),
                     DAG.getConstant(0, MVT::i64));
}

SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
                                               const SDValue &Op,
                                               SelectionDAG &DAG) const {
  return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
                     Op.getOperand(2),
                     ResourceDescriptorToi128(Op.getOperand(3), DAG),
                     Op.getOperand(4));
}

SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
                                           SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  if (VT != MVT::i64) {
    return SDValue();
  }

  SDValue Hi = DAG.getNode(ISD::SRA, DL, MVT::i32, Op.getOperand(0),
                           DAG.getConstant(31, MVT::i32));

  return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
}
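// e.g. sign-extending the i32 value 0x80000000: hi = 0x80000000 >> 31
// (arithmetic) = 0xFFFFFFFF, so the pair is the i64 value 0xFFFFFFFF80000000.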

SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *Store = cast<StoreSDNode>(Op);
  EVT VT = Store->getMemoryVT();

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode())
    return Ret;

  if (VT.isVector() && VT.getVectorNumElements() >= 8)
    return SplitVectorStore(Op, DAG);

  if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
                            DAG.getConstant(2, MVT::i32));
  SDValue Chain = Store->getChain();
  SmallVector<SDValue, 8> Values;

  if (VT == MVT::i64) {
    for (unsigned i = 0; i < 2; ++i) {
      Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                       Store->getValue(), DAG.getConstant(i, MVT::i32)));
    }
  } else if (VT == MVT::i128) {
    for (unsigned i = 0; i < 2; ++i) {
      for (unsigned j = 0; j < 2; ++j) {
        Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                           DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
                           Store->getValue(), DAG.getConstant(i, MVT::i32)),
                         DAG.getConstant(j, MVT::i32)));
      }
    }
  } else {
    Values.push_back(Store->getValue());
  }

  for (unsigned i = 0; i < Values.size(); ++i) {
    SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
                                  Ptr, DAG.getConstant(i, MVT::i32));
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                        Chain, Values[i], PartPtr,
                        DAG.getTargetConstant(0, MVT::i32));
  }
  return Chain;
}

SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
                                           SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  if (VT != MVT::i64) {
    return SDValue();
  }

  return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0),
                     DAG.getConstant(0, MVT::i32));
}
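// Both zero- and any-extend to i64 are routed here by LowerOperation: the
// high half is simply a zero constant, so (zext i32 x to i64) becomes the
// pair (x, 0).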

//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//

SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  switch (N->getOpcode()) {
  default: break;
  case ISD::SELECT_CC: {
    ConstantSDNode *True, *False;
    // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
    if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
        && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
        && True->isAllOnesValue()
        && False->isNullValue()
        && VT == MVT::i1) {
      return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
                         N->getOperand(1), N->getOperand(4));

    }
    break;
  }
  case ISD::SETCC: {
    SDValue Arg0 = N->getOperand(0);
    SDValue Arg1 = N->getOperand(1);
    SDValue CC = N->getOperand(2);
    ConstantSDNode * C = NULL;
    ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();

    // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
    if (VT == MVT::i1
        && Arg0.getOpcode() == ISD::SIGN_EXTEND
        && Arg0.getOperand(0).getValueType() == MVT::i1
        && (C = dyn_cast<ConstantSDNode>(Arg1))
        && C->isNullValue()
        && CCOp == ISD::SETNE) {
      return SimplifySetCC(VT, Arg0.getOperand(0),
                           DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
    }
    break;
  }
  }
  return SDValue();
}

/// \brief Test if RegClass is one of the VSrc classes
static bool isVSrc(unsigned RegClass) {
  return AMDGPU::VSrc_32RegClassID == RegClass ||
         AMDGPU::VSrc_64RegClassID == RegClass;
}

/// \brief Test if RegClass is one of the SSrc classes
static bool isSSrc(unsigned RegClass) {
  return AMDGPU::SSrc_32RegClassID == RegClass ||
         AMDGPU::SSrc_64RegClassID == RegClass;
}

/// \brief Analyze the possible immediate value Op
///
/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
/// and the immediate value if it's a literal immediate
int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {

  union {
    int32_t I;
    float F;
  } Imm;

  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
    if (Node->getZExtValue() >> 32) {
      return -1;
    }
    Imm.I = Node->getSExtValue();
  } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
    Imm.F = Node->getValueAPF().convertToFloat();
  else
    return -1; // It isn't an immediate

  if ((Imm.I >= -16 && Imm.I <= 64) ||
      Imm.F == 0.5f || Imm.F == -0.5f ||
      Imm.F == 1.0f || Imm.F == -1.0f ||
      Imm.F == 2.0f || Imm.F == -2.0f ||
      Imm.F == 4.0f || Imm.F == -4.0f)
    return 0; // It's an inline immediate

  return Imm.I; // It's a literal immediate
}
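// Examples: a constant 7 lies in [-16, 64] and is an inline immediate
// (returns 0); 100 falls outside that range and is returned as the literal
// 100; a constant with bits above 32 returns -1 since it can't be encoded.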

/// \brief Try to fold an immediate directly into an instruction
bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
                               bool &ScalarSlotUsed) const {

  MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
  if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
    return false;

  const SDValue &Op = Mov->getOperand(0);
  int32_t Value = analyzeImmediate(Op.getNode());
  if (Value == -1) {
    // Not an immediate at all
    return false;

  } else if (Value == 0) {
    // Inline immediates can always be folded
    Operand = Op;
    return true;

  } else if (Value == Immediate) {
    // The literal immediate has already been folded
    Operand = Op;
    return true;

  } else if (!ScalarSlotUsed && !Immediate) {
    // Fold this literal immediate
    ScalarSlotUsed = true;
    Immediate = Value;
    Operand = Op;
    return true;

  }

  return false;
}

const TargetRegisterClass *SITargetLowering::getRegClassForNode(
                               SelectionDAG &DAG, const SDValue &Op) const {
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  if (!Op->isMachineOpcode()) {
    switch(Op->getOpcode()) {
    case ISD::CopyFromReg: {
      MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
      unsigned Reg = cast<RegisterSDNode>(Op->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        return MRI.getRegClass(Reg);
      }
      return TRI.getPhysRegClass(Reg);
    }
    default: return NULL;
    }
  }
  const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode());
  int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
  if (OpClassID != -1) {
    return TRI.getRegClass(OpClassID);
  }
  switch(Op.getMachineOpcode()) {
  case AMDGPU::COPY_TO_REGCLASS:
    // Operand 1 is the register class id for COPY_TO_REGCLASS instructions.
    OpClassID = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();

    // If the COPY_TO_REGCLASS instruction is copying to a VSrc register
    // class, then the register class for the value could be either a
    // VReg or an SReg. To get a more accurate answer, look at the register
    // class of the copy's source instead.
    if (OpClassID == AMDGPU::VSrc_32RegClassID ||
        OpClassID == AMDGPU::VSrc_64RegClassID) {
      return getRegClassForNode(DAG, Op.getOperand(0));
    }
    return TRI.getRegClass(OpClassID);
  case AMDGPU::EXTRACT_SUBREG: {
    int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    const TargetRegisterClass *SuperClass =
      getRegClassForNode(DAG, Op.getOperand(0));
    return TRI.getSubClassWithSubReg(SuperClass, SubIdx);
  }
  case AMDGPU::REG_SEQUENCE:
    // Operand 0 is the register class id for REG_SEQUENCE instructions.
    return TRI.getRegClass(
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue());
  default:
    return getRegClassFor(Op.getSimpleValueType());
  }
}

/// \brief Does "Op" fit into register class "RegClass" ?
bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
                                    unsigned RegClass) const {
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
  if (!RC) {
    return false;
  }
  return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
}

/// \brief Make sure that we don't exceed the number of allowed scalars
void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
                                       unsigned RegClass,
                                       bool &ScalarSlotUsed) const {

  // First map the operands register class to a destination class
  if (RegClass == AMDGPU::VSrc_32RegClassID)
    RegClass = AMDGPU::VReg_32RegClassID;
  else if (RegClass == AMDGPU::VSrc_64RegClassID)
    RegClass = AMDGPU::VReg_64RegClassID;
  else
    return;

  // Nothing to do if they fit naturally
  if (fitsRegClass(DAG, Operand, RegClass))
    return;

  // If the scalar slot isn't used yet use it now
  if (!ScalarSlotUsed) {
    ScalarSlotUsed = true;
    return;
  }

  // This is a conservative approach. It is possible that we can't determine
  // the correct register class and copy too often, but better safe than sorry.
  SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
  SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
                                    Operand.getValueType(), Operand, RC);
  Operand = SDValue(Node, 0);
}
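// Note: the single scalar slot threaded through foldImm/ensureSRegLimit
// models the VOP encoding restriction that at most one SGPR or one literal
// constant may appear among an instruction's sources.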

/// \returns true if \p Node's operands are different from the SDValue list
/// \p Ops
static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
  for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) {
    if (Ops[i].getNode() != Node->getOperand(i).getNode()) {
      return true;
    }
  }
  return false;
}

/// \brief Try to fold the Nodes operands into the Node
SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
                                       SelectionDAG &DAG) const {

  // Original encoding (either e32 or e64)
  int Opcode = Node->getMachineOpcode();
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
  const MCInstrDesc *Desc = &TII->get(Opcode);

  unsigned NumDefs = Desc->getNumDefs();
  unsigned NumOps = Desc->getNumOperands();

  // Commuted opcode if available
  int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
  const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);

  assert(!DescRev || DescRev->getNumDefs() == NumDefs);
  assert(!DescRev || DescRev->getNumOperands() == NumOps);

  // e64 version if available, -1 otherwise
  int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
  const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);

  assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
  assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));

  int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
  bool HaveVSrc = false, HaveSSrc = false;

  // First figure out what we already have in this instruction
  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
       i != e && Op < NumOps; ++i, ++Op) {

    unsigned RegClass = Desc->OpInfo[Op].RegClass;
    if (isVSrc(RegClass))
      HaveVSrc = true;
    else if (isSSrc(RegClass))
      HaveSSrc = true;
    else
      continue;

    int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
    if (Imm != -1 && Imm != 0) {
      // Literal immediate
      Immediate = Imm;
    }
  }

  // If we neither have VSrc nor SSrc it makes no sense to continue
  if (!HaveVSrc && !HaveSSrc)
    return Node;

  // No scalar allowed when we have both VSrc and SSrc
  bool ScalarSlotUsed = HaveVSrc && HaveSSrc;

  // Second go over the operands and try to fold them
  std::vector<SDValue> Ops;
  bool Promote2e64 = false;
  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
       i != e && Op < NumOps; ++i, ++Op) {

    const SDValue &Operand = Node->getOperand(i);
    Ops.push_back(Operand);

    // Already folded immediate ?
    if (isa<ConstantSDNode>(Operand.getNode()) ||
        isa<ConstantFPSDNode>(Operand.getNode()))
      continue;

    // Is this a VSrc or SSrc operand ?
    unsigned RegClass = Desc->OpInfo[Op].RegClass;
    if (isVSrc(RegClass) || isSSrc(RegClass)) {
      // Try to fold the immediates
      if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
        // Folding didn't work, make sure we don't hit the SReg limit
        ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
      }
      continue;
    }

    if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {

      unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
      assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));

      // Test if it makes sense to swap operands
      if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
          (!fitsRegClass(DAG, Ops[1], RegClass) &&
           fitsRegClass(DAG, Ops[1], OtherRegClass))) {

        // Swap commutable operands
        SDValue Tmp = Ops[1];
        Ops[1] = Ops[0];
        Ops[0] = Tmp;

        Desc = DescRev;
        DescRev = 0;
        continue;
      }
    }

    if (DescE64 && !Immediate) {

      // Test if it makes sense to switch to e64 encoding
      unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
      if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
        continue;

      int32_t TmpImm = -1;
      if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
          (!fitsRegClass(DAG, Ops[i], RegClass) &&
           fitsRegClass(DAG, Ops[1], OtherRegClass))) {

        // Switch to e64 encoding
        Immediate = -1;
        Promote2e64 = true;
        Desc = DescE64;
        DescE64 = 0;
      }
    }
  }

  if (Promote2e64) {
    // Add the modifier flags while promoting
    for (unsigned i = 0; i < 4; ++i)
      Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
  }

  // Add optional chain and glue
  for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
    Ops.push_back(Node->getOperand(i));

  // Nodes that have a glue result are not CSE'd by getMachineNode(), so in
  // this case a brand new node is always created, even if the operands
  // are the same as before. So, manually check if anything has been changed.
  if (Desc->Opcode == Opcode && !isNodeChanged(Node, Ops)) {
    return Node;
  }

  // Create a complete new instruction
  return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
}

/// \brief Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
  switch (Idx) {
  default: return 0;
  case AMDGPU::sub0: return 0;
  case AMDGPU::sub1: return 1;
  case AMDGPU::sub2: return 2;
  case AMDGPU::sub3: return 3;
  }
}

/// \brief Adjust the writemask of MIMG instructions
void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
                                       SelectionDAG &DAG) const {
  SDNode *Users[4] = { };
  unsigned Lane = 0;
  unsigned OldDmask = Node->getConstantOperandVal(0);
  unsigned NewDmask = 0;

  // Try to figure out the used register components
  for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
       I != E; ++I) {

    // Abort if we can't understand the usage
    if (!I->isMachineOpcode() ||
        I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
      return;

    // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
    // Note that subregs are packed, i.e. Lane==0 is the first bit set
    // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
    // set, etc.
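    // For example, OldDmask = 0b1010 writes components Y and W: Lane==0
    // resolves to component 1 (Y) and Lane==1 to component 3 (W).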
    Lane = SubIdx2Lane(I->getConstantOperandVal(1));

    // Set which texture component corresponds to the lane.
    unsigned Comp;
    for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
      assert(Dmask);
      Comp = countTrailingZeros(Dmask);
      Dmask &= ~(1 << Comp);
    }

    // Abort if we have more than one user per component
    if (Users[Lane])
      return;

    Users[Lane] = *I;
    NewDmask |= 1 << Comp;
  }

  // Abort if there's no change
  if (NewDmask == OldDmask)
    return;

  // Adjust the writemask in the node
  std::vector<SDValue> Ops;
  Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
    Ops.push_back(Node->getOperand(i));
  Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());

  // If we only got one lane, replace it with a copy
  // (if NewDmask has only one bit set...)
  if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
    SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                      SDLoc(), Users[Lane]->getValueType(0),
                                      SDValue(Node, 0), RC);
    DAG.ReplaceAllUsesWith(Users[Lane], Copy);
    return;
  }

  // Update the users of the node with the new indices
  for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {

    SDNode *User = Users[i];
    if (!User)
      continue;

    SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
    DAG.UpdateNodeOperands(User, User->getOperand(0), Op);

    switch (Idx) {
    default: break;
    case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
    case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
    case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
    }
  }
}

/// \brief Fold the instructions after selecting them
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
                                          SelectionDAG &DAG) const {
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
  Node = AdjustRegClass(Node, DAG);

  if (TII->isMIMG(Node->getMachineOpcode()))
    adjustWritemask(Node, DAG);

  return foldOperands(Node, DAG);
}

/// \brief Assign the register class depending on the number of
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                     SDNode *Node) const {
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
  if (!TII->isMIMG(MI->getOpcode()))
    return;

  unsigned VReg = MI->getOperand(0).getReg();
  unsigned Writemask = MI->getOperand(1).getImm();
  unsigned BitsSet = 0;
  for (unsigned i = 0; i < 4; ++i)
    BitsSet += Writemask & (1 << i) ? 1 : 0;

  const TargetRegisterClass *RC;
  switch (BitsSet) {
  default: return;
  case 1: RC = &AMDGPU::VReg_32RegClass; break;
  case 2: RC = &AMDGPU::VReg_64RegClass; break;
  case 3: RC = &AMDGPU::VReg_96RegClass; break;
  }

  unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
  MI->setDesc(TII->get(NewOpcode));
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  MRI.setRegClass(VReg, RC);
}

MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
                                                SelectionDAG &DAG) const {

  SDLoc DL(N);
  unsigned NewOpcode = N->getMachineOpcode();

  switch (N->getMachineOpcode()) {
  default: return N;
  case AMDGPU::S_LOAD_DWORD_IMM:
    NewOpcode = AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
    // Fall-through
  case AMDGPU::S_LOAD_DWORDX2_SGPR:
    if (NewOpcode == N->getMachineOpcode()) {
      NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
    }
    // Fall-through
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: {
    if (NewOpcode == N->getMachineOpcode()) {
      NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
    }
    if (fitsRegClass(DAG, N->getOperand(0), AMDGPU::SReg_64RegClassID)) {
      return N;
    }
    ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1));
    SDValue Ops[] = {
      SDValue(DAG.getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::i128,
                                 DAG.getConstant(0, MVT::i64)), 0),
      N->getOperand(0),
      DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32)
    };
    return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops);
  }
  }
}
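// Net effect: an S_LOAD_* whose base pointer doesn't fit in SGPRs is replaced
// by the matching BUFFER_LOAD_*_ADDR64, using the SI_ADDR64_RSRC pseudo
// (emitted above) to synthesize the required resource descriptor.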

SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
                                               const TargetRegisterClass *RC,
                                               unsigned Reg, EVT VT) const {
  SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT);

  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
                            cast<RegisterSDNode>(VReg)->getReg(), VT);
}