R600ISelLowering.cpp
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  // Set condition code actions
  // ...

  // Legalize loads and stores to the private address space.
  // ...

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  // ...
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
        MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(SrcX)
            .addImm(SrcY)
            .addImm(SrcZ)
            .addImm(SrcW)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(1)
            .addImm(2)
            .addImm(3)
            .addOperand(RID)
            .addOperand(SID)
            .addImm(CTX)
            .addImm(CTY)
            .addImm(CTZ)
            .addImm(CTW)
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO_INT)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}
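
// Illustration of the LDS_*_RET fold in the default case above (pseudo
// machine IR, not from the original source; the opcode pair shown is
// hypothetical):
//
//   %dst = LDS_ADD_RET %addr, %val   ; %dst has no readers
//     -->
//   LDS_ADD %addr, %val
//
// The NORET form has no dst operand, which is why the operand copy loop
// starts at index 1 rather than 0.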

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case AMDGPUIntrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case AMDGPUIntrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case AMDGPUIntrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case AMDGPUIntrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case AMDGPUIntrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case AMDGPUIntrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case AMDGPUIntrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case AMDGPUIntrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case AMDGPUIntrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case AMDGPUIntrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case AMDGPUIntrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case AMDGPUIntrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case AMDGPUIntrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case AMDGPUIntrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case AMDGPUIntrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
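
// Note on the TexArgs array above (inferred from the code, not stated in the
// original source): entries 2-5 and 9-12 hold the constant lane selects
// (0,1,2,3); the 2-5 group is exactly the slot range that OptimizeSwizzle
// later rewrites for AMDGPUISD::TEXTURE_FETCH nodes in PerformDAGCombine,
// via the &NewArgs[2] pointer it is handed there.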

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
          DAG.getConstantFP(0.15915494309, MVT::f32)),
        DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
        DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}
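
// Worked example of the range reduction above (arithmetic only, not from the
// original source): the constant 0.15915494309 is 1/(2*Pi), so for x = 2*Pi
// the chain computes FRACT(1.0 + 0.5) - 0.5 = 0.0, i.e. the argument is
// wrapped into the [-0.5, 0.5] turn range; pre-R700 parts apply the extra
// scale by Pi noted in the comment above to land in [-Pi, Pi].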

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}
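
// Example (arithmetic only, using the intrinsic mapping above):
// r600_read_global_size_x uses DwordOffset 3, so it loads the 32-bit word at
// byte offset 12 of the implicit parameter buffer; the assert merely checks
// that the byte offset fits in a 16-bit immediate.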

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}
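
// Worked example (derived from the switch above): with StackWidth 1 each
// element occupies its own register row and only one 4-byte channel is used,
// so a byte pointer becomes a register index via ptr >> 2; StackWidth 2 packs
// two channels per row (ptr >> 3) and StackWidth 4 uses all four channels
// (ptr >> 4).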

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
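
// Note (inferred from the caller loops in LowerSTORE/LowerLOAD below): PtrIncr
// is a delta applied cumulatively while walking the elements, so it is 1 only
// at the element where a new register row begins. E.g. for StackWidth 2,
// ElemIdx 0..3 yields (Channel, PtrIncr) = (0,0), (1,0), (0,1), (1,0).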

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, 3, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        assert(!"Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}
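
// Sketch of the truncating-store path above with concrete numbers (not from
// the original source): for an i8 store to byte address 6, DWordAddr = 6 >> 2
// = 1, ByteIndex = 6 & 3 = 2, Shift = 2 << 3 = 16; both the value and the
// 0xFF mask are shifted left by 16 so the masked-OR store node can merge the
// byte into the containing dword.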

// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
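
// Example (arithmetic only): an address in constant buffer 2 maps to block
// 512 + 4096 * 2 = 8704, which is (512 + (kc_bank << 12)) with kc_bank = 2,
// matching the formula in the comment above.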

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      SplitVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant Ptr can't be folded; keep it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}
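
// Worked instance of the const-position formula quoted above (arithmetic
// only): kc_bank = 1, const_index = 2, chan = 3 encodes as
// (((512 + (1 << 12) + 2) << 2) + 3 = 18443; the code builds the value
// pre-multiplied by 4 and lets ISel divide it back down.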

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;

  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins,
                          LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = Ins[i].VT;
    EVT MemVT = LocalIns[i].VT;

    if (ShaderType != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                           DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                           MachinePointerInfo(UndefValue::get(PtrTy)),
                           MemVT, false, false, 4);
                           // 4 is the preferred alignment for
                           // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return MVT::getVectorVT(MVT::i32, VT.getVectorNumElements());
}

static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx) {
        isUnmovable[Idx] = true;
        continue;
      }
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
                                            SDValue Swz[4],
                                            SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}
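
// Example of the two passes above on a hypothetical input: for
// BUILD_VECTOR(a, 0.0, 1.0, a), CompactSwizzlableVector remaps lane 1 to
// SEL_0, lane 2 to SEL_1 and lane 3 to lane 0 (a duplicate), replacing those
// operands with UNDEF so only one live value remains; ReorganizeVector then
// moves extract_vector_elt operands back to the lane they came from, and the
// Swz[] selects are rewritten after each pass to match.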

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32),  // False
                       SelectCC.getOperand(4)); // CC

    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
            DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
            DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl,
                       VT, &Ops[0], Ops.size());
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be custom combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }

  case ISD::SELECT_CC: {
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(SDLoc(N),
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18)
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs, 19);
  }
  }
  return SDValue();
}

static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
  if (!Src.isMachineOpcode())
    return false;
  switch (Src.getMachineOpcode()) {
  case AMDGPU::FNEG_R600:
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::FABS_R600:
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, MVT::i32);
    return true;
  case AMDGPU::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constant values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
      int OtherSrcIdx = SrcIndices[i];
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == AMDGPU::ALU_CONST) {
          ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
              ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
    return true;
  }
  case AMDGPU::MOV_IMM_I32:
  case AMDGPU::MOV_IMM_F32: {
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (Value == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
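
// Example of the immediate folding above (hypothetical operand, not from the
// original source): an ALU source fed by MOV_IMM_F32 0.5 is rewritten to read
// the inline HALF register and the MOV disappears; a value such as 1.5 has no
// inline register, so it is placed in the single ALU_LITERAL_X slot instead,
// provided no other literal is already in use on that instruction.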
1837 
1838 
1839 /// \brief Fold the instructions after selecting them
1840 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
1841  SelectionDAG &DAG) const {
1842  const R600InstrInfo *TII =
1843  static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
1844  if (!Node->isMachineOpcode())
1845  return Node;
1846  unsigned Opcode = Node->getMachineOpcode();
1847  SDValue FakeOp;
1848 
1849  std::vector<SDValue> Ops;
1850  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
1851  I != E; ++I)
1852  Ops.push_back(*I);
1853 
1854  if (Opcode == AMDGPU::DOT_4) {
1855  int OperandIdx[] = {
1856  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1857  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1858  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1859  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
1860  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
1861  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
1862  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
1863  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
1864  };
1865  int NegIdx[] = {
1866  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
1867  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
1868  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
1869  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
1870  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
1871  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
1872  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
1873  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
1874  };
1875  int AbsIdx[] = {
1876  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
1877  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
1878  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
1879  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
1880  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
1881  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
1882  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
1883  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
1884  };
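    // DOT_4 packs four scalar dot-product lanes into one slot; each of the
    // eight scalar sources carries its own neg/abs/sel modifiers, so try
    // the fold once per channel and rebuild the node on the first success.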
1885  for (unsigned i = 0; i < 8; i++) {
1886  if (OperandIdx[i] < 0)
1887  return Node;
1888  SDValue &Src = Ops[OperandIdx[i] - 1];
1889  SDValue &Neg = Ops[NegIdx[i] - 1];
1890  SDValue &Abs = Ops[AbsIdx[i] - 1];
1891  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1892  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1893  if (HasDst)
1894  SelIdx--;
1895  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
1896  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
1897  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1898  }
1899  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
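    // REG_SEQUENCE operands alternate between source values and subregister
    // indices, hence the stride-2 walk that starts at operand 1.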
1900  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
1901  SDValue &Src = Ops[i];
1902  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
1903  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1904  }
1905  } else if (Opcode == AMDGPU::CLAMP_R600) {
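    // Instead of emitting a separate clamp instruction, set the clamp
    // output modifier on the instruction that defines the source and
    // replace CLAMP_R600 with that rewritten node.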
1906  SDValue Src = Node->getOperand(0);
1907  if (!Src.isMachineOpcode() ||
1908  !TII->hasInstrModifiers(Src.getMachineOpcode()))
1909  return Node;
1910  int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
1911  AMDGPU::OpName::clamp);
1912  if (ClampIdx < 0)
1913  return Node;
1914  std::vector<SDValue> Ops;
1915  unsigned NumOp = Src.getNumOperands();
1916  for (unsigned i = 0; i < NumOp; ++i)
1917  Ops.push_back(Src.getOperand(i));
1918  Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
1919  return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
1920  Node->getVTList(), Ops);
1921  } else {
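    // Generic scalar ALU case: up to three source operands, each with neg
    // and sel modifiers; the third source has no abs modifier, hence the
    // -1 sentinel in AbsIdx.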
1922  if (!TII->hasInstrModifiers(Opcode))
1923  return Node;
1924  int OperandIdx[] = {
1925  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1926  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1927  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
1928  };
1929  int NegIdx[] = {
1930  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
1931  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
1932  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
1933  };
1934  int AbsIdx[] = {
1935  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
1936  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
1937  -1
1938  };
1939  for (unsigned i = 0; i < 3; i++) {
1940  if (OperandIdx[i] < 0)
1941  return Node;
1942  SDValue &Src = Ops[OperandIdx[i] - 1];
1943  SDValue &Neg = Ops[NegIdx[i] - 1];
1944  SDValue FakeAbs;
1945  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
1946  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1947  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
1948  int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
1949  if (HasDst) {
1950  SelIdx--;
1951  ImmIdx--;
1952  }
1953  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
1954  SDValue &Imm = Ops[ImmIdx];
1955  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
1956  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
1957  }
1958  }
1959 
1960  return Node;
1961 }