LLVM API Documentation

R600InstrInfo.cpp
1 //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief R600 Implementation of TargetInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "R600InstrInfo.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "AMDGPUTargetMachine.h"
19 #include "R600Defines.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 
26 #define GET_INSTRINFO_CTOR_DTOR
27 #include "AMDGPUGenDFAPacketizer.inc"
28 
29 using namespace llvm;
30 
31 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
32  : AMDGPUInstrInfo(tm),
33  RI(tm),
34  ST(tm.getSubtarget<AMDGPUSubtarget>())
35  { }
36 
37 const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
38  return RI;
39 }
40 
41 bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
42  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
43 }
44 
45 bool R600InstrInfo::isVector(const MachineInstr &MI) const {
46  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
47 }
48 
49 void
50 R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
51  MachineBasicBlock::iterator MI, DebugLoc DL,
52  unsigned DestReg, unsigned SrcReg,
53  bool KillSrc) const {
54  unsigned VectorComponents = 0;
55  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
56  AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
57  VectorComponents = 4;
58  } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
59  AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
60  VectorComponents = 2;
61  }
62 
63  if (VectorComponents > 0) {
64  for (unsigned I = 0; I < VectorComponents; I++) {
65  unsigned SubRegIndex = RI.getSubRegFromChannel(I);
66  buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
67  RI.getSubReg(DestReg, SubRegIndex),
68  RI.getSubReg(SrcReg, SubRegIndex))
69  .addReg(DestReg,
70  RegState::Define | RegState::Implicit);
71  }
72  } else {
73  MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
74  DestReg, SrcReg);
75  NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
76  .setIsKill(KillSrc);
77  }
78 }
79 
80 /// \returns true if \p MBBI can be moved into a new basic block.
81 bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
82  MachineBasicBlock::iterator MBBI) const {
83  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
84  E = MBBI->operands_end(); I != E; ++I) {
85  if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
86  I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
87  return false;
88  }
89  return true;
90 }
91 
92 unsigned R600InstrInfo::getIEQOpcode() const {
93  return AMDGPU::SETE_INT;
94 }
95 
96 bool R600InstrInfo::isMov(unsigned Opcode) const {
97 
98 
99  switch(Opcode) {
100  default: return false;
101  case AMDGPU::MOV:
102  case AMDGPU::MOV_IMM_F32:
103  case AMDGPU::MOV_IMM_I32:
104  return true;
105  }
106 }
107 
108 // Some instructions act as place holders to emulate operations that the GPU
109 // hardware does automatically. This function can be used to check if
110 // an opcode falls into this category.
111 bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
112  switch (Opcode) {
113  default: return false;
114  case AMDGPU::RETURN:
115  return true;
116  }
117 }
118 
119 bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
120  return false;
121 }
122 
123 bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
124  switch(Opcode) {
125  default: return false;
126  case AMDGPU::CUBE_r600_pseudo:
127  case AMDGPU::CUBE_r600_real:
128  case AMDGPU::CUBE_eg_pseudo:
129  case AMDGPU::CUBE_eg_real:
130  return true;
131  }
132 }
133 
134 bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
135  unsigned TargetFlags = get(Opcode).TSFlags;
136 
137  return (TargetFlags & R600_InstFlag::ALU_INST);
138 }
139 
140 bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
141  unsigned TargetFlags = get(Opcode).TSFlags;
142 
143  return ((TargetFlags & R600_InstFlag::OP1) |
144  (TargetFlags & R600_InstFlag::OP2) |
145  (TargetFlags & R600_InstFlag::OP3));
146 }
147 
148 bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
149  unsigned TargetFlags = get(Opcode).TSFlags;
150 
151  return ((TargetFlags & R600_InstFlag::LDS_1A) |
152  (TargetFlags & R600_InstFlag::LDS_1A1D) |
153  (TargetFlags & R600_InstFlag::LDS_1A2D));
154 }
155 
156 bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
157  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
158 }
159 
160 bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
161  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
162 }
163 
164 bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
165  if (isALUInstr(MI->getOpcode()))
166  return true;
167  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
168  return true;
169  switch (MI->getOpcode()) {
170  case AMDGPU::PRED_X:
171  case AMDGPU::INTERP_PAIR_XY:
172  case AMDGPU::INTERP_PAIR_ZW:
173  case AMDGPU::INTERP_VEC_LOAD:
174  case AMDGPU::COPY:
175  case AMDGPU::DOT_4:
176  return true;
177  default:
178  return false;
179  }
180 }
181 
182 bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
183  if (ST.hasCaymanISA())
184  return false;
185  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
186 }
187 
188 bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
189  return isTransOnly(MI->getOpcode());
190 }
191 
192 bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
193  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
194 }
195 
196 bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
197  return isVectorOnly(MI->getOpcode());
198 }
199 
200 bool R600InstrInfo::isExport(unsigned Opcode) const {
201  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
202 }
203 
204 bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
205  return ST.hasVertexCache() && IS_VTX(get(Opcode));
206 }
207 
208 bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
209  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
210  return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode());
211 }
212 
213 bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
214  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
215 }
216 
217 bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
218  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
219  return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) ||
220  usesTextureCache(MI->getOpcode());
221 }
222 
223 bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
224  switch (Opcode) {
225  case AMDGPU::KILLGT:
226  case AMDGPU::GROUP_BARRIER:
227  return true;
228  default:
229  return false;
230  }
231 }
232 
233 bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
234  return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
235 }
236 
237 bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
238  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
239 }
240 
241 bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
242  if (!isALUInstr(MI->getOpcode())) {
243  return false;
244  }
245  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
246  E = MI->operands_end(); I != E; ++I) {
247  if (!I->isReg() || !I->isUse() ||
248  TargetRegisterInfo::isVirtualRegister(I->getReg()))
249  continue;
250 
251  if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
252  return true;
253  }
254  return false;
255 }
256 
257 int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
258  static const unsigned OpTable[] = {
259  AMDGPU::OpName::src0,
260  AMDGPU::OpName::src1,
261  AMDGPU::OpName::src2
262  };
263 
264  assert (SrcNum < 3);
265  return getOperandIdx(Opcode, OpTable[SrcNum]);
266 }
267 
268 #define SRC_SEL_ROWS 11
269 int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
270  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
271  {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
272  {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
273  {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
274  {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
275  {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
276  {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
277  {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
278  {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
279  {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
280  {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
281  {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
282  };
283 
284  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
285  if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
286  return getOperandIdx(Opcode, SrcSelTable[i][1]);
287  }
288  }
289  return -1;
290 }
291 #undef SRC_SEL_ROWS
292 
293 SmallVector<std::pair<MachineOperand *, int64_t>, 3>
294 R600InstrInfo::getSrcs(MachineInstr *MI) const {
295  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
296 
297  if (MI->getOpcode() == AMDGPU::DOT_4) {
298  static const unsigned OpTable[8][2] = {
299  {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
300  {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
301  {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
302  {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
303  {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
304  {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
305  {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
306  {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
307  };
308 
309  for (unsigned j = 0; j < 8; j++) {
310  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
311  OpTable[j][0]));
312  unsigned Reg = MO.getReg();
313  if (Reg == AMDGPU::ALU_CONST) {
314  unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
315  OpTable[j][1])).getImm();
316  Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
317  continue;
318  }
319 
320  }
321  return Result;
322  }
323 
324  static const unsigned OpTable[3][2] = {
325  {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
326  {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
327  {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
328  };
329 
330  for (unsigned j = 0; j < 3; j++) {
331  int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
332  if (SrcIdx < 0)
333  break;
334  MachineOperand &MO = MI->getOperand(SrcIdx);
335  unsigned Reg = MI->getOperand(SrcIdx).getReg();
336  if (Reg == AMDGPU::ALU_CONST) {
337  unsigned Sel = MI->getOperand(
338  getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
339  Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
340  continue;
341  }
342  if (Reg == AMDGPU::ALU_LITERAL_X) {
343  unsigned Imm = MI->getOperand(
344  getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
345  Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
346  continue;
347  }
348  Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
349  }
350  return Result;
351 }
352 
353 std::vector<std::pair<int, unsigned> >
354 R600InstrInfo::ExtractSrcs(MachineInstr *MI,
355  const DenseMap<unsigned, unsigned> &PV,
356  unsigned &ConstCount) const {
357  ConstCount = 0;
358  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
359  const std::pair<int, unsigned> DummyPair(-1, 0);
360  std::vector<std::pair<int, unsigned> > Result;
361  unsigned i = 0;
362  for (unsigned n = Srcs.size(); i < n; ++i) {
363  unsigned Reg = Srcs[i].first->getReg();
364  unsigned Index = RI.getEncodingValue(Reg) & 0xff;
365  if (Reg == AMDGPU::OQAP) {
366  Result.push_back(std::pair<int, unsigned>(Index, 0));
367  }
368  if (PV.find(Reg) != PV.end()) {
369  // 255 is used to tell it's a PS/PV reg
370  Result.push_back(std::pair<int, unsigned>(255, 0));
371  continue;
372  }
373  if (Index > 127) {
374  ConstCount++;
375  Result.push_back(DummyPair);
376  continue;
377  }
378  unsigned Chan = RI.getHWRegChan(Reg);
379  Result.push_back(std::pair<int, unsigned>(Index, Chan));
380  }
381  for (; i < 3; ++i)
382  Result.push_back(DummyPair);
383  return Result;
384 }
385 
386 static std::vector<std::pair<int, unsigned> >
387 Swizzle(std::vector<std::pair<int, unsigned> > Src,
388  R600InstrInfo::BankSwizzle Swz) {
389  if (Src[0] == Src[1])
390  Src[1].first = -1;
391  switch (Swz) {
392  case R600InstrInfo::ALU_VEC_012_SCL_210:
393  break;
394  case R600InstrInfo::ALU_VEC_021_SCL_122:
395  std::swap(Src[1], Src[2]);
396  break;
397  case R600InstrInfo::ALU_VEC_102_SCL_221:
398  std::swap(Src[0], Src[1]);
399  break;
400  case R600InstrInfo::ALU_VEC_120_SCL_212:
401  std::swap(Src[0], Src[1]);
402  std::swap(Src[0], Src[2]);
403  break;
404  case R600InstrInfo::ALU_VEC_201:
405  std::swap(Src[0], Src[2]);
406  std::swap(Src[0], Src[1]);
407  break;
408  case R600InstrInfo::ALU_VEC_210:
409  std::swap(Src[0], Src[2]);
410  break;
411  }
412  return Src;
413 }
414 
415 static unsigned
416 getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
417  switch (Swz) {
418  case R600InstrInfo::ALU_VEC_012_SCL_210: {
419  unsigned Cycles[3] = { 2, 1, 0};
420  return Cycles[Op];
421  }
422  case R600InstrInfo::ALU_VEC_021_SCL_122: {
423  unsigned Cycles[3] = { 1, 2, 2};
424  return Cycles[Op];
425  }
426  case R600InstrInfo::ALU_VEC_120_SCL_212: {
427  unsigned Cycles[3] = { 2, 1, 2};
428  return Cycles[Op];
429  }
430  case R600InstrInfo::ALU_VEC_102_SCL_221: {
431  unsigned Cycles[3] = { 2, 2, 1};
432  return Cycles[Op];
433  }
434  default:
435  llvm_unreachable("Wrong Swizzle for Trans Slot");
436  return 0;
437  }
438 }
439 
440 /// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
441 /// in the same Instruction Group while meeting read port limitations given a
442 /// Swz swizzle sequence.
443 unsigned R600InstrInfo::isLegalUpTo(
444  const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
445  const std::vector<R600InstrInfo::BankSwizzle> &Swz,
446  const std::vector<std::pair<int, unsigned> > &TransSrcs,
447  R600InstrInfo::BankSwizzle TransSwz) const {
448  int Vector[4][3];
449  memset(Vector, -1, sizeof(Vector));
450  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
451  const std::vector<std::pair<int, unsigned> > &Srcs =
452  Swizzle(IGSrcs[i], Swz[i]);
453  for (unsigned j = 0; j < 3; j++) {
454  const std::pair<int, unsigned> &Src = Srcs[j];
455  if (Src.first < 0 || Src.first == 255)
456  continue;
457  if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
458  if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
459  Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
460  // The value from output queue A (denoted by register OQAP) can
461  // only be fetched during the first cycle.
462  return false;
463  }
464  // OQAP does not count towards the normal read port restrictions
465  continue;
466  }
467  if (Vector[Src.second][j] < 0)
468  Vector[Src.second][j] = Src.first;
469  if (Vector[Src.second][j] != Src.first)
470  return i;
471  }
472  }
473  // Now check Trans Alu
474  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
475  const std::pair<int, unsigned> &Src = TransSrcs[i];
476  unsigned Cycle = getTransSwizzle(TransSwz, i);
477  if (Src.first < 0)
478  continue;
479  if (Src.first == 255)
480  continue;
481  if (Vector[Src.second][Cycle] < 0)
482  Vector[Src.second][Cycle] = Src.first;
483  if (Vector[Src.second][Cycle] != Src.first)
484  return IGSrcs.size() - 1;
485  }
486  return IGSrcs.size();
487 }
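The loop above can be read as filling a 4x3 table of read ports: each register channel (bank) delivers one value per read cycle, and the chosen swizzle decides in which cycle each of an instruction's up to three sources is fetched. The following standalone sketch restates that rule outside of LLVM; the (register index, channel) pairs mirror what ExtractSrcs produces, while the function name and everything else are illustrative assumptions, not code from this file.

// Standalone sketch of the read-port rule enforced by isLegalUpTo.
// An index < 0 is an unused slot and 255 marks a PS/PV forwarded value;
// both are exempt from the port check, as in the code above.
#include <utility>
#include <vector>

static bool conflictFree(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs) {
  int Port[4][3];                       // [channel/bank][read cycle]
  for (unsigned B = 0; B < 4; ++B)
    for (unsigned C = 0; C < 3; ++C)
      Port[B][C] = -1;
  for (unsigned i = 0, e = IGSrcs.size(); i < e; ++i) {
    for (unsigned Cycle = 0; Cycle < 3 && Cycle < IGSrcs[i].size(); ++Cycle) {
      int Index = IGSrcs[i][Cycle].first;
      unsigned Bank = IGSrcs[i][Cycle].second;
      if (Index < 0 || Index == 255)
        continue;                       // unused source or PS/PV forwarding
      if (Port[Bank][Cycle] < 0)
        Port[Bank][Cycle] = Index;      // port still free: claim it
      else if (Port[Bank][Cycle] != Index)
        return false;                   // two different GPRs need the same port
    }
  }
  return true;
}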
488 
489 /// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
490 /// (in lexicographic order) swizzle sequence, assuming that all swizzles after
491 /// Idx can be skipped.
492 static bool
493 NextPossibleSolution(
494  std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
495  unsigned Idx) {
496  assert(Idx < SwzCandidate.size());
497  int ResetIdx = Idx;
498  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
499  ResetIdx --;
500  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
501  SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
502  }
503  if (ResetIdx == -1)
504  return false;
505  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
506  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
507  return true;
508 }
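NextPossibleSolution behaves like an odometer over the per-instruction swizzles: the digit at the failing position Idx is bumped to the next swizzle value, every later digit restarts at the first value, and digits already at the last value (ALU_VEC_210) carry over to the previous position. A minimal standalone sketch of that step, assuming six swizzle values numbered 0 to 5 (the helper name is hypothetical):

#include <vector>

// Sketch of the "next candidate" step: Swz[i] ranges over 0..MaxSwz; returns
// false once every combination at positions 0..Idx has been exhausted.
static bool nextCandidate(std::vector<int> &Swz, unsigned Idx, int MaxSwz = 5) {
  int ResetIdx = static_cast<int>(Idx);
  while (ResetIdx > -1 && Swz[ResetIdx] == MaxSwz)
    --ResetIdx;                   // skip digits already at their maximum
  for (unsigned i = ResetIdx + 1, e = Swz.size(); i < e; ++i)
    Swz[i] = 0;                   // everything after the bumped digit restarts
  if (ResetIdx == -1)
    return false;                 // whole search space tried
  ++Swz[ResetIdx];
  return true;
}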
509 
510 /// Enumerate all possible Swizzle sequences to find one that can meet all
511 /// read port requirements.
512 bool R600InstrInfo::FindSwizzleForVectorSlot(
513  const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
514  std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
515  const std::vector<std::pair<int, unsigned> > &TransSrcs,
516  R600InstrInfo::BankSwizzle TransSwz) const {
517  unsigned ValidUpTo = 0;
518  do {
519  ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
520  if (ValidUpTo == IGSrcs.size())
521  return true;
522  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
523  return false;
524 }
525 
526 /// Instructions in the Trans slot can't read a GPR at cycle 0 if they also read
527 /// a constant, and can't read a GPR at cycle 1 if they read two constants.
528 static bool
529 isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
530  const std::vector<std::pair<int, unsigned> > &TransOps,
531  unsigned ConstCount) {
532  // TransALU can't read 3 constants
533  if (ConstCount > 2)
534  return false;
535  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
536  const std::pair<int, unsigned> &Src = TransOps[i];
537  unsigned Cycle = getTransSwizzle(TransSwz, i);
538  if (Src.first < 0)
539  continue;
540  if (ConstCount > 0 && Cycle == 0)
541  return false;
542  if (ConstCount > 1 && Cycle == 1)
543  return false;
544  }
545  return true;
546 }
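Put differently, every constant fetched by a Trans-slot instruction occupies one of the early read cycles, so a GPR operand is only acceptable in a later cycle. A small self-contained illustration of that rule (the helper name and the driver are assumptions for illustration; the cycle numbers are the ones getTransSwizzle would assign):

#include <cassert>

static bool transReadOk(unsigned Cycle, unsigned ConstCount) {
  if (ConstCount > 2) return false;            // TransALU can't read 3 constants
  if (ConstCount > 0 && Cycle == 0) return false;
  if (ConstCount > 1 && Cycle == 1) return false;
  return true;
}

int main() {
  assert(!transReadOk(0, 1));  // one constant pending: cycle 0 is taken
  assert(!transReadOk(1, 2));  // two constants pending: cycles 0 and 1 are taken
  assert(transReadOk(2, 2));   // a GPR can still be fetched in cycle 2
  return 0;
}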
547 
548 bool
549 R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
550  const DenseMap<unsigned, unsigned> &PV,
551  std::vector<BankSwizzle> &ValidSwizzle,
552  bool isLastAluTrans)
553  const {
554  //Todo : support shared src0 - src1 operand
555 
556  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
557  ValidSwizzle.clear();
558  unsigned ConstCount;
559  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
560  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
561  IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
562  unsigned Op = getOperandIdx(IG[i]->getOpcode(),
563  AMDGPU::OpName::bank_swizzle);
564  ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
565  IG[i]->getOperand(Op).getImm());
566  }
567  std::vector<std::pair<int, unsigned> > TransOps;
568  if (!isLastAluTrans)
569  return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
570 
571  TransOps = IGSrcs.back();
572  IGSrcs.pop_back();
573  ValidSwizzle.pop_back();
574 
575  static const R600InstrInfo::BankSwizzle TransSwz[] = {
576  ALU_VEC_012_SCL_210,
577  ALU_VEC_021_SCL_122,
578  ALU_VEC_120_SCL_212,
579  ALU_VEC_102_SCL_221
580  };
581  for (unsigned i = 0; i < 4; i++) {
582  TransBS = TransSwz[i];
583  if (!isConstCompatible(TransBS, TransOps, ConstCount))
584  continue;
585  bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
586  TransBS);
587  if (Result) {
588  ValidSwizzle.push_back(TransBS);
589  return true;
590  }
591  }
592 
593  return false;
594 }
595 
596 
597 bool
598 R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
599  const {
600  assert (Consts.size() <= 12 && "Too many operands in instructions group");
601  unsigned Pair1 = 0, Pair2 = 0;
602  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
603  unsigned ReadConstHalf = Consts[i] & 2;
604  unsigned ReadConstIndex = Consts[i] & (~3);
605  unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
606  if (!Pair1) {
607  Pair1 = ReadHalfConst;
608  continue;
609  }
610  if (Pair1 == ReadHalfConst)
611  continue;
612  if (!Pair2) {
613  Pair2 = ReadHalfConst;
614  continue;
615  }
616  if (Pair2 != ReadHalfConst)
617  return false;
618  }
619  return true;
620 }
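The overload below encodes a kcache constant as (index << 2) | chan, so clearing the low two bits while keeping bit 1 identifies the 64-bit half of the constant slot it lives in; one ALU group may address at most two distinct halves. A standalone restatement of that intent (hypothetical names; note the code above additionally treats a half id of 0 as an empty slot):

#include <cassert>
#include <set>
#include <vector>

static bool fitsTwoConstHalves(const std::vector<unsigned> &Consts) {
  std::set<unsigned> Halves;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i)
    Halves.insert((Consts[i] & ~3u) | (Consts[i] & 2u));
  return Halves.size() <= 2;
}

int main() {
  // KC0[0].x, KC0[0].y and KC0[1].z encode as 0, 1 and 6: two halves, fits.
  std::vector<unsigned> A;
  A.push_back(0); A.push_back(1); A.push_back(6);
  assert(fitsTwoConstHalves(A));
  // Adding KC0[2].x (address 8) introduces a third half: rejected.
  A.push_back(8);
  assert(!fitsTwoConstHalves(A));
  return 0;
}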
621 
622 bool
623 R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
624  const {
625  std::vector<unsigned> Consts;
626  SmallSet<int64_t, 4> Literals;
627  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
628  MachineInstr *MI = MIs[i];
629  if (!isALUInstr(MI->getOpcode()))
630  continue;
631 
632  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
633  getSrcs(MI);
634 
635  for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
636  std::pair<MachineOperand *, unsigned> Src = Srcs[j];
637  if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
638  Literals.insert(Src.second);
639  if (Literals.size() > 4)
640  return false;
641  if (Src.first->getReg() == AMDGPU::ALU_CONST)
642  Consts.push_back(Src.second);
643  if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
644  AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
645  unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
646  unsigned Chan = RI.getHWRegChan(Src.first->getReg());
647  Consts.push_back((Index << 2) | Chan);
648  }
649  }
650  }
651  return fitsConstReadLimitations(Consts);
652 }
653 
654 DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
655  const ScheduleDAG *DAG) const {
656  const InstrItineraryData *II = TM->getInstrItineraryData();
657  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
658 }
659 
660 static bool
661 isPredicateSetter(unsigned Opcode) {
662  switch (Opcode) {
663  case AMDGPU::PRED_X:
664  return true;
665  default:
666  return false;
667  }
668 }
669 
670 static MachineInstr *
671 findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
672  MachineBasicBlock::iterator I) {
673  while (I != MBB.begin()) {
674  --I;
675  MachineInstr *MI = I;
676  if (isPredicateSetter(MI->getOpcode()))
677  return MI;
678  }
679 
680  return NULL;
681 }
682 
683 static
684 bool isJump(unsigned Opcode) {
685  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
686 }
687 
688 static bool isBranch(unsigned Opcode) {
689  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
690  Opcode == AMDGPU::BRANCH_COND_f32;
691 }
692 
693 bool
694 R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
695  MachineBasicBlock *&TBB,
696  MachineBasicBlock *&FBB,
697  SmallVectorImpl<MachineOperand> &Cond,
698  bool AllowModify) const {
699  // Most of the following comes from the ARM implementation of AnalyzeBranch
700 
701  // If the block has no terminators, it just falls into the block after it.
702  MachineBasicBlock::iterator I = MBB.end();
703  if (I == MBB.begin())
704  return false;
705  --I;
706  while (I->isDebugValue()) {
707  if (I == MBB.begin())
708  return false;
709  --I;
710  }
711  // AMDGPU::BRANCH* instructions are only available after isel and are not
712  // handled
713  if (isBranch(I->getOpcode()))
714  return true;
715  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
716  return false;
717  }
718 
719  // Get the last instruction in the block.
720  MachineInstr *LastInst = I;
721 
722  // If there is only one terminator instruction, process it.
723  unsigned LastOpc = LastInst->getOpcode();
724  if (I == MBB.begin() ||
725  !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
726  if (LastOpc == AMDGPU::JUMP) {
727  TBB = LastInst->getOperand(0).getMBB();
728  return false;
729  } else if (LastOpc == AMDGPU::JUMP_COND) {
730  MachineInstr *predSet = I;
731  while (!isPredicateSetter(predSet->getOpcode())) {
732  predSet = --I;
733  }
734  TBB = LastInst->getOperand(0).getMBB();
735  Cond.push_back(predSet->getOperand(1));
736  Cond.push_back(predSet->getOperand(2));
737  Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
738  return false;
739  }
740  return true; // Can't handle indirect branch.
741  }
742 
743  // Get the instruction before it if it is a terminator.
744  MachineInstr *SecondLastInst = I;
745  unsigned SecondLastOpc = SecondLastInst->getOpcode();
746 
747  // If the block ends with a B and a Bcc, handle it.
748  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
749  MachineInstr *predSet = --I;
750  while (!isPredicateSetter(predSet->getOpcode())) {
751  predSet = --I;
752  }
753  TBB = SecondLastInst->getOperand(0).getMBB();
754  FBB = LastInst->getOperand(0).getMBB();
755  Cond.push_back(predSet->getOperand(1));
756  Cond.push_back(predSet->getOperand(2));
757  Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
758  return false;
759  }
760 
761  // Otherwise, can't handle this.
762  return true;
763 }
764 
765 int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
766  const MachineInstr *MI = op.getParent();
767 
768  switch (MI->getDesc().OpInfo->RegClass) {
769  default: // FIXME: fallthrough??
770  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
771  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
772  };
773 }
774 
775 static
776 MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
777  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
778  It != E; ++It) {
779  if (It->getOpcode() == AMDGPU::CF_ALU ||
780  It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
781  return llvm::prior(It.base());
782  }
783  return MBB.end();
784 }
785 
786 unsigned
787 R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
788  MachineBasicBlock *TBB,
789  MachineBasicBlock *FBB,
790  const SmallVectorImpl<MachineOperand> &Cond,
791  DebugLoc DL) const {
792  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
793 
794  if (FBB == 0) {
795  if (Cond.empty()) {
796  BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
797  return 1;
798  } else {
799  MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
800  assert(PredSet && "No previous predicate !");
801  addFlag(PredSet, 0, MO_FLAG_PUSH);
802  PredSet->getOperand(2).setImm(Cond[1].getImm());
803 
804  BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
805  .addMBB(TBB)
806  .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
807  MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
808  if (CfAlu == MBB.end())
809  return 1;
810  assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
811  CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
812  return 1;
813  }
814  } else {
815  MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
816  assert(PredSet && "No previous predicate !");
817  addFlag(PredSet, 0, MO_FLAG_PUSH);
818  PredSet->getOperand(2).setImm(Cond[1].getImm());
819  BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
820  .addMBB(TBB)
821  .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
822  BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
823  MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
824  if (CfAlu == MBB.end())
825  return 2;
826  assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
827  CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
828  return 2;
829  }
830 }
831 
832 unsigned
833 R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
834 
835  // Note : we leave PRED* instructions there.
836  // They may be needed when predicating instructions.
837 
838  MachineBasicBlock::iterator I = MBB.end();
839 
840  if (I == MBB.begin()) {
841  return 0;
842  }
843  --I;
844  switch (I->getOpcode()) {
845  default:
846  return 0;
847  case AMDGPU::JUMP_COND: {
848  MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
849  clearFlag(predSet, 0, MO_FLAG_PUSH);
850  I->eraseFromParent();
851  MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
852  if (CfAlu == MBB.end())
853  break;
854  assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
855  CfAlu->setDesc(get(AMDGPU::CF_ALU));
856  break;
857  }
858  case AMDGPU::JUMP:
859  I->eraseFromParent();
860  break;
861  }
862  I = MBB.end();
863 
864  if (I == MBB.begin()) {
865  return 1;
866  }
867  --I;
868  switch (I->getOpcode()) {
869  // FIXME: only one case??
870  default:
871  return 1;
872  case AMDGPU::JUMP_COND: {
873  MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
874  clearFlag(predSet, 0, MO_FLAG_PUSH);
875  I->eraseFromParent();
876  MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
877  if (CfAlu == MBB.end())
878  break;
879  assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
880  CfAlu->setDesc(get(AMDGPU::CF_ALU));
881  break;
882  }
883  case AMDGPU::JUMP:
884  I->eraseFromParent();
885  break;
886  }
887  return 2;
888 }
889 
890 bool
891 R600InstrInfo::isPredicated(const MachineInstr *MI) const {
892  int idx = MI->findFirstPredOperandIdx();
893  if (idx < 0)
894  return false;
895 
896  unsigned Reg = MI->getOperand(idx).getReg();
897  switch (Reg) {
898  default: return false;
899  case AMDGPU::PRED_SEL_ONE:
900  case AMDGPU::PRED_SEL_ZERO:
901  case AMDGPU::PREDICATE_BIT:
902  return true;
903  }
904 }
905 
906 bool
907 R600InstrInfo::isPredicable(MachineInstr *MI) const {
908  // XXX: KILL* instructions can be predicated, but they must be the last
909  // instruction in a clause, so this means any instructions after them cannot
910  // be predicated. Until we have proper support for instruction clauses in the
911  // backend, we will mark KILL* instructions as unpredicable.
912 
913  if (MI->getOpcode() == AMDGPU::KILLGT) {
914  return false;
915  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
916  // If the clause starts in the middle of the MBB then the MBB has more
917  // than a single clause; we are unable to predicate several clauses.
918  if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
919  return false;
920  // TODO: We don't support KC merging atm
921  if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
922  return false;
923  return true;
924  } else if (isVector(*MI)) {
925  return false;
926  } else {
927  return AMDGPUInstrInfo::isPredicable(MI);
928  }
929 }
930 
931 
932 bool
933 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
934  unsigned NumCyles,
935  unsigned ExtraPredCycles,
936  const BranchProbability &Probability) const{
937  return true;
938 }
939 
940 bool
941 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
942  unsigned NumTCycles,
943  unsigned ExtraTCycles,
944  MachineBasicBlock &FMBB,
945  unsigned NumFCycles,
946  unsigned ExtraFCycles,
947  const BranchProbability &Probability) const {
948  return true;
949 }
950 
951 bool
952 R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
953  unsigned NumCyles,
954  const BranchProbability &Probability)
955  const {
956  return true;
957 }
958 
959 bool
960 R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
961  MachineBasicBlock &FMBB) const {
962  return false;
963 }
964 
965 
966 bool
967 R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
968  MachineOperand &MO = Cond[1];
969  switch (MO.getImm()) {
970  case OPCODE_IS_ZERO_INT:
971  MO.setImm(OPCODE_IS_NOT_ZERO_INT);
972  break;
973  case OPCODE_IS_NOT_ZERO_INT:
974  MO.setImm(OPCODE_IS_ZERO_INT);
975  break;
976  case OPCODE_IS_ZERO:
977  MO.setImm(OPCODE_IS_NOT_ZERO);
978  break;
979  case OPCODE_IS_NOT_ZERO:
980  MO.setImm(OPCODE_IS_ZERO);
981  break;
982  default:
983  return true;
984  }
985 
986  MachineOperand &MO2 = Cond[2];
987  switch (MO2.getReg()) {
988  case AMDGPU::PRED_SEL_ZERO:
989  MO2.setReg(AMDGPU::PRED_SEL_ONE);
990  break;
991  case AMDGPU::PRED_SEL_ONE:
992  MO2.setReg(AMDGPU::PRED_SEL_ZERO);
993  break;
994  default:
995  return true;
996  }
997  return false;
998 }
999 
1000 bool
1001 R600InstrInfo::DefinesPredicate(MachineInstr *MI,
1002  std::vector<MachineOperand> &Pred) const {
1003  return isPredicateSetter(MI->getOpcode());
1004 }
1005 
1006 
1007 bool
1008 R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
1009  const SmallVectorImpl<MachineOperand> &Pred2) const {
1010  return false;
1011 }
1012 
1013 
1014 bool
1015 R600InstrInfo::PredicateInstruction(MachineInstr *MI,
1016  const SmallVectorImpl<MachineOperand> &Pred) const {
1017  int PIdx = MI->findFirstPredOperandIdx();
1018 
1019  if (MI->getOpcode() == AMDGPU::CF_ALU) {
1020  MI->getOperand(8).setImm(0);
1021  return true;
1022  }
1023 
1024  if (MI->getOpcode() == AMDGPU::DOT_4) {
1025  MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
1026  .setReg(Pred[2].getReg());
1027  MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
1028  .setReg(Pred[2].getReg());
1029  MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
1030  .setReg(Pred[2].getReg());
1031  MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
1032  .setReg(Pred[2].getReg());
1033  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
1034  MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
1035  return true;
1036  }
1037 
1038  if (PIdx != -1) {
1039  MachineOperand &PMO = MI->getOperand(PIdx);
1040  PMO.setReg(Pred[2].getReg());
1041  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
1042  MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
1043  return true;
1044  }
1045 
1046  return false;
1047 }
1048 
1049 unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
1050  return 2;
1051 }
1052 
1053 unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
1054  const MachineInstr *MI,
1055  unsigned *PredCost) const {
1056  if (PredCost)
1057  *PredCost = 2;
1058  return 2;
1059 }
1060 
1061 void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
1062  const MachineFunction &MF) const {
1063  const AMDGPUFrameLowering *TFL =
1064  static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
1065 
1066  unsigned StackWidth = TFL->getStackWidth(MF);
1067  int End = getIndirectIndexEnd(MF);
1068 
1069  if (End == -1)
1070  return;
1071 
1072  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
1073  unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
1074  Reserved.set(SuperReg);
1075  for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
1076  unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
1077  Reserved.set(Reg);
1078  }
1079  }
1080 }
1081 
1082 unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
1083  unsigned Channel) const {
1084  // XXX: Remove when we support a stack width > 2
1085  assert(Channel == 0);
1086  return RegIndex;
1087 }
1088 
1089 const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
1090  return &AMDGPU::R600_TReg32_XRegClass;
1091 }
1092 
1093 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
1094  MachineBasicBlock::iterator I,
1095  unsigned ValueReg, unsigned Address,
1096  unsigned OffsetReg) const {
1097  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
1098  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1099  AMDGPU::AR_X, OffsetReg);
1100  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
1101 
1102  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1103  AddrReg, ValueReg)
1104  .addReg(AMDGPU::AR_X,
1105  RegState::Implicit | RegState::Kill);
1106  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
1107  return Mov;
1108 }
1109 
1110 MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
1111  MachineBasicBlock::iterator I,
1112  unsigned ValueReg, unsigned Address,
1113  unsigned OffsetReg) const {
1114  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
1115  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1116  AMDGPU::AR_X,
1117  OffsetReg);
1118  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
1119  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1120  ValueReg,
1121  AddrReg)
1122  .addReg(AMDGPU::AR_X,
1123  RegState::Implicit | RegState::Kill);
1124  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
1125 
1126  return Mov;
1127 }
1128 
1129 unsigned R600InstrInfo::getMaxAlusPerClause() const {
1130  return 115;
1131 }
1132 
1133 MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
1134  MachineBasicBlock::iterator I,
1135  unsigned Opcode,
1136  unsigned DstReg,
1137  unsigned Src0Reg,
1138  unsigned Src1Reg) const {
1139  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
1140  DstReg); // $dst
1141 
1142  if (Src1Reg) {
1143  MIB.addImm(0) // $update_exec_mask
1144  .addImm(0); // $update_predicate
1145  }
1146  MIB.addImm(1) // $write
1147  .addImm(0) // $omod
1148  .addImm(0) // $dst_rel
1149  .addImm(0) // $dst_clamp
1150  .addReg(Src0Reg) // $src0
1151  .addImm(0) // $src0_neg
1152  .addImm(0) // $src0_rel
1153  .addImm(0) // $src0_abs
1154  .addImm(-1); // $src0_sel
1155 
1156  if (Src1Reg) {
1157  MIB.addReg(Src1Reg) // $src1
1158  .addImm(0) // $src1_neg
1159  .addImm(0) // $src1_rel
1160  .addImm(0) // $src1_abs
1161  .addImm(-1); // $src1_sel
1162  }
1163 
1164  //XXX: The r600g finalizer expects this to be 1, once we've moved the
1165  //scheduling to the backend, we can change the default to 0.
1166  MIB.addImm(1) // $last
1167  .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
1168  .addImm(0) // $literal
1169  .addImm(0); // $bank_swizzle
1170 
1171  return MIB;
1172 }
1173 
1174 #define OPERAND_CASE(Label) \
1175  case Label: { \
1176  static const unsigned Ops[] = \
1177  { \
1178  Label##_X, \
1179  Label##_Y, \
1180  Label##_Z, \
1181  Label##_W \
1182  }; \
1183  return Ops[Slot]; \
1184  }
1185 
1186 static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
1187  switch (Op) {
1188  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
1189  OPERAND_CASE(AMDGPU::OpName::update_pred)
1190  OPERAND_CASE(AMDGPU::OpName::write)
1191  OPERAND_CASE(AMDGPU::OpName::omod)
1192  OPERAND_CASE(AMDGPU::OpName::dst_rel)
1193  OPERAND_CASE(AMDGPU::OpName::clamp)
1194  OPERAND_CASE(AMDGPU::OpName::src0)
1195  OPERAND_CASE(AMDGPU::OpName::src0_neg)
1196  OPERAND_CASE(AMDGPU::OpName::src0_rel)
1197  OPERAND_CASE(AMDGPU::OpName::src0_abs)
1198  OPERAND_CASE(AMDGPU::OpName::src0_sel)
1199  OPERAND_CASE(AMDGPU::OpName::src1)
1200  OPERAND_CASE(AMDGPU::OpName::src1_neg)
1201  OPERAND_CASE(AMDGPU::OpName::src1_rel)
1202  OPERAND_CASE(AMDGPU::OpName::src1_abs)
1203  OPERAND_CASE(AMDGPU::OpName::src1_sel)
1204  OPERAND_CASE(AMDGPU::OpName::pred_sel)
1205  default:
1206  llvm_unreachable("Wrong Operand");
1207  }
1208 }
1209 
1210 #undef OPERAND_CASE
1211 
1212 MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
1213  MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
1214  const {
1215  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
1216  unsigned Opcode;
1217 
1218  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
1219  Opcode = AMDGPU::DOT4_r600;
1220  else
1221  Opcode = AMDGPU::DOT4_eg;
1222  MachineBasicBlock::iterator I = MI;
1223  MachineOperand &Src0 = MI->getOperand(
1224  getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
1225  MachineOperand &Src1 = MI->getOperand(
1226  getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
1227  MachineInstr *MIB = buildDefaultInstruction(
1228  MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
1229  static const unsigned Operands[14] = {
1230  AMDGPU::OpName::update_exec_mask,
1231  AMDGPU::OpName::update_pred,
1232  AMDGPU::OpName::write,
1233  AMDGPU::OpName::omod,
1234  AMDGPU::OpName::dst_rel,
1235  AMDGPU::OpName::clamp,
1236  AMDGPU::OpName::src0_neg,
1237  AMDGPU::OpName::src0_rel,
1238  AMDGPU::OpName::src0_abs,
1239  AMDGPU::OpName::src0_sel,
1240  AMDGPU::OpName::src1_neg,
1241  AMDGPU::OpName::src1_rel,
1242  AMDGPU::OpName::src1_abs,
1243  AMDGPU::OpName::src1_sel,
1244  };
1245 
1246  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
1247  getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
1248  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
1249  .setReg(MO.getReg());
1250 
1251  for (unsigned i = 0; i < 14; i++) {
1252  MachineOperand &MO = MI->getOperand(
1253  getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
1254  assert (MO.isImm());
1255  setImmOperand(MIB, Operands[i], MO.getImm());
1256  }
1257  MIB->getOperand(20).setImm(0);
1258  return MIB;
1259 }
1260 
1261 MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
1262  MachineBasicBlock::iterator I,
1263  unsigned DstReg,
1264  uint64_t Imm) const {
1265  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
1266  AMDGPU::ALU_LITERAL_X);
1267  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
1268  return MovImm;
1269 }
1270 
1271 MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
1272  MachineBasicBlock::iterator I,
1273  unsigned DstReg, unsigned SrcReg) const {
1274  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
1275 }
1276 
1277 int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
1278  return getOperandIdx(MI.getOpcode(), Op);
1279 }
1280 
1281 int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
1282  return AMDGPU::getNamedOperandIdx(Opcode, Op);
1283 }
1284 
1285 void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
1286  int64_t Imm) const {
1287  int Idx = getOperandIdx(*MI, Op);
1288  assert(Idx != -1 && "Operand not supported for this instruction.");
1289  assert(MI->getOperand(Idx).isImm());
1290  MI->getOperand(Idx).setImm(Imm);
1291 }
1292 
1293 //===----------------------------------------------------------------------===//
1294 // Instruction flag getters/setters
1295 //===----------------------------------------------------------------------===//
1296 
1297 bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
1298  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
1299 }
1300 
1301 MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
1302  unsigned Flag) const {
1303  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1304  int FlagIndex = 0;
1305  if (Flag != 0) {
1306  // If we pass something other than the default value of Flag to this
1307  // function, it means we want to set a flag on an instruction
1308  // that uses native encoding.
1309  assert(HAS_NATIVE_OPERANDS(TargetFlags));
1310  bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
1311  switch (Flag) {
1312  case MO_FLAG_CLAMP:
1313  FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
1314  break;
1315  case MO_FLAG_MASK:
1316  FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
1317  break;
1318  case MO_FLAG_NOT_LAST:
1319  case MO_FLAG_LAST:
1320  FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
1321  break;
1322  case MO_FLAG_NEG:
1323  switch (SrcIdx) {
1324  case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
1325  case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
1326  case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
1327  }
1328  break;
1329 
1330  case MO_FLAG_ABS:
1331  assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
1332  "instructions.");
1333  (void)IsOP3;
1334  switch (SrcIdx) {
1335  case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
1336  case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
1337  }
1338  break;
1339 
1340  default:
1341  FlagIndex = -1;
1342  break;
1343  }
1344  assert(FlagIndex != -1 && "Flag not supported for this instruction");
1345  } else {
1346  FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
1347  assert(FlagIndex != 0 &&
1348  "Instruction flags not supported for this instruction");
1349  }
1350 
1351  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
1352  assert(FlagOp.isImm());
1353  return FlagOp;
1354 }
1355 
1356 void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
1357  unsigned Flag) const {
1358  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1359  if (Flag == 0) {
1360  return;
1361  }
1362  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1363  MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1364  if (Flag == MO_FLAG_NOT_LAST) {
1365  clearFlag(MI, Operand, MO_FLAG_LAST);
1366  } else if (Flag == MO_FLAG_MASK) {
1367  clearFlag(MI, Operand, Flag);
1368  } else {
1369  FlagOp.setImm(1);
1370  }
1371  } else {
1372  MachineOperand &FlagOp = getFlagOp(MI, Operand);
1373  FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
1374  }
1375 }
1376 
1377 void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
1378  unsigned Flag) const {
1379  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1380  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1381  MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1382  FlagOp.setImm(0);
1383  } else {
1384  MachineOperand &FlagOp = getFlagOp(MI);
1385  unsigned InstFlags = FlagOp.getImm();
1386  InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
1387  FlagOp.setImm(InstFlags);
1388  }
1389 }
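When an instruction has no native modifier operands, addFlag and clearFlag fall back to packing every per-operand flag into a single immediate, NUM_MO_FLAGS bits per source operand. A standalone sketch of that packing, with a 3-bit width standing in for NUM_MO_FLAGS and hypothetical helper names:

#include <cstdint>

static const unsigned BitsPerOperand = 3;   // stand-in for NUM_MO_FLAGS

static uint64_t addPackedFlag(uint64_t Flags, unsigned Operand, uint64_t Flag) {
  return Flags | (Flag << (BitsPerOperand * Operand));
}

static uint64_t clearPackedFlag(uint64_t Flags, unsigned Operand, uint64_t Flag) {
  return Flags & ~(Flag << (BitsPerOperand * Operand));
}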