#define DEBUG_TYPE "misched"

// In R600SchedStrategy::initialize():
  CurInstKind = IDOther;
  OccupedSlotsMask = 31;
  InstKindLimit[IDOther] = 32;
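// The strategy keeps one Pending and one Available queue per instruction kind
// (IDAlu, IDFetch, IDOther). InstKindLimit caps how many instructions of the
// current kind are emitted before a clause switch is considered, and
// OccupedSlotsMask tracks which slots of the current VLIW group are taken
// (bits 0-3 for the X/Y/Z/W channels, bit 4 for the trans slot).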
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst) {
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert (GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
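// Note: 248 presumably reflects the GPR budget a SIMD shares between
// wavefronts (an assumption; the constant is not explained here), so the
// achievable wavefront count is that budget divided by one wavefront's GPR
// requirement.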
// In R600SchedStrategy::pickNode():
  NextInstKind = IDOther;
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());
  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    float ALUFetchRationEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
    DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
    unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
    if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
      AllowSwitchFromAlu = true;
  }
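// The block above decides whether the scheduler may leave the current ALU
// clause: NeededWF approximates how many wavefronts are required to hide
// texture-fetch latency behind the ALU work seen so far (the 62.5f constant
// appears to come from AMD's figure of roughly 500 fetch cycles versus 8 ALU
// cycles), and the GPR requirement is estimated as two registers per pending
// fetch. If more wavefronts are needed than getWFCountLimitedByGPR allows for
// that requirement, switching away from ALU is permitted.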
  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (CurEmitted >= InstKindLimit[IDAlu])
      CurEmitted = 0;
    NextInstKind = IDAlu;
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
    if (SU) {
      dbgs() << " ** Pick node **\n";
    } else {
      dbgs() << "NO NODE \n";
      for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
// In R600SchedStrategy::schedNode():
  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask |= 31;
    CurInstKind = NextInstKind;
  }
  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted; // an inline literal occupies an extra slot
  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
// In R600SchedStrategy::releaseBottomNode():
  PhysicalRegCopy.push_back(SU);

  int IK = getInstKind(SU);
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
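// releaseBottomNode routes each released unit: physical-register copies are
// collected in the dedicated PhysicalRegCopy list (pickNode drains it when no
// other ALU candidate is found), IDOther units become available immediately,
// and everything else waits in the Pending queue of its instruction kind.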
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
      MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
  switch (DestSubReg) {
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;
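// getAluKind classifies an ALU instruction by the slot its result must land
// in: interpolation and other full-vector operations take all four channels
// (AluT_XYZW), a destination already tied to a sub-register or to one of the
// TReg32 X/Y/Z/W classes forces that channel, 128-bit destinations again need
// all four slots, and anything unconstrained is AluAny and can go anywhere.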
int R600SchedStrategy::getInstKind(SUnit* SU) {
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
    InstructionsGroupCandidate.push_back(SU->getInstr());
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      InstructionsGroupCandidate.pop_back();
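// PopInst scans the queue from the back: each candidate's MachineInstr is
// tentatively appended to InstructionsGroupCandidate, and the unit is only
// taken (erased from Q via the reverse iterator's base()) if the enlarged
// group still passes the legality check on constant reads
// (TII->fitsConstReadLimitations); otherwise the candidate is popped back off
// and the next unit is tried.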
void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
void R600SchedStrategy::PrepareNextSlot() {
  assert (OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  if (DstIndex == -1) {
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
unsigned R600SchedStrategy::AvailablesAluCount() const {
  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
         AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
         AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
         AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
         AvailableAlus[AluPredX].size();
}
SUnit* R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupedSlotsMask) {
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
      }
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
      }
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
      }
    }
    bool TransSlotOccuped = OccupedSlotsMask & 16;
    if (!TransSlotOccuped && VLIW5) {
      if (!AvailableAlus[AluTrans].empty()) {
        OccupedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
      }
      SUnit *SU = AttemptFillSlot(3, true);
      if (SU) {
        OccupedSlotsMask |= 16;
        return SU;
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan, false);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
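// pickAlu fills the current VLIW group bottom-up: PRED_X and discarded
// instructions claim the whole group (mask 31), a full-vector XYZW
// instruction claims the four vector slots (mask 15), then on VLIW5 targets
// the trans slot (bit 16) is filled, and finally the remaining channels are
// filled from channel 3 down to 0. PrepareNextSlot() (shown earlier) is what
// resets the mask and clears the candidate group between packets.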
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  std::vector<SUnit *> &AQ = Available[QID];
  if (AQ.empty())
    MoveUnits(Pending[QID], AQ);
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.resize(AQ.size() - 1);
  }
  return SU;