17 #define DEBUG_TYPE "packets"
48 const char *getPassName()
const {
49 return "R600 Packetizer";
62 bool ConsideredInstUsesAlreadyWrittenVectorElement;
74 if (!
TII->isALUInstr(I->getOpcode()) && !I->isBundle())
82 int BISlot = getSlot(BI);
83 if (LastDstChan >= BISlot)
89 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
91 int DstIdx =
TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
95 unsigned Dst = BI->getOperand(DstIdx).getReg();
96 if (isTrans ||
TII->isTransOnly(BI)) {
97 Result[Dst] = AMDGPU::PS;
100 if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
101 BI->getOpcode() == AMDGPU::DOT4_eg) {
102 Result[Dst] = AMDGPU::PV_X;
105 if (Dst == AMDGPU::OQAP) {
109 switch (TRI.getHWRegChan(Dst)) {
111 PVReg = AMDGPU::PV_X;
114 PVReg = AMDGPU::PV_Y;
117 PVReg = AMDGPU::PV_Z;
120 PVReg = AMDGPU::PV_W;
126 }
while ((++BI)->isBundledWithPred());
133 AMDGPU::OpName::src0,
134 AMDGPU::OpName::src1,
137 for (
unsigned i = 0; i < 3; i++) {
138 int OperandIdx =
TII->getOperandIdx(MI->
getOpcode(), Ops[i]);
158 void initPacketizerState() {
159 ConsideredInstUsesAlreadyWrittenVectorElement =
false;
170 if (
TII->isVector(*MI))
174 if (MI->
getOpcode() == AMDGPU::GROUP_BARRIER)
185 bool isLegalToPacketizeTogether(
SUnit *SUI,
SUnit *SUJ) {
187 if (getSlot(MII) == getSlot(MIJ))
188 ConsideredInstUsesAlreadyWrittenVectorElement =
true;
190 int OpI =
TII->getOperandIdx(MII->
getOpcode(), AMDGPU::OpName::pred_sel),
191 OpJ =
TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
193 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
197 for (
unsigned i = 0, e = SUJ->
Succs.size(); i < e; ++i) {
210 bool ARDef =
TII->definesAddressRegister(MII) ||
211 TII->definesAddressRegister(MIJ);
212 bool ARUse =
TII->usesAddressRegister(MII) ||
213 TII->usesAddressRegister(MIJ);
// Always answers false: the body returns unconditionally and reads neither
// SUI nor SUJ.
// NOTE(review): going by the name, this presumably tells the caller that a
// dependence between the two scheduling units may never be pruned — confirm
// against the code that invokes this hook.
222 bool isLegalToPruneDependencies(
SUnit *SUI,
SUnit *SUJ) {
return false;}
225 unsigned LastOp =
TII->getOperandIdx(MI->
getOpcode(), AMDGPU::OpName::last);
231 std::vector<R600InstrInfo::BankSwizzle> &BS,
233 isTransSlot =
TII->isTransOnly(MI);
234 assert (!isTransSlot || VLIW5);
237 if (!isTransSlot && !CurrentPacketMIs.empty()) {
238 if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
239 if (ConsideredInstUsesAlreadyWrittenVectorElement &&
240 !
TII->isVectorOnly(MI) && VLIW5) {
250 CurrentPacketMIs.push_back(MI);
251 if (!
TII->fitsConstReadLimitations(CurrentPacketMIs)) {
253 dbgs() <<
"Couldn't pack :\n";
255 dbgs() <<
"with the following packets :\n";
256 for (
unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
257 CurrentPacketMIs[i]->dump();
260 dbgs() <<
"because of Consts read limitations\n";
262 CurrentPacketMIs.pop_back();
267 if (!
TII->fitsReadPortLimitations(CurrentPacketMIs,
268 PV, BS, isTransSlot)) {
270 dbgs() <<
"Couldn't pack :\n";
272 dbgs() <<
"with the following packets :\n";
273 for (
unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
274 CurrentPacketMIs[i]->dump();
277 dbgs() <<
"because of Read port limitations\n";
279 CurrentPacketMIs.pop_back();
284 if (isTransSlot &&
TII->readsLDSSrcReg(MI))
287 CurrentPacketMIs.pop_back();
293 CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
295 getPreviousVector(FirstInBundle);
296 std::vector<R600InstrInfo::BankSwizzle> BS;
299 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
300 for (
unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
303 AMDGPU::OpName::bank_swizzle);
307 AMDGPU::OpName::bank_swizzle);
309 if (!CurrentPacketMIs.empty())
310 setIsLastBit(CurrentPacketMIs.back(), 0);
311 substitutePV(MI, PV);
319 if (
TII->isTransOnly(MI))
334 assert(Packetizer.getResourceTracker() &&
"Empty DFA table!");
347 MBB != MBBe; ++MBB) {
352 (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
355 MBB->erase(DeleteMI);
365 MBB != MBBe; ++MBB) {
367 unsigned RemainingCount = MBB->size();
369 RegionEnd != MBB->begin();) {
373 for(;I != MBB->begin(); --
I, --RemainingCount) {
380 if (I == RegionEnd) {
391 Packetizer.PacketizeMIs(MBB, I, RegionEnd);
403 return new R600Packetizer(tm);
AnalysisUsage & addPreserved()
bool isSucc(SUnit *N)
isSucc - Test if node N is a successor of this node.
Interface definition for R600InstrInfo.
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
MachineInstr * getInstr() const
virtual bool isPredicated(const MachineInstr *MI) const
virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Instructions::iterator instr_iterator
A register anti-dependence (aka WAR).
AnalysisUsage & addRequired()
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
ID
LLVM Calling Convention Representation.
A register output-dependence (aka WAW).
const MachineBasicBlock * getParent() const
bundle_iterator< MachineInstr, instr_iterator > iterator
const MachineOperand & getOperand(unsigned i) const
FunctionPass * createR600Packetizer(TargetMachine &tm)
ItTy next(ItTy it, Dist n)
void setImm(int64_t immVal)
virtual const TargetInstrInfo * getInstrInfo() const
const STC & getSubtarget() const
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
ssize_t write(int fildes, const void *buf, size_t nbyte);
virtual void getAnalysisUsage(AnalysisUsage &AU) const
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
void setReg(unsigned Reg)
const TargetMachine & getTarget() const
Kind getKind() const
getKind - Return an enum value representing the kind of the dependence.
unsigned getReg() const
getReg - Returns the register number.
virtual const HexagonRegisterInfo & getRegisterInfo() const
SmallVector< SDep, 4 > Succs
static const Function * getParent(const Value *V)
BasicBlockListType::iterator iterator
ItTy prior(ItTy it, Dist n)
iterator find(const KeyT &Val)
SUnit - Scheduling unit. This is a node in the scheduling DAG.
virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI)