ARMLoadStoreOptimizer.cpp

//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
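
// A usage sketch (our reading of the table above, not from the original
// source): getLoadStoreMultipleOpcode(ARM::LDRi12, ARM_AM::ia) yields
// ARM::LDMIA, while getLoadStoreMultipleOpcode(ARM::VLDRS, ARM_AM::db)
// yields 0 because only the base-updating VLDMSDB_UPD form exists.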

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
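///
/// For example (registers and offsets are illustrative, not from the source):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
/// =>
///   ldmia r0, {r1, r2, r3}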
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between
/// RangeBegin and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
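///
/// As an illustrative case (ours, not from the source): a VLDRS writing s1
/// can also carry an imp-def of the containing d0; if an instruction between
/// RangeBegin and the merge point reads d0, that operand is collected here so
/// it can be flagged undef once the merge moves the definition.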
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
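///
/// A sketch of the ascending-encoding rule enforced below (registers are
/// illustrative): ldr r1,[r0] / ldr r2,[r0,#4] / ldr r5,[r0,#8] can merge
/// into "ldmia r0, {r1, r2, r5}" because the register numbers rise with the
/// offsets; the VFP variants additionally require consecutive registers.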
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // The vldm / vstm limits are 32 for the S variants, 16 for the D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
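
// For instance (a hypothetical match, not from the source): with Base = r2,
// Bytes = 8 and an AL predicate, "sub r2, r2, #8" satisfies the checks above,
// whereas a flag-setting "subs r2, r2, #8" is rejected because it defines
// CPSR.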

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
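
// E.g. (illustrative): a single LDRi12 reports 4 bytes, while an LDMIA whose
// register list holds three registers reports 12 bytes; the arithmetic above
// derives the length of the variadic register list from the operand count.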

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
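
// Usage sketch (ours): getUpdatingLSMultipleOpcode(ARM::LDMIA, ARM_AM::ia)
// returns ARM::LDMIA_UPD, the writeback form emitted when a neighbouring
// base-register update is folded in below.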

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
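///
/// (An illustrative sketch in the style of the LDM/STM example above; the
/// registers are ours, not from the source.)
///
/// ldr rd, [rn]
/// rn := rn + 4
/// =>
/// ldr rd, [rn], #4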
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
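
// Decoding sketch (ours): for LDRi12 the immediate operand is already a byte
// offset, so it is returned unchanged; for VLDRD an AM5 immediate of 2 with a
// 'sub' opcode bit decodes to 2 * 4 = 8 and is returned as -8.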

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
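
// For example (illustrative): "ldrd r1, r2, [r0]" is an invalid pair in ARM
// mode, where the first register must be even and the second its odd
// successor; since r1 and r2 ascend and the offset is zero, it is rewritten
// here as "ldmia r0, {r1, r2}" (pairs that cannot become an ldm/stm are
// split into two plain ldr's/str's instead).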

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely that they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
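
// A worked instance of the heuristic (numbers ours, not from the source):
// with six registers in MemRegs, the move is allowed only if the code in
// between drags at most twelve additional live registers into the range;
// with four or fewer transfer registers the move is always considered cheap
// enough.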


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
1653 
1654 bool
1655 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1656  DebugLoc &dl,
1657  unsigned &NewOpc, unsigned &EvenReg,
1658  unsigned &OddReg, unsigned &BaseReg,
1659  int &Offset, unsigned &PredReg,
1660  ARMCC::CondCodes &Pred,
1661  bool &isT2) {
1662  // Make sure we're allowed to generate LDRD/STRD.
1663  if (!STI->hasV5TEOps())
1664  return false;
1665 
1666  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1667  unsigned Scale = 1;
1668  unsigned Opcode = Op0->getOpcode();
1669  if (Opcode == ARM::LDRi12)
1670  NewOpc = ARM::LDRD;
1671  else if (Opcode == ARM::STRi12)
1672  NewOpc = ARM::STRD;
1673  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1674  NewOpc = ARM::t2LDRDi8;
1675  Scale = 4;
1676  isT2 = true;
1677  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1678  NewOpc = ARM::t2STRDi8;
1679  Scale = 4;
1680  isT2 = true;
1681  } else
1682  return false;
1683 
1684  // Make sure the base address satisfies the i64 ld / st alignment requirement.
1685  // At the moment, we ignore the memory operand's value.
1686  // If we want to use AliasAnalysis, we should check it accordingly.
1687  if (!Op0->hasOneMemOperand() ||
1688  (*Op0->memoperands_begin())->isVolatile())
1689  return false;
1690 
1691  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1692  const Function *Func = MF->getFunction();
1693  unsigned ReqAlign = STI->hasV6Ops()
1694  ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1695  : 8; // Pre-v6 needs 8-byte alignment
1696  if (Align < ReqAlign)
1697  return false;
1698 
1699  // Then make sure the immediate offset fits.
1700  int OffImm = getMemoryOpOffset(Op0);
1701  if (isT2) {
1702  int Limit = (1 << 8) * Scale;
1703  if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1704  return false;
1705  Offset = OffImm;
1706  } else {
1707  ARM_AM::AddrOpc AddSub = ARM_AM::add;
1708  if (OffImm < 0) {
1709  AddSub = ARM_AM::sub;
1710  OffImm = -OffImm;
1711  }
1712  int Limit = (1 << 8) * Scale;
1713  if (OffImm >= Limit || (OffImm & (Scale-1)))
1714  return false;
1715  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1716  }
1717  EvenReg = Op0->getOperand(0).getReg();
1718  OddReg = Op1->getOperand(0).getReg();
1719  if (EvenReg == OddReg)
1720  return false;
1721  BaseReg = Op0->getOperand(1).getReg();
1722  Pred = getInstrPredicate(Op0, PredReg);
1723  dl = Op0->getDebugLoc();
1724  return true;
1725 }
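// A worked, self-contained sketch of the Thumb2 offset test above (assumed
// helper, not used by the pass): for t2LDRDi8/t2STRDi8 the scale is 4, so
// Limit = (1 << 8) * 4 = 1024; an immediate such as -1020 is accepted, while
// -1024, 1024, or any value that is not a multiple of 4 is rejected.
static bool sketchT2DWordOffsetOK(int OffImm) {
  const int Scale = 4;                 // t2LDRDi8 / t2STRDi8
  const int Limit = (1 << 8) * Scale;  // 1024
  return OffImm < Limit && OffImm > -Limit && (OffImm & (Scale - 1)) == 0;
}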
1726 
1727 namespace {
1728  struct OffsetCompare {
1729  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1730  int LOffset = getMemoryOpOffset(LHS);
1731  int ROffset = getMemoryOpOffset(RHS);
1732  assert(LHS == RHS || LOffset != ROffset);
1733  return LOffset > ROffset;
1734  }
1735  };
1736 }
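// Note on ordering: the descending sort means e.g. offsets {8, 0, 4} become
// {8, 4, 0}; RescheduleOps below then walks Ops from the back, i.e. in
// ascending-offset order.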
1737 
1738 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1739  SmallVectorImpl<MachineInstr *> &Ops,
1740  unsigned Base, bool isLd,
1741  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1742  bool RetVal = false;
1743 
1744  // Sort by offset (in reverse order).
1745  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1746 
1747  // The loads / stores of the same base are in order. Scan them from first to
1748  // last and check for the following:
1749  // 1. Any def of base.
1750  // 2. Any gaps.
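// For instance (illustrative): ldr r1,[r0] / ldr r2,[r0,#4] / ldr r3,[r0,#8]
// forms one contiguous run, whereas a hole (jumping from [r0] straight to
// [r0,#12]) or a mismatched access size ends the run early.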
1751  while (Ops.size() > 1) {
1752  unsigned FirstLoc = ~0U;
1753  unsigned LastLoc = 0;
1754  MachineInstr *FirstOp = 0;
1755  MachineInstr *LastOp = 0;
1756  int LastOffset = 0;
1757  unsigned LastOpcode = 0;
1758  unsigned LastBytes = 0;
1759  unsigned NumMove = 0;
1760  for (int i = Ops.size() - 1; i >= 0; --i) {
1761  MachineInstr *Op = Ops[i];
1762  unsigned Loc = MI2LocMap[Op];
1763  if (Loc <= FirstLoc) {
1764  FirstLoc = Loc;
1765  FirstOp = Op;
1766  }
1767  if (Loc >= LastLoc) {
1768  LastLoc = Loc;
1769  LastOp = Op;
1770  }
1771 
1772  unsigned LSMOpcode
1773  = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1774  if (LastOpcode && LSMOpcode != LastOpcode)
1775  break;
1776 
1777  int Offset = getMemoryOpOffset(Op);
1778  unsigned Bytes = getLSMultipleTransferSize(Op);
1779  if (LastBytes) {
1780  if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1781  break;
1782  }
1783  LastOffset = Offset;
1784  LastBytes = Bytes;
1785  LastOpcode = LSMOpcode;
1786  if (++NumMove == 8) // FIXME: Tune this limit.
1787  break;
1788  }
1789 
1790  if (NumMove <= 1)
1791  Ops.pop_back();
1792  else {
1793  SmallPtrSet<MachineInstr*, 4> MemOps;
1794  SmallSet<unsigned, 4> MemRegs;
1795  for (int i = NumMove-1; i >= 0; --i) {
1796  MemOps.insert(Ops[i]);
1797  MemRegs.insert(Ops[i]->getOperand(0).getReg());
1798  }
1799 
1800  // Be conservative, if the instructions are too far apart, don't
1801  // move them. We want to limit the increase of register pressure.
1802  bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1803  if (DoMove)
1804  DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1805  MemOps, MemRegs, TRI);
1806  if (!DoMove) {
1807  for (unsigned i = 0; i != NumMove; ++i)
1808  Ops.pop_back();
1809  } else {
1810  // This is the new location for the loads / stores.
1811  MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1812  while (InsertPos != MBB->end()
1813  && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1814  ++InsertPos;
1815 
1816  // If we are moving a pair of loads / stores, see if it makes sense
1817  // to try to allocate a pair of registers that can form register pairs.
1818  MachineInstr *Op0 = Ops.back();
1819  MachineInstr *Op1 = Ops[Ops.size()-2];
1820  unsigned EvenReg = 0, OddReg = 0;
1821  unsigned BaseReg = 0, PredReg = 0;
1822  ARMCC::CondCodes Pred = ARMCC::AL;
1823  bool isT2 = false;
1824  unsigned NewOpc = 0;
1825  int Offset = 0;
1826  DebugLoc dl;
1827  if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1828  EvenReg, OddReg, BaseReg,
1829  Offset, PredReg, Pred, isT2)) {
1830  Ops.pop_back();
1831  Ops.pop_back();
1832 
1833  const MCInstrDesc &MCID = TII->get(NewOpc);
1834  const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1835  MRI->constrainRegClass(EvenReg, TRC);
1836  MRI->constrainRegClass(OddReg, TRC);
1837 
1838  // Form the pair instruction.
1839  if (isLd) {
1840  MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1841  .addReg(EvenReg, RegState::Define)
1842  .addReg(OddReg, RegState::Define)
1843  .addReg(BaseReg);
1844  // FIXME: We're converting from LDRi12 to an insn that still
1845  // uses addrmode2, so we need an explicit offset reg. It should
1846  // always be reg0 since we're transforming LDRi12s.
1847  if (!isT2)
1848  MIB.addReg(0);
1849  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1850  concatenateMemOperands(MIB, Op0, Op1);
1851  DEBUG(dbgs() << "Formed " << *MIB << "\n");
1852  ++NumLDRDFormed;
1853  } else {
1854  MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1855  .addReg(EvenReg)
1856  .addReg(OddReg)
1857  .addReg(BaseReg);
1858  // FIXME: We're converting from STRi12 to an insn that still
1859  // uses addrmode2, so we need an explicit offset reg. It should
1860  // always be reg0 since we're transforming STRi12s.
1861  if (!isT2)
1862  MIB.addReg(0);
1863  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1864  concatenateMemOperands(MIB, Op0, Op1);
1865  DEBUG(dbgs() << "Formed " << *MIB << "\n");
1866  ++NumSTRDFormed;
1867  }
1868  MBB->erase(Op0);
1869  MBB->erase(Op1);
1870 
1871  // Add register allocation hints to form register pairs.
1872  MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1873  MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
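// (For example, steering the allocator toward a pair such as r4/r5:
// ARM-mode LDRD/STRD need an even register and the next odd one, and the
// hints are advisory only.)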
1874  } else {
1875  for (unsigned i = 0; i != NumMove; ++i) {
1876  MachineInstr *Op = Ops.back();
1877  Ops.pop_back();
1878  MBB->splice(InsertPos, MBB, Op);
1879  }
1880  }
1881 
1882  NumLdStMoved += NumMove;
1883  RetVal = true;
1884  }
1885  }
1886  }
1887 
1888  return RetVal;
1889 }
1890 
1891 bool
1892 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1893  bool RetVal = false;
1894 
1895  DenseMap<MachineInstr*, unsigned> MI2LocMap;
1896  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1897  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1898  SmallVector<unsigned, 4> LdBases;
1899  SmallVector<unsigned, 4> StBases;
1900 
1901  unsigned Loc = 0;
1902  MachineBasicBlock::iterator MBBI = MBB->begin();
1903  MachineBasicBlock::iterator E = MBB->end();
1904  while (MBBI != E) {
1905  for (; MBBI != E; ++MBBI) {
1906  MachineInstr *MI = MBBI;
1907  if (MI->isCall() || MI->isTerminator()) {
1908  // Stop at barriers.
1909  ++MBBI;
1910  break;
1911  }
1912 
1913  if (!MI->isDebugValue())
1914  MI2LocMap[MI] = ++Loc;
1915 
1916  if (!isMemoryOp(MI))
1917  continue;
1918  unsigned PredReg = 0;
1919  if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1920  continue;
1921 
1922  int Opc = MI->getOpcode();
1923  bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1924  unsigned Base = MI->getOperand(1).getReg();
1925  int Offset = getMemoryOpOffset(MI);
1926 
1927  bool StopHere = false;
1928  if (isLd) {
1929  DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1930  Base2LdsMap.find(Base);
1931  if (BI != Base2LdsMap.end()) {
1932  for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1933  if (Offset == getMemoryOpOffset(BI->second[i])) {
1934  StopHere = true;
1935  break;
1936  }
1937  }
1938  if (!StopHere)
1939  BI->second.push_back(MI);
1940  } else {
1941  Base2LdsMap[Base].push_back(MI);
1942  LdBases.push_back(Base);
1943  }
1944  } else {
1945  DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1946  Base2StsMap.find(Base);
1947  if (BI != Base2StsMap.end()) {
1948  for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1949  if (Offset == getMemoryOpOffset(BI->second[i])) {
1950  StopHere = true;
1951  break;
1952  }
1953  }
1954  if (!StopHere)
1955  BI->second.push_back(MI);
1956  } else {
1957  Base2StsMap[Base].push_back(MI);
1958  StBases.push_back(Base);
1959  }
1960  }
1961 
1962  if (StopHere) {
1963  // Found a duplicate (a base+offset combination that's seen earlier).
1964  // Backtrack.
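// (e.g. a second load at the same [base, #offset] as an earlier one;
// cutting the region here keeps the two from being reordered past each
// other.)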
1965  --Loc;
1966  break;
1967  }
1968  }
1969 
1970  // Re-schedule loads.
1971  for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1972  unsigned Base = LdBases[i];
1973  SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
1974  if (Lds.size() > 1)
1975  RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1976  }
1977 
1978  // Re-schedule stores.
1979  for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1980  unsigned Base = StBases[i];
1981  SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
1982  if (Sts.size() > 1)
1983  RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1984  }
1985 
1986  if (MBBI != E) {
1987  Base2LdsMap.clear();
1988  Base2StsMap.clear();
1989  LdBases.clear();
1990  StBases.clear();
1991  }
1992  }
1993 
1994  return RetVal;
1995 }
1996 
1997 
1998 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1999 /// optimization pass.
2000 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2001  if (PreAlloc)
2002  return new ARMPreAllocLoadStoreOpt();
2003  return new ARMLoadStoreOpt();
2004 }