LLVM API Documentation

X86FloatingPoint.cpp
1 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the pass which converts floating point instructions from
11 // pseudo registers into register stack instructions. This pass uses live
12 // variable information to indicate where the FPn registers are used and their
13 // lifetimes.
14 //
15 // The x87 hardware tracks liveness of the stack registers, so it is necessary
16 // to implement exact liveness tracking between basic blocks. The CFG edges are
17 // partitioned into bundles where the same FP registers must be live in
18 // identical stack positions. Instructions are inserted at the end of each basic
19 // block to rearrange the live registers to match the outgoing bundle.
20 //
21 // This approach avoids splitting critical edges at the potential cost of more
22 // live register shuffling instructions when critical edges are present.
23 //
24 //===----------------------------------------------------------------------===//
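//
// Illustrative sketch (not part of the original source): for a diamond CFG
//
//   entry -> left  -> exit
//   entry -> right -> exit
//
// the two edges leaving 'entry' fall into one bundle and the two edges
// entering 'exit' fall into another, so a register such as %FP0 that is live
// across the diamond must sit in the same st() slot along every edge of each
// bundle. Whichever block is processed first gets to fix that slot order; the
// remaining blocks shuffle their stacks to match (see finishBlockStack).
//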
25 
26 #define DEBUG_TYPE "x86-codegen"
27 #include "X86.h"
28 #include "X86InstrInfo.h"
29 #include "llvm/ADT/DepthFirstIterator.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/EdgeBundles.h"
35 #include "llvm/CodeGen/MachineFunctionPass.h"
36 #include "llvm/CodeGen/MachineInstrBuilder.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/Passes.h"
39 #include "llvm/IR/InlineAsm.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Target/TargetInstrInfo.h"
44 #include "llvm/Target/TargetMachine.h"
45 #include <algorithm>
46 using namespace llvm;
47 
48 STATISTIC(NumFXCH, "Number of fxch instructions inserted");
49 STATISTIC(NumFP , "Number of floating point instructions");
50 
51 namespace {
52  struct FPS : public MachineFunctionPass {
53  static char ID;
54  FPS() : MachineFunctionPass(ID) {
55  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
56  // This is really only to keep valgrind quiet.
57  // The logic in isLive() is too much for it.
58  memset(Stack, 0, sizeof(Stack));
59  memset(RegMap, 0, sizeof(RegMap));
60  }
61 
62  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
63  AU.setPreservesCFG();
64  AU.addRequired<EdgeBundles>();
65  AU.addPreservedID(MachineLoopInfoID);
66  AU.addPreservedID(MachineDominatorsID);
67  MachineFunctionPass::getAnalysisUsage(AU);
68  }
69 
70  virtual bool runOnMachineFunction(MachineFunction &MF);
71 
72  virtual const char *getPassName() const { return "X86 FP Stackifier"; }
73 
74  private:
75  const TargetInstrInfo *TII; // Machine instruction info.
76 
77  // Two CFG edges are related if they leave the same block, or enter the same
78  // block. The transitive closure of an edge under this relation is a
79  // LiveBundle. It represents a set of CFG edges where the live FP stack
80  // registers must be allocated identically in the x87 stack.
81  //
82  // A LiveBundle is usually all the edges leaving a block, or all the edges
83  // entering a block, but it can contain more edges if critical edges are
84  // present.
85  //
86  // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
87  // but the exact mapping of FP registers to stack slots is fixed later.
88  struct LiveBundle {
89  // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
90  unsigned Mask;
91 
92  // Number of pre-assigned live registers in FixStack. This is 0 when the
93  // stack order has not yet been fixed.
94  unsigned FixCount;
95 
96  // Assigned stack order for live-in registers.
97  // FixStack[i] == getStackEntry(i) for all i < FixCount.
98  unsigned char FixStack[8];
99 
100  LiveBundle() : Mask(0), FixCount(0) {}
101 
102  // Have the live registers been assigned a stack order yet?
103  bool isFixed() const { return !Mask || FixCount; }
104  };
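  // Illustrative example (not from the original source): if FP0 and FP2 are
  // live across a bundle and the block that fixes the order leaves FP2 on top,
  // the bundle ends up as
  //   Mask     = 0b101    // FP0 and FP2
  //   FixCount = 2
  //   FixStack = { 2, 0 } // FixStack[0] = FP2 in st(0), FixStack[1] = FP0 in st(1)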
105 
106  // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
107  // with no live FP registers.
108  SmallVector<LiveBundle, 8> LiveBundles;
109 
110  // The edge bundle analysis provides indices into the LiveBundles vector.
111  EdgeBundles *Bundles;
112 
113  // Return a bitmask of FP registers in block's live-in list.
114  static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
115  unsigned Mask = 0;
116  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
117  E = MBB->livein_end(); I != E; ++I) {
118  unsigned Reg = *I;
119  if (Reg < X86::FP0 || Reg > X86::FP6)
120  continue;
121  Mask |= 1 << (Reg - X86::FP0);
122  }
123  return Mask;
124  }
125 
126  // Partition all the CFG edges into LiveBundles.
127  void bundleCFG(MachineFunction &MF);
128 
129  MachineBasicBlock *MBB; // Current basic block
130 
131  // The hardware keeps track of how many FP registers are live, so we have
132  // to model that exactly. Usually, each live register corresponds to an
133  // FP<n> register, but when dealing with calls, returns, and inline
134  // assembly, it is sometimes necessary to have live scratch registers.
135  unsigned Stack[8]; // FP<n> Registers in each stack slot...
136  unsigned StackTop; // The current top of the FP stack.
137 
138  enum {
139  NumFPRegs = 16 // Including scratch pseudo-registers.
140  };
141 
142  // For each live FP<n> register, point to its Stack[] entry.
143  // The first entries correspond to FP0-FP6, the rest are scratch registers
144  // used when we need slightly different live registers than what the
145  // register allocator thinks.
146  unsigned RegMap[NumFPRegs];
147 
148  // Pending fixed registers - Inline assembly needs FP registers to appear
149  // in fixed stack slot positions. This is handled by copying FP registers
150  // to ST registers before the instruction, and copying back after the
151  // instruction.
152  //
153  // This is modeled with pending ST registers. NumPendingSTs is the number
154  // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
155  // register that holds the ST value. The ST registers are not moved into
156  // place until immediately before the instruction that needs them.
157  //
158  // It can happen that we need an ST register to be live when no FP register
159  // holds the value:
160  //
161  // %ST0 = COPY %FP4<kill>
162  //
163  // When that happens, we allocate a scratch FP register to hold the ST
164  // value. That means every register in PendingST must be live.
165 
166  unsigned NumPendingSTs;
167  unsigned char PendingST[8];
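  // Illustrative sketch (not from the original source): for
  //   %ST0 = COPY %FP4<kill>
  // handleSpecialFP picks a scratch register (FP8-FP15), lets it take over
  // FP4's stack slot, and records it in PendingST[0]; the value is only
  // shuffled into the real st(0) position right before the instruction that
  // actually consumes %ST0.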
168 
169  // Set up our stack model to match the incoming registers to MBB.
170  void setupBlockStack();
171 
172  // Shuffle live registers to match the expectations of successor blocks.
173  void finishBlockStack();
174 
175 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
176  void dumpStack() const {
177  dbgs() << "Stack contents:";
178  for (unsigned i = 0; i != StackTop; ++i) {
179  dbgs() << " FP" << Stack[i];
180  assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
181  }
182  for (unsigned i = 0; i != NumPendingSTs; ++i)
183  dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
184  dbgs() << "\n";
185  }
186 #endif
187 
188  /// getSlot - Return the stack slot number a particular register number is
189  /// in.
190  unsigned getSlot(unsigned RegNo) const {
191  assert(RegNo < NumFPRegs && "Regno out of range!");
192  return RegMap[RegNo];
193  }
194 
195  /// isLive - Is RegNo currently live in the stack?
196  bool isLive(unsigned RegNo) const {
197  unsigned Slot = getSlot(RegNo);
198  return Slot < StackTop && Stack[Slot] == RegNo;
199  }
200 
201  /// getScratchReg - Return an FP register that is not currently in use.
202  unsigned getScratchReg() const {
203  for (int i = NumFPRegs - 1; i >= 8; --i)
204  if (!isLive(i))
205  return i;
206  llvm_unreachable("Ran out of scratch FP registers");
207  }
208 
209  /// isScratchReg - Returns true if RegNo is a scratch FP register.
210  static bool isScratchReg(unsigned RegNo) {
211  return RegNo > 7 && RegNo < NumFPRegs;
212  }
213 
214  /// getStackEntry - Return the X86::FP<n> register in register ST(i).
215  unsigned getStackEntry(unsigned STi) const {
216  if (STi >= StackTop)
217  report_fatal_error("Access past stack top!");
218  return Stack[StackTop-1-STi];
219  }
220 
221  /// getSTReg - Return the X86::ST(i) register which contains the specified
222  /// FP<RegNo> register.
223  unsigned getSTReg(unsigned RegNo) const {
224  return StackTop - 1 - getSlot(RegNo) + X86::ST0;
225  }
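  // Worked example (not from the original source): with StackTop == 3 and
  // Stack == { 5, 1, 2 } (FP2 pushed last), getSlot(5) == 0, so
  // getSTReg(5) == 3 - 1 - 0 + X86::ST0, i.e. FP5 lives in st(2), the bottom
  // of the three live entries, while FP2 is st(0).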
226 
227  // pushReg - Push the specified FP<n> register onto the stack.
228  void pushReg(unsigned Reg) {
229  assert(Reg < NumFPRegs && "Register number out of range!");
230  if (StackTop >= 8)
231  report_fatal_error("Stack overflow!");
232  Stack[StackTop] = Reg;
233  RegMap[Reg] = StackTop++;
234  }
235 
236  bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
237  void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
238  DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
239  if (isAtTop(RegNo)) return;
240 
241  unsigned STReg = getSTReg(RegNo);
242  unsigned RegOnTop = getStackEntry(0);
243 
244  // Swap the slots the regs are in.
245  std::swap(RegMap[RegNo], RegMap[RegOnTop]);
246 
247  // Swap stack slot contents.
248  if (RegMap[RegOnTop] >= StackTop)
249  report_fatal_error("Access past stack top!");
250  std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
251 
252  // Emit an fxch to update the runtime processor's version of the state.
253  BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
254  ++NumFXCH;
255  }
256 
257  void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
258  DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
259  unsigned STReg = getSTReg(RegNo);
260  pushReg(AsReg); // New register on top of stack
261 
262  BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
263  }
264 
265  /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
266  /// RegNo. If any PendingST registers still need the RegNo value, duplicate
267  /// them to new scratch registers.
268  void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
269  for (unsigned i = 0; i != NumPendingSTs; ++i) {
270  if (PendingST[i] != RegNo)
271  continue;
272  unsigned SR = getScratchReg();
273  DEBUG(dbgs() << "Duplicating pending ST" << i
274  << " in FP" << RegNo << " to FP" << SR << '\n');
275  duplicateToTop(RegNo, SR, I);
276  PendingST[i] = SR;
277  }
278  }
279 
280  /// popStackAfter - Pop the current value off of the top of the FP stack
281  /// after the specified instruction.
282  void popStackAfter(MachineBasicBlock::iterator &I);
283 
284  /// freeStackSlotAfter - Free the specified register from the register
285  /// stack, so that it is no longer in a register. If the register is
286  /// currently at the top of the stack, we just pop the value off,
287  /// otherwise we store the current top-of-stack into the specified slot,
288  /// then pop the top of stack.
289  void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
290 
291  /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
292  /// instruction.
293  MachineBasicBlock::iterator
294  freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
295 
296  /// Adjust the live registers to be the set in Mask.
297  void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
298 
299  /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
300  /// st(0), FP reg FixStack[1] is st(1) etc.
301  void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
302  MachineBasicBlock::iterator I);
303 
304  bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
305 
306  void handleZeroArgFP(MachineBasicBlock::iterator &I);
307  void handleOneArgFP(MachineBasicBlock::iterator &I);
308  void handleOneArgFPRW(MachineBasicBlock::iterator &I);
309  void handleTwoArgFP(MachineBasicBlock::iterator &I);
310  void handleCompareFP(MachineBasicBlock::iterator &I);
311  void handleCondMovFP(MachineBasicBlock::iterator &I);
312  void handleSpecialFP(MachineBasicBlock::iterator &I);
313 
314  // Check if a COPY instruction is using FP registers.
315  static bool isFPCopy(MachineInstr *MI) {
316  unsigned DstReg = MI->getOperand(0).getReg();
317  unsigned SrcReg = MI->getOperand(1).getReg();
318 
319  return X86::RFP80RegClass.contains(DstReg) ||
320  X86::RFP80RegClass.contains(SrcReg);
321  }
322  };
323  char FPS::ID = 0;
324 }
325 
326 FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
327 
328 /// getFPReg - Return the X86::FPx register number for the specified operand.
329 /// For example, this returns 3 for X86::FP3.
330 static unsigned getFPReg(const MachineOperand &MO) {
331  assert(MO.isReg() && "Expected an FP register!");
332  unsigned Reg = MO.getReg();
333  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
334  return Reg - X86::FP0;
335 }
336 
337 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
338 /// register references into FP stack references.
339 ///
340 bool FPS::runOnMachineFunction(MachineFunction &MF) {
341  // We only need to run this pass if there are any FP registers used in this
342  // function. If it is all integer, there is nothing for us to do!
343  bool FPIsUsed = false;
344 
345  assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
346  for (unsigned i = 0; i <= 6; ++i)
347  if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
348  FPIsUsed = true;
349  break;
350  }
351 
352  // Early exit.
353  if (!FPIsUsed) return false;
354 
355  Bundles = &getAnalysis<EdgeBundles>();
356  TII = MF.getTarget().getInstrInfo();
357 
358  // Prepare cross-MBB liveness.
359  bundleCFG(MF);
360 
361  StackTop = 0;
362 
363  // Process the function in depth first order so that we process at least one
364  // of the predecessors for every reachable block in the function.
365  SmallPtrSet<MachineBasicBlock*, 8> Processed;
366  MachineBasicBlock *Entry = MF.begin();
367 
368  bool Changed = false;
369  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
370  I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
371  I != E; ++I)
372  Changed |= processBasicBlock(MF, **I);
373 
374  // Process any unreachable blocks in arbitrary order now.
375  if (MF.size() != Processed.size())
376  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
377  if (Processed.insert(BB))
378  Changed |= processBasicBlock(MF, *BB);
379 
380  LiveBundles.clear();
381 
382  return Changed;
383 }
384 
385 /// bundleCFG - Scan all the basic blocks to determine consistent live-in and
386 /// live-out sets for the FP registers. Consistent means that the set of
387 /// registers live-out from a block is identical to the live-in set of all
388 /// successors. This is not enforced by the normal live-in lists since
389 /// registers may be implicitly defined, or not used by all successors.
390 void FPS::bundleCFG(MachineFunction &MF) {
391  assert(LiveBundles.empty() && "Stale data in LiveBundles");
392  LiveBundles.resize(Bundles->getNumBundles());
393 
394  // Gather the actual live-in masks for all MBBs.
395  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
396  MachineBasicBlock *MBB = I;
397  const unsigned Mask = calcLiveInMask(MBB);
398  if (!Mask)
399  continue;
400  // Update MBB ingoing bundle mask.
401  LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
402  }
403 }
404 
405 /// processBasicBlock - Loop over all of the instructions in the basic block,
406 /// transforming FP instructions into their stack form.
407 ///
408 bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
409  bool Changed = false;
410  MBB = &BB;
411  NumPendingSTs = 0;
412 
413  setupBlockStack();
414 
415  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
416  MachineInstr *MI = I;
417  uint64_t Flags = MI->getDesc().TSFlags;
418 
419  unsigned FPInstClass = Flags & X86II::FPTypeMask;
420  if (MI->isInlineAsm())
421  FPInstClass = X86II::SpecialFP;
422 
423  if (MI->isCopy() && isFPCopy(MI))
424  FPInstClass = X86II::SpecialFP;
425 
426  if (MI->isImplicitDef() &&
427  X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
428  FPInstClass = X86II::SpecialFP;
429 
430  if (FPInstClass == X86II::NotFP)
431  continue; // Efficiently ignore non-fp insts!
432 
433  MachineInstr *PrevMI = 0;
434  if (I != BB.begin())
435  PrevMI = prior(I);
436 
437  ++NumFP; // Keep track of # of pseudo instrs
438  DEBUG(dbgs() << "\nFPInst:\t" << *MI);
439 
440  // Get dead variables list now because the MI pointer may be deleted as part
441  // of processing!
442  SmallVector<unsigned, 8> DeadRegs;
443  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
444  const MachineOperand &MO = MI->getOperand(i);
445  if (MO.isReg() && MO.isDead())
446  DeadRegs.push_back(MO.getReg());
447  }
448 
449  switch (FPInstClass) {
450  case X86II::ZeroArgFP: handleZeroArgFP(I); break;
451  case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
452  case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
453  case X86II::TwoArgFP: handleTwoArgFP(I); break;
454  case X86II::CompareFP: handleCompareFP(I); break;
455  case X86II::CondMovFP: handleCondMovFP(I); break;
456  case X86II::SpecialFP: handleSpecialFP(I); break;
457  default: llvm_unreachable("Unknown FP Type!");
458  }
459 
460  // Check to see if any of the values defined by this instruction are dead
461  // after definition. If so, pop them.
462  for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
463  unsigned Reg = DeadRegs[i];
464  if (Reg >= X86::FP0 && Reg <= X86::FP6) {
465  DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
466  freeStackSlotAfter(I, Reg-X86::FP0);
467  }
468  }
469 
470  // Print out all of the instructions expanded to if -debug
471  DEBUG(
472  MachineBasicBlock::iterator PrevI(PrevMI);
473  if (I == PrevI) {
474  dbgs() << "Just deleted pseudo instruction\n";
475  } else {
476  MachineBasicBlock::iterator Start = I;
477  // Rewind to first instruction newly inserted.
478  while (Start != BB.begin() && prior(Start) != PrevI) --Start;
479  dbgs() << "Inserted instructions:\n\t";
480  Start->print(dbgs(), &MF.getTarget());
481  while (++Start != llvm::next(I)) {}
482  }
483  dumpStack();
484  );
485  (void)PrevMI;
486 
487  Changed = true;
488  }
489 
490  finishBlockStack();
491 
492  return Changed;
493 }
494 
495 /// setupBlockStack - Use the live bundles to set up our model of the stack
496 /// to match predecessors' live out stack.
497 void FPS::setupBlockStack() {
498  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
499  << " derived from " << MBB->getName() << ".\n");
500  StackTop = 0;
501  // Get the live-in bundle for MBB.
502  const LiveBundle &Bundle =
503  LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
504 
505  if (!Bundle.Mask) {
506  DEBUG(dbgs() << "Block has no FP live-ins.\n");
507  return;
508  }
509 
510  // Depth-first iteration should ensure that we always have an assigned stack.
511  assert(Bundle.isFixed() && "Reached block before any predecessors");
512 
513  // Push the fixed live-in registers.
514  for (unsigned i = Bundle.FixCount; i > 0; --i) {
515  MBB->addLiveIn(X86::ST0+i-1);
516  DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
517  << unsigned(Bundle.FixStack[i-1]) << '\n');
518  pushReg(Bundle.FixStack[i-1]);
519  }
520 
521  // Kill off unwanted live-ins. This can happen with a critical edge.
522  // FIXME: We could keep these live registers around as zombies. They may need
523  // to be revived at the end of a short block. It might save a few instrs.
524  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
525  DEBUG(MBB->dump());
526 }
527 
528 /// finishBlockStack - Revive live-outs that are implicitly defined out of
529 /// MBB. Shuffle live registers to match the expected fixed stack of any
530 /// predecessors, and ensure that all predecessors are expecting the same
531 /// stack.
532 void FPS::finishBlockStack() {
533  // The RET handling below takes care of return blocks for us.
534  if (MBB->succ_empty())
535  return;
536 
537  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
538  << " derived from " << MBB->getName() << ".\n");
539 
540  // Get MBB's live-out bundle.
541  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
542  LiveBundle &Bundle = LiveBundles[BundleIdx];
543 
544  // We may need to kill and define some registers to match successors.
545  // FIXME: This can probably be combined with the shuffle below.
546  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
547  adjustLiveRegs(Bundle.Mask, Term);
548 
549  if (!Bundle.Mask) {
550  DEBUG(dbgs() << "No live-outs.\n");
551  return;
552  }
553 
554  // Has the stack order been fixed yet?
555  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
556  if (Bundle.isFixed()) {
557  DEBUG(dbgs() << "Shuffling stack to match.\n");
558  shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
559  } else {
560  // Not fixed yet, we get to choose.
561  DEBUG(dbgs() << "Fixing stack order now.\n");
562  Bundle.FixCount = StackTop;
563  for (unsigned i = 0; i < StackTop; ++i)
564  Bundle.FixStack[i] = getStackEntry(i);
565  }
566 }
567 
568 
569 //===----------------------------------------------------------------------===//
570 // Efficient Lookup Table Support
571 //===----------------------------------------------------------------------===//
572 
573 namespace {
574  struct TableEntry {
575  uint16_t from;
576  uint16_t to;
577  bool operator<(const TableEntry &TE) const { return from < TE.from; }
578  friend bool operator<(const TableEntry &TE, unsigned V) {
579  return TE.from < V;
580  }
581  friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
582  const TableEntry &TE) {
583  return V < TE.from;
584  }
585  };
586 }
587 
588 #ifndef NDEBUG
589 static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
590  for (unsigned i = 0; i != NumEntries-1; ++i)
591  if (!(Table[i] < Table[i+1])) return false;
592  return true;
593 }
594 #endif
595 
596 static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
597  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
598  if (I != Table+N && I->from == Opcode)
599  return I->to;
600  return -1;
601 }
602 
603 #ifdef NDEBUG
604 #define ASSERT_SORTED(TABLE)
605 #else
606 #define ASSERT_SORTED(TABLE) \
607  { static bool TABLE##Checked = false; \
608  if (!TABLE##Checked) { \
609  assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
610  "All lookup tables must be sorted for efficient access!"); \
611  TABLE##Checked = true; \
612  } \
613  }
614 #endif
615 
616 //===----------------------------------------------------------------------===//
617 // Register File -> Register Stack Mapping Methods
618 //===----------------------------------------------------------------------===//
619 
620 // OpcodeTable - Sorted map of register instructions to their stack version.
621 // The first element is a register file pseudo instruction, the second is the
622 // concrete X86 instruction which uses the register stack.
623 //
624 static const TableEntry OpcodeTable[] = {
625  { X86::ABS_Fp32 , X86::ABS_F },
626  { X86::ABS_Fp64 , X86::ABS_F },
627  { X86::ABS_Fp80 , X86::ABS_F },
628  { X86::ADD_Fp32m , X86::ADD_F32m },
629  { X86::ADD_Fp64m , X86::ADD_F64m },
630  { X86::ADD_Fp64m32 , X86::ADD_F32m },
631  { X86::ADD_Fp80m32 , X86::ADD_F32m },
632  { X86::ADD_Fp80m64 , X86::ADD_F64m },
633  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
634  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
635  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
636  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
637  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
638  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
639  { X86::CHS_Fp32 , X86::CHS_F },
640  { X86::CHS_Fp64 , X86::CHS_F },
641  { X86::CHS_Fp80 , X86::CHS_F },
642  { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
643  { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
644  { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
645  { X86::CMOVB_Fp32 , X86::CMOVB_F },
646  { X86::CMOVB_Fp64 , X86::CMOVB_F },
647  { X86::CMOVB_Fp80 , X86::CMOVB_F },
648  { X86::CMOVE_Fp32 , X86::CMOVE_F },
649  { X86::CMOVE_Fp64 , X86::CMOVE_F },
650  { X86::CMOVE_Fp80 , X86::CMOVE_F },
651  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
652  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
653  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
654  { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
655  { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
656  { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
657  { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
658  { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
659  { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
660  { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
661  { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
662  { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
663  { X86::CMOVP_Fp32 , X86::CMOVP_F },
664  { X86::CMOVP_Fp64 , X86::CMOVP_F },
665  { X86::CMOVP_Fp80 , X86::CMOVP_F },
666  { X86::COS_Fp32 , X86::COS_F },
667  { X86::COS_Fp64 , X86::COS_F },
668  { X86::COS_Fp80 , X86::COS_F },
669  { X86::DIVR_Fp32m , X86::DIVR_F32m },
670  { X86::DIVR_Fp64m , X86::DIVR_F64m },
671  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
672  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
673  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
674  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
675  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
676  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
677  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
678  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
679  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
680  { X86::DIV_Fp32m , X86::DIV_F32m },
681  { X86::DIV_Fp64m , X86::DIV_F64m },
682  { X86::DIV_Fp64m32 , X86::DIV_F32m },
683  { X86::DIV_Fp80m32 , X86::DIV_F32m },
684  { X86::DIV_Fp80m64 , X86::DIV_F64m },
685  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
686  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
687  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
688  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
689  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
690  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
691  { X86::ILD_Fp16m32 , X86::ILD_F16m },
692  { X86::ILD_Fp16m64 , X86::ILD_F16m },
693  { X86::ILD_Fp16m80 , X86::ILD_F16m },
694  { X86::ILD_Fp32m32 , X86::ILD_F32m },
695  { X86::ILD_Fp32m64 , X86::ILD_F32m },
696  { X86::ILD_Fp32m80 , X86::ILD_F32m },
697  { X86::ILD_Fp64m32 , X86::ILD_F64m },
698  { X86::ILD_Fp64m64 , X86::ILD_F64m },
699  { X86::ILD_Fp64m80 , X86::ILD_F64m },
700  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
701  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
702  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
703  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
704  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
705  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
706  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
707  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
708  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
709  { X86::IST_Fp16m32 , X86::IST_F16m },
710  { X86::IST_Fp16m64 , X86::IST_F16m },
711  { X86::IST_Fp16m80 , X86::IST_F16m },
712  { X86::IST_Fp32m32 , X86::IST_F32m },
713  { X86::IST_Fp32m64 , X86::IST_F32m },
714  { X86::IST_Fp32m80 , X86::IST_F32m },
715  { X86::IST_Fp64m32 , X86::IST_FP64m },
716  { X86::IST_Fp64m64 , X86::IST_FP64m },
717  { X86::IST_Fp64m80 , X86::IST_FP64m },
718  { X86::LD_Fp032 , X86::LD_F0 },
719  { X86::LD_Fp064 , X86::LD_F0 },
720  { X86::LD_Fp080 , X86::LD_F0 },
721  { X86::LD_Fp132 , X86::LD_F1 },
722  { X86::LD_Fp164 , X86::LD_F1 },
723  { X86::LD_Fp180 , X86::LD_F1 },
724  { X86::LD_Fp32m , X86::LD_F32m },
725  { X86::LD_Fp32m64 , X86::LD_F32m },
726  { X86::LD_Fp32m80 , X86::LD_F32m },
727  { X86::LD_Fp64m , X86::LD_F64m },
728  { X86::LD_Fp64m80 , X86::LD_F64m },
729  { X86::LD_Fp80m , X86::LD_F80m },
730  { X86::MUL_Fp32m , X86::MUL_F32m },
731  { X86::MUL_Fp64m , X86::MUL_F64m },
732  { X86::MUL_Fp64m32 , X86::MUL_F32m },
733  { X86::MUL_Fp80m32 , X86::MUL_F32m },
734  { X86::MUL_Fp80m64 , X86::MUL_F64m },
735  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
736  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
737  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
738  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
739  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
740  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
741  { X86::SIN_Fp32 , X86::SIN_F },
742  { X86::SIN_Fp64 , X86::SIN_F },
743  { X86::SIN_Fp80 , X86::SIN_F },
744  { X86::SQRT_Fp32 , X86::SQRT_F },
745  { X86::SQRT_Fp64 , X86::SQRT_F },
746  { X86::SQRT_Fp80 , X86::SQRT_F },
747  { X86::ST_Fp32m , X86::ST_F32m },
748  { X86::ST_Fp64m , X86::ST_F64m },
749  { X86::ST_Fp64m32 , X86::ST_F32m },
750  { X86::ST_Fp80m32 , X86::ST_F32m },
751  { X86::ST_Fp80m64 , X86::ST_F64m },
752  { X86::ST_FpP80m , X86::ST_FP80m },
753  { X86::SUBR_Fp32m , X86::SUBR_F32m },
754  { X86::SUBR_Fp64m , X86::SUBR_F64m },
755  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
756  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
757  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
758  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
759  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
760  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
761  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
762  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
763  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
764  { X86::SUB_Fp32m , X86::SUB_F32m },
765  { X86::SUB_Fp64m , X86::SUB_F64m },
766  { X86::SUB_Fp64m32 , X86::SUB_F32m },
767  { X86::SUB_Fp80m32 , X86::SUB_F32m },
768  { X86::SUB_Fp80m64 , X86::SUB_F64m },
769  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
770  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
771  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
772  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
773  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
774  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
775  { X86::TST_Fp32 , X86::TST_F },
776  { X86::TST_Fp64 , X86::TST_F },
777  { X86::TST_Fp80 , X86::TST_F },
778  { X86::UCOM_FpIr32 , X86::UCOM_FIr },
779  { X86::UCOM_FpIr64 , X86::UCOM_FIr },
780  { X86::UCOM_FpIr80 , X86::UCOM_FIr },
781  { X86::UCOM_Fpr32 , X86::UCOM_Fr },
782  { X86::UCOM_Fpr64 , X86::UCOM_Fr },
783  { X86::UCOM_Fpr80 , X86::UCOM_Fr },
784 };
785 
786 static unsigned getConcreteOpcode(unsigned Opcode) {
787  ASSERT_SORTED(OpcodeTable);
788  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
789  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
790  return Opc;
791 }
792 
793 //===----------------------------------------------------------------------===//
794 // Helper Methods
795 //===----------------------------------------------------------------------===//
796 
797 // PopTable - Sorted map of instructions to their popping version. The first
798 // element is an instruction, the second is the version which pops.
799 //
800 static const TableEntry PopTable[] = {
801  { X86::ADD_FrST0 , X86::ADD_FPrST0 },
802 
803  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
804  { X86::DIV_FrST0 , X86::DIV_FPrST0 },
805 
806  { X86::IST_F16m , X86::IST_FP16m },
807  { X86::IST_F32m , X86::IST_FP32m },
808 
809  { X86::MUL_FrST0 , X86::MUL_FPrST0 },
810 
811  { X86::ST_F32m , X86::ST_FP32m },
812  { X86::ST_F64m , X86::ST_FP64m },
813  { X86::ST_Frr , X86::ST_FPrr },
814 
815  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
816  { X86::SUB_FrST0 , X86::SUB_FPrST0 },
817 
818  { X86::UCOM_FIr , X86::UCOM_FIPr },
819 
820  { X86::UCOM_FPr , X86::UCOM_FPPr },
821  { X86::UCOM_Fr , X86::UCOM_FPr },
822 };
823 
824 /// popStackAfter - Pop the current value off of the top of the FP stack after
825 /// the specified instruction. This attempts to be sneaky and combine the pop
826 /// into the instruction itself if possible. The iterator is left pointing to
827 /// the last instruction, be it a new pop instruction inserted, or the old
828 /// instruction if it was modified in place.
829 ///
830 void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
831  MachineInstr* MI = I;
832  DebugLoc dl = MI->getDebugLoc();
833  ASSERT_SORTED(PopTable);
834  if (StackTop == 0)
835  report_fatal_error("Cannot pop empty stack!");
836  RegMap[Stack[--StackTop]] = ~0; // Update state
837 
838  // Check to see if there is a popping version of this instruction...
839  int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
840  if (Opcode != -1) {
841  I->setDesc(TII->get(Opcode));
842  if (Opcode == X86::UCOM_FPPr)
843  I->RemoveOperand(0);
844  } else { // Insert an explicit pop
845  I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
846  }
847 }
848 
849 /// freeStackSlotAfter - Free the specified register from the register stack, so
850 /// that it is no longer in a register. If the register is currently at the top
851 /// of the stack, we just pop the value off, otherwise we store the
852 /// current top-of-stack into the specified slot, then pop the top of stack.
853 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
854  if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
855  popStackAfter(I);
856  return;
857  }
858 
859  // Otherwise, store the top of stack into the dead slot, killing the operand
860  // without having to add in an explicit xchg then pop.
861  //
862  I = freeStackSlotBefore(++I, FPRegNo);
863 }
864 
865 /// freeStackSlotBefore - Free the specified register without trying any
866 /// folding.
867 MachineBasicBlock::iterator
868 FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
869  unsigned STReg = getSTReg(FPRegNo);
870  unsigned OldSlot = getSlot(FPRegNo);
871  unsigned TopReg = Stack[StackTop-1];
872  Stack[OldSlot] = TopReg;
873  RegMap[TopReg] = OldSlot;
874  RegMap[FPRegNo] = ~0;
875  Stack[--StackTop] = ~0;
876  return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
877 }
878 
879 /// adjustLiveRegs - Kill and revive registers such that exactly the FP
880 /// registers with a bit in Mask are live.
881 void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
882  unsigned Defs = Mask;
883  unsigned Kills = 0;
884  for (unsigned i = 0; i < StackTop; ++i) {
885  unsigned RegNo = Stack[i];
886  if (!(Defs & (1 << RegNo)))
887  // This register is live, but we don't want it.
888  Kills |= (1 << RegNo);
889  else
890  // We don't need to imp-def this live register.
891  Defs &= ~(1 << RegNo);
892  }
893  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
894 
895  // Produce implicit-defs for free by using killed registers.
896  while (Kills && Defs) {
897  unsigned KReg = countTrailingZeros(Kills);
898  unsigned DReg = countTrailingZeros(Defs);
899  DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
900  std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
901  std::swap(RegMap[KReg], RegMap[DReg]);
902  Kills &= ~(1 << KReg);
903  Defs &= ~(1 << DReg);
904  }
905 
906  // Kill registers by popping.
907  if (Kills && I != MBB->begin()) {
908  MachineBasicBlock::iterator I2 = llvm::prior(I);
909  while (StackTop) {
910  unsigned KReg = getStackEntry(0);
911  if (!(Kills & (1 << KReg)))
912  break;
913  DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
914  popStackAfter(I2);
915  Kills &= ~(1 << KReg);
916  }
917  }
918 
919  // Manually kill the rest.
920  while (Kills) {
921  unsigned KReg = countTrailingZeros(Kills);
922  DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
923  freeStackSlotBefore(I, KReg);
924  Kills &= ~(1 << KReg);
925  }
926 
927  // Load zeros for all the imp-defs.
928  while(Defs) {
929  unsigned DReg = countTrailingZeros(Defs);
930  DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
931  BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
932  pushReg(DReg);
933  Defs &= ~(1 << DReg);
934  }
935 
936  // Now we should have the correct registers live.
937  DEBUG(dumpStack());
938  assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
939 }
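// Illustrative example (not from the original source): if Mask == 0b011
// (FP0 and FP1 wanted) but only FP2 is currently live, the scan marks FP2 as
// a kill; the rename loop then reuses FP2's slot as a free implicit def of
// FP0, and a single LD_F0 is emitted to define FP1.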
940 
941 /// shuffleStackTop - emit fxch instructions before I to shuffle the top
942 /// FixCount entries into the order given by FixStack.
943 /// FIXME: Is there a better algorithm than insertion sort?
944 void FPS::shuffleStackTop(const unsigned char *FixStack,
945  unsigned FixCount,
946  MachineBasicBlock::iterator I) {
947  // Move items into place, starting from the desired stack bottom.
948  while (FixCount--) {
949  // Old register at position FixCount.
950  unsigned OldReg = getStackEntry(FixCount);
951  // Desired register at position FixCount.
952  unsigned Reg = FixStack[FixCount];
953  if (Reg == OldReg)
954  continue;
955  // (Reg st0) (OldReg st0) = (Reg OldReg st0)
956  moveToTop(Reg, I);
957  if (FixCount > 0)
958  moveToTop(OldReg, I);
959  }
960  DEBUG(dumpStack());
961 }
962 
963 
964 //===----------------------------------------------------------------------===//
965 // Instruction transformation implementation
966 //===----------------------------------------------------------------------===//
967 
968 /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
969 ///
970 void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
971  MachineInstr *MI = I;
972  unsigned DestReg = getFPReg(MI->getOperand(0));
973 
974  // Change from the pseudo instruction to the concrete instruction.
975  MI->RemoveOperand(0); // Remove the explicit ST(0) operand
976  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
977 
978  // Result gets pushed on the stack.
979  pushReg(DestReg);
980 }
981 
982 /// handleOneArgFP - fst <mem>, ST(0)
983 ///
984 void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
985  MachineInstr *MI = I;
986  unsigned NumOps = MI->getDesc().getNumOperands();
987  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
988  "Can only handle fst* & ftst instructions!");
989 
990  // Is this the last use of the source register?
991  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
992  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
993 
994  if (KillsSrc)
995  duplicatePendingSTBeforeKill(Reg, I);
996 
997  // FISTP64m is strange because there isn't a non-popping version.
998  // If we have one _and_ we don't want to pop the operand, duplicate the value
999  // on the stack instead of moving it. This ensures that popping the value is
1000  // always ok.
1001  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1002  //
1003  if (!KillsSrc &&
1004  (MI->getOpcode() == X86::IST_Fp64m32 ||
1005  MI->getOpcode() == X86::ISTT_Fp16m32 ||
1006  MI->getOpcode() == X86::ISTT_Fp32m32 ||
1007  MI->getOpcode() == X86::ISTT_Fp64m32 ||
1008  MI->getOpcode() == X86::IST_Fp64m64 ||
1009  MI->getOpcode() == X86::ISTT_Fp16m64 ||
1010  MI->getOpcode() == X86::ISTT_Fp32m64 ||
1011  MI->getOpcode() == X86::ISTT_Fp64m64 ||
1012  MI->getOpcode() == X86::IST_Fp64m80 ||
1013  MI->getOpcode() == X86::ISTT_Fp16m80 ||
1014  MI->getOpcode() == X86::ISTT_Fp32m80 ||
1015  MI->getOpcode() == X86::ISTT_Fp64m80 ||
1016  MI->getOpcode() == X86::ST_FpP80m)) {
1017  duplicateToTop(Reg, getScratchReg(), I);
1018  } else {
1019  moveToTop(Reg, I); // Move to the top of the stack...
1020  }
1021 
1022  // Convert from the pseudo instruction to the concrete instruction.
1023  MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
1024  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1025 
1026  if (MI->getOpcode() == X86::IST_FP64m ||
1027  MI->getOpcode() == X86::ISTT_FP16m ||
1028  MI->getOpcode() == X86::ISTT_FP32m ||
1029  MI->getOpcode() == X86::ISTT_FP64m ||
1030  MI->getOpcode() == X86::ST_FP80m) {
1031  if (StackTop == 0)
1032  report_fatal_error("Stack empty??");
1033  --StackTop;
1034  } else if (KillsSrc) { // Last use of operand?
1035  popStackAfter(I);
1036  }
1037 }
1038 
1039 
1040 /// handleOneArgFPRW: Handle instructions that read from the top of stack and
1041 /// replace the value with a newly computed value. These instructions may have
1042 /// non-fp operands after their FP operands.
1043 ///
1044 /// Examples:
1045 /// R1 = fchs R2
1046 /// R1 = fadd R2, [mem]
1047 ///
1048 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1049  MachineInstr *MI = I;
1050 #ifndef NDEBUG
1051  unsigned NumOps = MI->getDesc().getNumOperands();
1052  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1053 #endif
1054 
1055  // Is this the last use of the source register?
1056  unsigned Reg = getFPReg(MI->getOperand(1));
1057  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
1058 
1059  if (KillsSrc) {
1060  duplicatePendingSTBeforeKill(Reg, I);
1061  // If this is the last use of the source register, just make sure it's on
1062  // the top of the stack.
1063  moveToTop(Reg, I);
1064  if (StackTop == 0)
1065  report_fatal_error("Stack cannot be empty!");
1066  --StackTop;
1067  pushReg(getFPReg(MI->getOperand(0)));
1068  } else {
1069  // If this is not the last use of the source register, _copy_ it to the top
1070  // of the stack.
1071  duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
1072  }
1073 
1074  // Change from the pseudo instruction to the concrete instruction.
1075  MI->RemoveOperand(1); // Drop the source operand.
1076  MI->RemoveOperand(0); // Drop the destination operand.
1077  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1078 }
1079 
1080 
1081 //===----------------------------------------------------------------------===//
1082 // Define tables of various ways to map pseudo instructions
1083 //
1084 
1085 // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1086 static const TableEntry ForwardST0Table[] = {
1087  { X86::ADD_Fp32 , X86::ADD_FST0r },
1088  { X86::ADD_Fp64 , X86::ADD_FST0r },
1089  { X86::ADD_Fp80 , X86::ADD_FST0r },
1090  { X86::DIV_Fp32 , X86::DIV_FST0r },
1091  { X86::DIV_Fp64 , X86::DIV_FST0r },
1092  { X86::DIV_Fp80 , X86::DIV_FST0r },
1093  { X86::MUL_Fp32 , X86::MUL_FST0r },
1094  { X86::MUL_Fp64 , X86::MUL_FST0r },
1095  { X86::MUL_Fp80 , X86::MUL_FST0r },
1096  { X86::SUB_Fp32 , X86::SUB_FST0r },
1097  { X86::SUB_Fp64 , X86::SUB_FST0r },
1098  { X86::SUB_Fp80 , X86::SUB_FST0r },
1099 };
1100 
1101 // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1102 static const TableEntry ReverseST0Table[] = {
1103  { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1104  { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1105  { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1106  { X86::DIV_Fp32 , X86::DIVR_FST0r },
1107  { X86::DIV_Fp64 , X86::DIVR_FST0r },
1108  { X86::DIV_Fp80 , X86::DIVR_FST0r },
1109  { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1110  { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1111  { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1112  { X86::SUB_Fp32 , X86::SUBR_FST0r },
1113  { X86::SUB_Fp64 , X86::SUBR_FST0r },
1114  { X86::SUB_Fp80 , X86::SUBR_FST0r },
1115 };
1116 
1117 // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1118 static const TableEntry ForwardSTiTable[] = {
1119  { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1120  { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1121  { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1122  { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1123  { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1124  { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1125  { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1126  { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1127  { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1128  { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1129  { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1130  { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1131 };
1132 
1133 // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1134 static const TableEntry ReverseSTiTable[] = {
1135  { X86::ADD_Fp32 , X86::ADD_FrST0 },
1136  { X86::ADD_Fp64 , X86::ADD_FrST0 },
1137  { X86::ADD_Fp80 , X86::ADD_FrST0 },
1138  { X86::DIV_Fp32 , X86::DIV_FrST0 },
1139  { X86::DIV_Fp64 , X86::DIV_FrST0 },
1140  { X86::DIV_Fp80 , X86::DIV_FrST0 },
1141  { X86::MUL_Fp32 , X86::MUL_FrST0 },
1142  { X86::MUL_Fp64 , X86::MUL_FrST0 },
1143  { X86::MUL_Fp80 , X86::MUL_FrST0 },
1144  { X86::SUB_Fp32 , X86::SUB_FrST0 },
1145  { X86::SUB_Fp64 , X86::SUB_FrST0 },
1146  { X86::SUB_Fp80 , X86::SUB_FrST0 },
1147 };
1148 
1149 
1150 /// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1151 /// instructions which need to be simplified and possibly transformed.
1152 ///
1153 /// Result: ST(0) = fsub ST(0), ST(i)
1154 /// ST(i) = fsub ST(0), ST(i)
1155 /// ST(0) = fsubr ST(0), ST(i)
1156 /// ST(i) = fsubr ST(0), ST(i)
1157 ///
1158 void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1159  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1160  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1161  MachineInstr *MI = I;
1162 
1163  unsigned NumOperands = MI->getDesc().getNumOperands();
1164  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1165  unsigned Dest = getFPReg(MI->getOperand(0));
1166  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1167  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1168  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1169  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1170  DebugLoc dl = MI->getDebugLoc();
1171 
1172  unsigned TOS = getStackEntry(0);
1173 
1174  // One of our operands must be on the top of the stack. If neither is yet, we
1175  // need to move one.
1176  if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1177  // We can choose to move either operand to the top of the stack. If one of
1178  // the operands is killed by this instruction, we want that one so that we
1179  // can update right on top of the old version.
1180  if (KillsOp0) {
1181  moveToTop(Op0, I); // Move dead operand to TOS.
1182  TOS = Op0;
1183  } else if (KillsOp1) {
1184  moveToTop(Op1, I);
1185  TOS = Op1;
1186  } else {
1187  // All of the operands are live after this instruction executes, so we
1188  // cannot update on top of any operand. Because of this, we must
1189  // duplicate one of the stack elements to the top. It doesn't matter
1190  // which one we pick.
1191  //
1192  duplicateToTop(Op0, Dest, I);
1193  Op0 = TOS = Dest;
1194  KillsOp0 = true;
1195  }
1196  } else if (!KillsOp0 && !KillsOp1) {
1197  // If we DO have one of our operands at the top of the stack, but we don't
1198  // have a dead operand, we must duplicate one of the operands to a new slot
1199  // on the stack.
1200  duplicateToTop(Op0, Dest, I);
1201  Op0 = TOS = Dest;
1202  KillsOp0 = true;
1203  }
1204 
1205  // Now we know that one of our operands is on the top of the stack, and at
1206  // least one of our operands is killed by this instruction.
1207  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1208  "Stack conditions not set up right!");
1209 
1210  // We decide which form to use based on what is on the top of the stack, and
1211  // which operand is killed by this instruction.
1212  const TableEntry *InstTable;
1213  bool isForward = TOS == Op0;
1214  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1215  if (updateST0) {
1216  if (isForward)
1217  InstTable = ForwardST0Table;
1218  else
1219  InstTable = ReverseST0Table;
1220  } else {
1221  if (isForward)
1222  InstTable = ForwardSTiTable;
1223  else
1224  InstTable = ReverseSTiTable;
1225  }
1226 
1227  int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
1228  MI->getOpcode());
1229  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1230 
1231  // NotTOS - The register which is not on the top of stack...
1232  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1233 
1234  // Replace the old instruction with a new instruction
1235  MBB->remove(I++);
1236  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1237 
1238  // If both operands are killed, pop one off of the stack in addition to
1239  // overwriting the other one.
1240  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1241  assert(!updateST0 && "Should have updated other operand!");
1242  popStackAfter(I); // Pop the top of stack
1243  }
1244 
1245  // Update stack information so that we know the destination register is now on
1246  // the stack.
1247  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1248  assert(UpdatedSlot < StackTop && Dest < 7);
1249  Stack[UpdatedSlot] = Dest;
1250  RegMap[Dest] = UpdatedSlot;
1251  MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
1252 }
1253 
1254 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1255 /// register arguments and no explicit destinations.
1256 ///
1257 void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1258  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1259  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1260  MachineInstr *MI = I;
1261 
1262  unsigned NumOperands = MI->getDesc().getNumOperands();
1263  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1264  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1265  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1266  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1267  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1268 
1269  // Make sure the first operand is on the top of stack, the other one can be
1270  // anywhere.
1271  moveToTop(Op0, I);
1272 
1273  // Change from the pseudo instruction to the concrete instruction.
1274  MI->getOperand(0).setReg(getSTReg(Op1));
1275  MI->RemoveOperand(1);
1276  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1277 
1278  // If any of the operands are killed by this instruction, free them.
1279  if (KillsOp0) freeStackSlotAfter(I, Op0);
1280  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1281 }
1282 
1283 /// handleCondMovFP - Handle two address conditional move instructions. These
1284 /// instructions move a st(i) register to st(0) iff a condition is true. These
1285 /// instructions require that the first operand is at the top of the stack, but
1286 /// otherwise don't modify the stack at all.
1287 void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1288  MachineInstr *MI = I;
1289 
1290  unsigned Op0 = getFPReg(MI->getOperand(0));
1291  unsigned Op1 = getFPReg(MI->getOperand(2));
1292  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1293 
1294  // The first operand *must* be on the top of the stack.
1295  moveToTop(Op0, I);
1296 
1297  // Change the second operand to the stack register that the operand is in.
1298  // Change from the pseudo instruction to the concrete instruction.
1299  MI->RemoveOperand(0);
1300  MI->RemoveOperand(1);
1301  MI->getOperand(0).setReg(getSTReg(Op1));
1302  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1303 
1304  // If we kill the second operand, make sure to pop it from the stack.
1305  if (Op0 != Op1 && KillsOp1) {
1306  // Get this value off of the register stack.
1307  freeStackSlotAfter(I, Op1);
1308  }
1309 }
1310 
1311 
1312 /// handleSpecialFP - Handle special instructions which behave unlike other
1313 /// floating point instructions. This is primarily intended for use by pseudo
1314 /// instructions.
1315 ///
1316 void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
1317  MachineInstr *MI = I;
1318  switch (MI->getOpcode()) {
1319  default: llvm_unreachable("Unknown SpecialFP instruction!");
1320  case TargetOpcode::COPY: {
1321  // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1322  const MachineOperand &MO1 = MI->getOperand(1);
1323  const MachineOperand &MO0 = MI->getOperand(0);
1324  unsigned DstST = MO0.getReg() - X86::ST0;
1325  unsigned SrcST = MO1.getReg() - X86::ST0;
1326  bool KillsSrc = MI->killsRegister(MO1.getReg());
1327 
1328  // ST = COPY FP. Set up a pending ST register.
1329  if (DstST < 8) {
1330  unsigned SrcFP = getFPReg(MO1);
1331  assert(isLive(SrcFP) && "Cannot copy dead register");
1332  assert(!MO0.isDead() && "Cannot copy to dead ST register");
1333 
1334  // Unallocated STs are marked with the nonexistent register number NumFPRegs.
1335  while (NumPendingSTs <= DstST)
1336  PendingST[NumPendingSTs++] = NumFPRegs;
1337 
1338  // STi could still be live from a previous inline asm.
1339  if (isScratchReg(PendingST[DstST])) {
1340  DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
1341  << '\n');
1342  freeStackSlotBefore(MI, PendingST[DstST]);
1343  }
1344 
1345  // When the source is killed, allocate a scratch FP register.
1346  if (KillsSrc) {
1347  duplicatePendingSTBeforeKill(SrcFP, I);
1348  unsigned Slot = getSlot(SrcFP);
1349  unsigned SR = getScratchReg();
1350  PendingST[DstST] = SR;
1351  Stack[Slot] = SR;
1352  RegMap[SR] = Slot;
1353  } else
1354  PendingST[DstST] = SrcFP;
1355  break;
1356  }
1357 
1358  // FP = COPY ST. Extract fixed stack value.
1359  // Any instruction defining ST registers must have assigned them to a
1360  // scratch register.
1361  if (SrcST < 8) {
1362  unsigned DstFP = getFPReg(MO0);
1363  assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
1364  assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
1365  unsigned SrcFP = PendingST[SrcST];
1366  assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
1367  assert(isLive(SrcFP) && "Scratch holding ST is dead");
1368 
1369  // DstFP steals the stack slot from SrcFP.
1370  unsigned Slot = getSlot(SrcFP);
1371  Stack[Slot] = DstFP;
1372  RegMap[DstFP] = Slot;
1373 
1374  // Always treat the ST as killed.
1375  PendingST[SrcST] = NumFPRegs;
1376  while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
1377  --NumPendingSTs;
1378  break;
1379  }
1380 
1381  // FP <- FP copy.
1382  unsigned DstFP = getFPReg(MO0);
1383  unsigned SrcFP = getFPReg(MO1);
1384  assert(isLive(SrcFP) && "Cannot copy dead register");
1385  if (KillsSrc) {
1386  // If the input operand is killed, we can just change the owner of the
1387  // incoming stack slot into the result.
1388  unsigned Slot = getSlot(SrcFP);
1389  Stack[Slot] = DstFP;
1390  RegMap[DstFP] = Slot;
1391  } else {
1392  // For COPY we just duplicate the specified value to a new stack slot.
1393  // This could be made better, but would require substantial changes.
1394  duplicateToTop(SrcFP, DstFP, I);
1395  }
1396  break;
1397  }
1398 
1399  case TargetOpcode::IMPLICIT_DEF: {
1400  // All FP registers must be explicitly defined, so load a 0 instead.
1401  unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
1402  DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1403  BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
1404  pushReg(Reg);
1405  break;
1406  }
1407 
1408  case X86::FpPOP_RETVAL: {
1409  // The FpPOP_RETVAL instruction is used after calls that return a value on
1410  // the floating point stack. We cannot model this with ST defs since CALL
1411  // instructions have fixed clobber lists. This instruction is interpreted
1412  // to mean that there is one more live register on the stack than we
1413  // thought.
1414  //
1415  // This means that StackTop does not match the hardware stack between a
1416  // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
1417  // between CALL and FpPOP_RETVAL as long as they don't overflow the
1418  // hardware stack.
1419  unsigned DstFP = getFPReg(MI->getOperand(0));
1420 
1421  // Move existing stack elements up to reflect reality.
1422  assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
1423  if (StackTop) {
1424  std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
1425  for (unsigned i = 0; i != NumFPRegs; ++i)
1426  ++RegMap[i];
1427  }
1428  ++StackTop;
1429 
1430  // DstFP is the new bottom of the stack.
1431  Stack[0] = DstFP;
1432  RegMap[DstFP] = 0;
1433 
1434  // DstFP will be killed by processBasicBlock if this was a dead def.
1435  break;
1436  }
1437 
1438  case TargetOpcode::INLINEASM: {
1439  // The inline asm MachineInstr currently only *uses* FP registers for the
1440  // 'f' constraint. These should be turned into the current ST(x) register
1441  // in the machine instr.
1442  //
1443  // There are special rules for x87 inline assembly. The compiler must know
1444  // exactly how many registers are popped and pushed implicitly by the asm.
1445  // Otherwise it is not possible to restore the stack state after the inline
1446  // asm.
1447  //
1448  // There are 3 kinds of input operands:
1449  //
1450  // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1451  // popped input operand must be in a fixed stack slot, and it is either
1452  // tied to an output operand, or in the clobber list. The MI has ST use
1453  // and def operands for these inputs.
1454  //
1455  // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1456  // preserved by the inline asm. The fixed stack slots must be STn-STm
1457  // following the popped inputs. A fixed input operand cannot be tied to
1458  // an output or appear in the clobber list. The MI has ST use operands
1459  // and no defs for these inputs.
1460  //
1461  // 3. Preserved inputs. These inputs use the "f" constraint which is
1462  // represented as an FP register. The inline asm won't change these
1463  // stack slots.
1464  //
1465  // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1466  // registers do not count as output operands. The inline asm changes the
1467  // stack as if it popped all the popped inputs and then pushed all the
1468  // output operands.
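  // Illustrative example (not from the original source, GCC-style syntax):
  //
  //   asm("fmul %st(1), %st" : "=t"(r) : "0"(a), "u"(b));
  //
  // Here the "0"(a) input tied to the "=t" output is a popped input in st(0)
  // (kind 1), the "u"(b) input is a fixed input in st(1) that the asm
  // preserves (kind 2), and any operand using the "f" constraint would be a
  // preserved input in an arbitrary stack slot (kind 3).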
1469 
1470  // Scan the assembly for ST registers used, defined and clobbered. We can
1471  // only tell clobbers from defs by looking at the asm descriptor.
1472  unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
1473  unsigned NumOps = 0;
1474  for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
1475  i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
1476  unsigned Flags = MI->getOperand(i).getImm();
1477  NumOps = InlineAsm::getNumOperandRegisters(Flags);
1478  if (NumOps != 1)
1479  continue;
1480  const MachineOperand &MO = MI->getOperand(i + 1);
1481  if (!MO.isReg())
1482  continue;
1483  unsigned STReg = MO.getReg() - X86::ST0;
1484  if (STReg >= 8)
1485  continue;
1486 
1487  switch (InlineAsm::getKind(Flags)) {
1488  case InlineAsm::Kind_RegUse:
1489  STUses |= (1u << STReg);
1490  break;
1491  case InlineAsm::Kind_RegDef:
1492  case InlineAsm::Kind_RegDefEarlyClobber:
1493  STDefs |= (1u << STReg);
1494  if (MO.isDead())
1495  STDeadDefs |= (1u << STReg);
1496  break;
1497  case InlineAsm::Kind_Clobber:
1498  STClobbers |= (1u << STReg);
1499  break;
1500  default:
1501  break;
1502  }
1503  }
1504 
1505  if (STUses && !isMask_32(STUses))
1506  MI->emitError("fixed input regs must be last on the x87 stack");
1507  unsigned NumSTUses = CountTrailingOnes_32(STUses);
1508 
1509  // Defs must be contiguous from the stack top. ST0-STn.
1510  if (STDefs && !isMask_32(STDefs)) {
1511  MI->emitError("output regs must be last on the x87 stack");
1512  STDefs = NextPowerOf2(STDefs) - 1;
1513  }
1514  unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
1515 
1516  // So must the clobbered stack slots. ST0-STm, m >= n.
1517  if (STClobbers && !isMask_32(STDefs | STClobbers))
1518  MI->emitError("clobbers must be last on the x87 stack");
1519 
1520  // Popped inputs are the ones that are also clobbered or defined.
1521  unsigned STPopped = STUses & (STDefs | STClobbers);
1522  if (STPopped && !isMask_32(STPopped))
1523  MI->emitError("implicitly popped regs must be last on the x87 stack");
1524  unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
1525 
1526  DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1527  << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
1528 
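  // For the fsincos sketch above (illustrative only): the scan sees an ST0
  // use tied to an ST0 def plus an ST1 def, so STUses = 0b001 and
  // STDefs = 0b011, hence STPopped = STUses & (STDefs | STClobbers) = 0b001
  // and NumSTUses = 1, NumSTDefs = 2, NumSTPopped = 1.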
1529  // Scan the instruction for FP uses corresponding to "f" constraints.
1530  // Collect FP registers to kill after the instruction.
1531  // Always kill all the scratch regs.
1532  unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1533  unsigned FPUsed = 0;
1534  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1535  MachineOperand &Op = MI->getOperand(i);
1536  if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1537  continue;
1538  if (!Op.isUse())
1539  MI->emitError("illegal \"f\" output constraint");
1540  unsigned FPReg = getFPReg(Op);
1541  FPUsed |= 1U << FPReg;
1542 
1543  // If we kill this operand, make sure to pop it from the stack after the
1544  // asm. We just remember it for now, and pop them all off at the end in
1545  // a batch.
1546  if (Op.isKill())
1547  FPKills |= 1U << FPReg;
1548  }
1549 
1550  // The popped inputs will be killed by the instruction, so duplicate them
1551  // if the FP register needs to be live after the instruction, or if it is
1552  // used in the instruction itself. We effectively treat the popped inputs
1553  // as early clobbers.
1554  for (unsigned i = 0; i < NumSTPopped; ++i) {
1555  if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
1556  continue;
1557  unsigned SR = getScratchReg();
1558  duplicateToTop(PendingST[i], SR, I);
1559  DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
1560  << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
1561  PendingST[i] = SR;
1562  }
1563 
1564  // Make sure we have a unique live register for every fixed use. Some of
1565  // them could be undef uses, and we need to emit LD_F0 instructions.
1566  for (unsigned i = 0; i < NumSTUses; ++i) {
1567  if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
1568  // Check for shared assignments.
1569  for (unsigned j = 0; j < i; ++j) {
1570  if (PendingST[j] != PendingST[i])
1571  continue;
1572  // STi and STj are in the same register, create a copy.
1573  unsigned SR = getScratchReg();
1574  duplicateToTop(PendingST[i], SR, I);
1575  DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
1576  << unsigned(PendingST[i])
1577  << " to avoid collision with ST" << j << '\n');
1578  PendingST[i] = SR;
1579  }
1580  continue;
1581  }
1582  unsigned SR = getScratchReg();
1583  DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
1584  BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
1585  pushReg(SR);
1586  PendingST[i] = SR;
1587  if (NumPendingSTs == i)
1588  ++NumPendingSTs;
1589  }
1590  assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
1591 
1592  // Now we can rearrange the live registers to match what was requested.
1593  shuffleStackTop(PendingST, NumPendingSTs, I);
1594  DEBUG({dbgs() << "Before asm: "; dumpStack();});
1595 
1596  // With the stack layout fixed, rewrite the FP registers.
1597  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1598  MachineOperand &Op = MI->getOperand(i);
1599  if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1600  continue;
1601  unsigned FPReg = getFPReg(Op);
1602  Op.setReg(getSTReg(FPReg));
1603  }
1604 
1605  // Simulate the inline asm popping its inputs and pushing its outputs.
1606  StackTop -= NumSTPopped;
1607 
1608  // Hold the fixed output registers in scratch FP registers. They will be
1609  // transferred to real FP registers by copies.
1610  NumPendingSTs = 0;
1611  for (unsigned i = 0; i < NumSTDefs; ++i) {
1612  unsigned SR = getScratchReg();
1613  pushReg(SR);
1614  FPKills &= ~(1u << SR);
1615  }
1616  for (unsigned i = 0; i < NumSTDefs; ++i)
1617  PendingST[NumPendingSTs++] = getStackEntry(i);
1618  DEBUG({dbgs() << "After asm: "; dumpStack();});
1619 
1620  // If any of the ST defs were dead, pop them immediately. Our caller only
1621  // handles dead FP defs.
1622  MachineBasicBlock::iterator InsertPt = MI;
1623  for (unsigned i = 0; STDefs & (1u << i); ++i) {
1624  if (!(STDeadDefs & (1u << i)))
1625  continue;
1626  freeStackSlotAfter(InsertPt, PendingST[i]);
1627  PendingST[i] = NumFPRegs;
1628  }
1629  while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
1630  --NumPendingSTs;
1631 
1632  // If this asm kills any FP registers (is the last use of them) we must
1633  // explicitly emit pop instructions for them. Do this now after the asm has
1634  // executed so that the ST(x) numbers are not off (which would happen if we
1635  // did this inline with operand rewriting).
1636  //
1637  // Note: this might be a non-optimal pop sequence. We might be able to do
1638  // better by trying to pop in stack order or something.
1639  while (FPKills) {
1640  unsigned FPReg = countTrailingZeros(FPKills);
1641  if (isLive(FPReg))
1642  freeStackSlotAfter(InsertPt, FPReg);
1643  FPKills &= ~(1U << FPReg);
1644  }
1645  // Don't delete the inline asm!
1646  return;
1647  }
1648 
1649  case X86::WIN_FTOL_32:
1650  case X86::WIN_FTOL_64: {
1651  // Push the operand into ST0.
1652  MachineOperand &Op = MI->getOperand(0);
1653  assert(Op.isUse() && Op.isReg() &&
1654  Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
1655  unsigned FPReg = getFPReg(Op);
1656  if (Op.isKill())
1657  moveToTop(FPReg, I);
1658  else
1659  duplicateToTop(FPReg, FPReg, I);
1660 
1661  // Emit the call. This will pop the operand.
1662  BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
1663  .addExternalSymbol("_ftol2")
1664  .addReg(X86::ST0, RegState::ImplicitKill)
1665  .addReg(X86::ECX, RegState::ImplicitDefine)
1666  .addReg(X86::EAX, RegState::Define | RegState::Implicit)
1667  .addReg(X86::EDX, RegState::Define | RegState::Implicit)
1668  .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
1669  --StackTop;
1670 
1671  break;
1672  }
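  // Rough shape of the emitted sequence for "i = (int)d" on Win32 (a sketch,
  // assuming the usual _ftol2 convention of taking its input in ST(0) and
  // returning the truncated 64-bit result in EDX:EAX):
  //
  //   fld   qword ptr [d]   ; operand moved/duplicated to ST(0) above
  //   call  _ftol2          ; pops ST(0); result left in EDX:EAX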
1673 
1674  case X86::RET:
1675  case X86::RETI:
1676  // If RET has an FP register use operand, pass the first one in ST(0) and
1677  // the second one in ST(1).
1678 
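  // For example (illustrative): on x86-32, returning a "_Complex long double"
  // typically gives a RET with two FP register operands, the real part
  // expected in ST(0) and the imaginary part in ST(1).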
1679  // Find the register operands.
1680  unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1681  unsigned LiveMask = 0;
1682 
1683  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1684  MachineOperand &Op = MI->getOperand(i);
1685  if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1686  continue;
1687  // FP Register uses must be kills unless there are two uses of the same
1688  // register, in which case only one will be a kill.
1689  assert(Op.isUse() &&
1690  (Op.isKill() || // Marked kill.
1691  getFPReg(Op) == FirstFPRegOp || // Second instance.
1692  MI->killsRegister(Op.getReg())) && // Later use is marked kill.
1693  "Ret only defs operands, and values aren't live beyond it");
1694 
1695  if (FirstFPRegOp == ~0U)
1696  FirstFPRegOp = getFPReg(Op);
1697  else {
1698  assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1699  SecondFPRegOp = getFPReg(Op);
1700  }
1701  LiveMask |= (1 << getFPReg(Op));
1702 
1703  // Remove the operand so that later passes don't see it.
1704  MI->RemoveOperand(i);
1705  --i, --e;
1706  }
1707 
1708  // We may have been carrying spurious live-ins, so make sure only the returned
1709  // registers are left live.
1710  adjustLiveRegs(LiveMask, MI);
1711  if (!LiveMask) return; // Quick check to see if any are possible.
1712 
1713  // There are only four possibilities here:
1714  // 1) we are returning a single FP value. In this case, it has to be in
1715  // ST(0) already, so just declare success by removing the value from the
1716  // FP Stack.
1717  if (SecondFPRegOp == ~0U) {
1718  // Assert that the top of stack contains the right FP register.
1719  assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1720  "Top of stack not the right register for RET!");
1721 
1722  // Ok, everything is good, mark the value as not being on the stack
1723  // anymore so that our assertion about the stack being empty at end of
1724  // block doesn't fire.
1725  StackTop = 0;
1726  return;
1727  }
1728 
1729  // Otherwise, we are returning two values:
1730  // 2) If returning the same value for both, we only have one thing in the FP
1731  // stack. Consider: RET FP1, FP1
1732  if (StackTop == 1) {
1733  assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1734  "Stack misconfiguration for RET!");
1735 
1736  // Duplicate the TOS so that we return it twice. Just pick some other FPx
1737  // register to hold it.
1738  unsigned NewReg = getScratchReg();
1739  duplicateToTop(FirstFPRegOp, NewReg, MI);
1740  FirstFPRegOp = NewReg;
1741  }
1742 
1743  /// Okay we know we have two different FPx operands now:
1744  assert(StackTop == 2 && "Must have two values live!");
1745 
1746  /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1747  /// in ST(1), emit an fxch so they are swapped into place for case 4.
1748  if (getStackEntry(0) == SecondFPRegOp) {
1749  assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1750  moveToTop(FirstFPRegOp, MI);
1751  }
1752 
1753  /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1754  /// ST(1). Just remove both from our understanding of the stack and return.
1755  assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1756  assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1757  StackTop = 0;
1758  return;
1759  }
1760 
1761  I = MBB->erase(I); // Remove the pseudo instruction
1762 
1763  // We want to leave I pointing to the previous instruction, but what if we
1764  // just erased the first instruction?
1765  if (I == MBB->begin()) {
1766  DEBUG(dbgs() << "Inserting dummy KILL\n");
1767  I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
1768  } else
1769  --I;
1770 }