26 #define DEBUG_TYPE "x86-codegen"
48 STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
49 STATISTIC(NumFP ,
"Number of floating point instructions");
58 memset(Stack, 0,
sizeof(Stack));
59 memset(RegMap, 0,
sizeof(RegMap));
72 virtual const char *getPassName()
const {
return "X86 FP Stackifier"; }
98 unsigned char FixStack[8];
100 LiveBundle() : Mask(0), FixCount(0) {}
103 bool isFixed()
const {
return !Mask || FixCount; }
121 Mask |= 1 << (Reg - X86::FP0);
146 unsigned RegMap[NumFPRegs];
166 unsigned NumPendingSTs;
167 unsigned char PendingST[8];
170 void setupBlockStack();
173 void finishBlockStack();
175 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
176 void dumpStack()
const {
177 dbgs() <<
"Stack contents:";
178 for (
unsigned i = 0; i != StackTop; ++i) {
179 dbgs() <<
" FP" << Stack[i];
180 assert(RegMap[Stack[i]] == i &&
"Stack[] doesn't match RegMap[]!");
182 for (
unsigned i = 0; i != NumPendingSTs; ++i)
183 dbgs() <<
", ST" << i <<
" in FP" <<
unsigned(PendingST[i]);
190 unsigned getSlot(
unsigned RegNo)
const {
191 assert(RegNo < NumFPRegs &&
"Regno out of range!");
192 return RegMap[RegNo];
196 bool isLive(
unsigned RegNo)
const {
197 unsigned Slot = getSlot(RegNo);
198 return Slot < StackTop && Stack[Slot] == RegNo;
202 unsigned getScratchReg()
const {
203 for (
int i = NumFPRegs - 1; i >= 8; --i)
210 static bool isScratchReg(
unsigned RegNo) {
211 return RegNo > 8 && RegNo < NumFPRegs;
215 unsigned getStackEntry(
unsigned STi)
const {
218 return Stack[StackTop-1-STi];
223 unsigned getSTReg(
unsigned RegNo)
const {
224 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
228 void pushReg(
unsigned Reg) {
229 assert(Reg < NumFPRegs &&
"Register number out of range!");
232 Stack[StackTop] =
Reg;
233 RegMap[
Reg] = StackTop++;
236 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
239 if (isAtTop(RegNo))
return;
241 unsigned STReg = getSTReg(RegNo);
242 unsigned RegOnTop = getStackEntry(0);
245 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
248 if (RegMap[RegOnTop] >= StackTop)
250 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
253 BuildMI(*MBB, I, dl,
TII->get(X86::XCH_F)).addReg(STReg);
257 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
MachineInstr *I) {
259 unsigned STReg = getSTReg(RegNo);
262 BuildMI(*MBB, I, dl,
TII->get(X86::LD_Frr)).addReg(STReg);
268 void duplicatePendingSTBeforeKill(
unsigned RegNo,
MachineInstr *I) {
269 for (
unsigned i = 0; i != NumPendingSTs; ++i) {
270 if (PendingST[i] != RegNo)
272 unsigned SR = getScratchReg();
273 DEBUG(
dbgs() <<
"Duplicating pending ST" << i
274 <<
" in FP" << RegNo <<
" to FP" << SR <<
'\n');
275 duplicateToTop(RegNo, SR, I);
301 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
319 return X86::RFP80RegClass.contains(DstReg) ||
320 X86::RFP80RegClass.contains(SrcReg);
331 assert(MO.
isReg() &&
"Expected an FP register!");
332 unsigned Reg = MO.
getReg();
333 assert(Reg >= X86::FP0 && Reg <= X86::FP6 &&
"Expected FP register!");
334 return Reg - X86::FP0;
343 bool FPIsUsed =
false;
345 assert(X86::FP6 == X86::FP0+6 &&
"Register enums aren't sorted right!");
346 for (
unsigned i = 0; i <= 6; ++i)
353 if (!FPIsUsed)
return false;
355 Bundles = &getAnalysis<EdgeBundles>();
368 bool Changed =
false;
372 Changed |= processBasicBlock(MF, **I);
378 Changed |= processBasicBlock(MF, *BB);
391 assert(LiveBundles.empty() &&
"Stale data in LiveBundles");
392 LiveBundles.resize(Bundles->getNumBundles());
397 const unsigned Mask = calcLiveInMask(MBB);
401 LiveBundles[Bundles->getBundle(MBB->
getNumber(),
false)].Mask |= Mask;
409 bool Changed =
false;
423 if (MI->
isCopy() && isFPCopy(MI))
449 switch (FPInstClass) {
462 for (
unsigned i = 0, e = DeadRegs.
size(); i != e; ++i) {
463 unsigned Reg = DeadRegs[i];
464 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
465 DEBUG(
dbgs() <<
"Register FP#" << Reg-X86::FP0 <<
" is dead!\n");
466 freeStackSlotAfter(I, Reg-X86::FP0);
474 dbgs() <<
"Just deleted pseudo instruction\n";
478 while (Start != BB.
begin() &&
prior(Start) != PrevI) --Start;
479 dbgs() <<
"Inserted instructions:\n\t";
497 void FPS::setupBlockStack() {
499 <<
" derived from " << MBB->
getName() <<
".\n");
502 const LiveBundle &Bundle =
503 LiveBundles[Bundles->getBundle(MBB->
getNumber(),
false)];
506 DEBUG(
dbgs() <<
"Block has no FP live-ins.\n");
511 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
514 for (
unsigned i = Bundle.FixCount; i > 0; --i) {
516 DEBUG(
dbgs() <<
"Live-in st(" << (i-1) <<
"): %FP"
517 <<
unsigned(Bundle.FixStack[i-1]) <<
'\n');
518 pushReg(Bundle.FixStack[i-1]);
524 adjustLiveRegs(calcLiveInMask(MBB), MBB->
begin());
532 void FPS::finishBlockStack() {
538 <<
" derived from " << MBB->
getName() <<
".\n");
541 unsigned BundleIdx = Bundles->getBundle(MBB->
getNumber(),
true);
542 LiveBundle &Bundle = LiveBundles[BundleIdx];
547 adjustLiveRegs(Bundle.Mask, Term);
555 DEBUG(
dbgs() <<
"LB#" << BundleIdx <<
": ");
556 if (Bundle.isFixed()) {
557 DEBUG(
dbgs() <<
"Shuffling stack to match.\n");
558 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
561 DEBUG(
dbgs() <<
"Fixing stack order now.\n");
562 Bundle.FixCount = StackTop;
563 for (
unsigned i = 0; i < StackTop; ++i)
564 Bundle.FixStack[i] = getStackEntry(i);
577 bool operator<(
const TableEntry &TE)
const {
return from < TE.from; }
578 friend bool operator<(
const TableEntry &TE,
unsigned V) {
582 const TableEntry &TE) {
590 for (
unsigned i = 0; i != NumEntries-1; ++i)
591 if (!(Table[i] < Table[i+1]))
return false;
596 static int Lookup(
const TableEntry *Table,
unsigned N,
unsigned Opcode) {
597 const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
598 if (I != Table+N && I->from == Opcode)
604 #define ASSERT_SORTED(TABLE)
606 #define ASSERT_SORTED(TABLE) \
607 { static bool TABLE##Checked = false; \
608 if (!TABLE##Checked) { \
609 assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
610 "All lookup tables must be sorted for efficient access!"); \
611 TABLE##Checked = true; \
625 { X86::ABS_Fp32 , X86::ABS_F },
626 { X86::ABS_Fp64 , X86::ABS_F },
627 { X86::ABS_Fp80 , X86::ABS_F },
628 { X86::ADD_Fp32m , X86::ADD_F32m },
629 { X86::ADD_Fp64m , X86::ADD_F64m },
630 { X86::ADD_Fp64m32 , X86::ADD_F32m },
631 { X86::ADD_Fp80m32 , X86::ADD_F32m },
632 { X86::ADD_Fp80m64 , X86::ADD_F64m },
633 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
634 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
635 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
636 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
637 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
638 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
639 { X86::CHS_Fp32 , X86::CHS_F },
640 { X86::CHS_Fp64 , X86::CHS_F },
641 { X86::CHS_Fp80 , X86::CHS_F },
642 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
643 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
644 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
645 { X86::CMOVB_Fp32 , X86::CMOVB_F },
646 { X86::CMOVB_Fp64 , X86::CMOVB_F },
647 { X86::CMOVB_Fp80 , X86::CMOVB_F },
648 { X86::CMOVE_Fp32 , X86::CMOVE_F },
649 { X86::CMOVE_Fp64 , X86::CMOVE_F },
650 { X86::CMOVE_Fp80 , X86::CMOVE_F },
651 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
652 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
653 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
654 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
655 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
656 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
657 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
658 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
659 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
660 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
661 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
662 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
663 { X86::CMOVP_Fp32 , X86::CMOVP_F },
664 { X86::CMOVP_Fp64 , X86::CMOVP_F },
665 { X86::CMOVP_Fp80 , X86::CMOVP_F },
666 { X86::COS_Fp32 , X86::COS_F },
667 { X86::COS_Fp64 , X86::COS_F },
668 { X86::COS_Fp80 , X86::COS_F },
669 { X86::DIVR_Fp32m , X86::DIVR_F32m },
670 { X86::DIVR_Fp64m , X86::DIVR_F64m },
671 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
672 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
673 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
674 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
675 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
676 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
677 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
678 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
679 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
680 { X86::DIV_Fp32m , X86::DIV_F32m },
681 { X86::DIV_Fp64m , X86::DIV_F64m },
682 { X86::DIV_Fp64m32 , X86::DIV_F32m },
683 { X86::DIV_Fp80m32 , X86::DIV_F32m },
684 { X86::DIV_Fp80m64 , X86::DIV_F64m },
685 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
686 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
687 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
688 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
689 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
690 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
691 { X86::ILD_Fp16m32 , X86::ILD_F16m },
692 { X86::ILD_Fp16m64 , X86::ILD_F16m },
693 { X86::ILD_Fp16m80 , X86::ILD_F16m },
694 { X86::ILD_Fp32m32 , X86::ILD_F32m },
695 { X86::ILD_Fp32m64 , X86::ILD_F32m },
696 { X86::ILD_Fp32m80 , X86::ILD_F32m },
697 { X86::ILD_Fp64m32 , X86::ILD_F64m },
698 { X86::ILD_Fp64m64 , X86::ILD_F64m },
699 { X86::ILD_Fp64m80 , X86::ILD_F64m },
700 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
701 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
702 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
703 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
704 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
705 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
706 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
707 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
708 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
709 { X86::IST_Fp16m32 , X86::IST_F16m },
710 { X86::IST_Fp16m64 , X86::IST_F16m },
711 { X86::IST_Fp16m80 , X86::IST_F16m },
712 { X86::IST_Fp32m32 , X86::IST_F32m },
713 { X86::IST_Fp32m64 , X86::IST_F32m },
714 { X86::IST_Fp32m80 , X86::IST_F32m },
715 { X86::IST_Fp64m32 , X86::IST_FP64m },
716 { X86::IST_Fp64m64 , X86::IST_FP64m },
717 { X86::IST_Fp64m80 , X86::IST_FP64m },
718 { X86::LD_Fp032 , X86::LD_F0 },
719 { X86::LD_Fp064 , X86::LD_F0 },
720 { X86::LD_Fp080 , X86::LD_F0 },
721 { X86::LD_Fp132 , X86::LD_F1 },
722 { X86::LD_Fp164 , X86::LD_F1 },
723 { X86::LD_Fp180 , X86::LD_F1 },
724 { X86::LD_Fp32m , X86::LD_F32m },
725 { X86::LD_Fp32m64 , X86::LD_F32m },
726 { X86::LD_Fp32m80 , X86::LD_F32m },
727 { X86::LD_Fp64m , X86::LD_F64m },
728 { X86::LD_Fp64m80 , X86::LD_F64m },
729 { X86::LD_Fp80m , X86::LD_F80m },
730 { X86::MUL_Fp32m , X86::MUL_F32m },
731 { X86::MUL_Fp64m , X86::MUL_F64m },
732 { X86::MUL_Fp64m32 , X86::MUL_F32m },
733 { X86::MUL_Fp80m32 , X86::MUL_F32m },
734 { X86::MUL_Fp80m64 , X86::MUL_F64m },
735 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
736 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
737 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
738 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
739 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
740 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
741 { X86::SIN_Fp32 , X86::SIN_F },
742 { X86::SIN_Fp64 , X86::SIN_F },
743 { X86::SIN_Fp80 , X86::SIN_F },
744 { X86::SQRT_Fp32 , X86::SQRT_F },
745 { X86::SQRT_Fp64 , X86::SQRT_F },
746 { X86::SQRT_Fp80 , X86::SQRT_F },
747 { X86::ST_Fp32m , X86::ST_F32m },
748 { X86::ST_Fp64m , X86::ST_F64m },
749 { X86::ST_Fp64m32 , X86::ST_F32m },
750 { X86::ST_Fp80m32 , X86::ST_F32m },
751 { X86::ST_Fp80m64 , X86::ST_F64m },
752 { X86::ST_FpP80m , X86::ST_FP80m },
753 { X86::SUBR_Fp32m , X86::SUBR_F32m },
754 { X86::SUBR_Fp64m , X86::SUBR_F64m },
755 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
756 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
757 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
758 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
759 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
760 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
761 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
762 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
763 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
764 { X86::SUB_Fp32m , X86::SUB_F32m },
765 { X86::SUB_Fp64m , X86::SUB_F64m },
766 { X86::SUB_Fp64m32 , X86::SUB_F32m },
767 { X86::SUB_Fp80m32 , X86::SUB_F32m },
768 { X86::SUB_Fp80m64 , X86::SUB_F64m },
769 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
770 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
771 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
772 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
773 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
774 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
775 { X86::TST_Fp32 , X86::TST_F },
776 { X86::TST_Fp64 , X86::TST_F },
777 { X86::TST_Fp80 , X86::TST_F },
778 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
779 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
780 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
781 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
782 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
783 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
789 assert(Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
801 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
803 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
804 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
806 { X86::IST_F16m , X86::IST_FP16m },
807 { X86::IST_F32m , X86::IST_FP32m },
809 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
811 { X86::ST_F32m , X86::ST_FP32m },
812 { X86::ST_F64m , X86::ST_FP64m },
813 { X86::ST_Frr , X86::ST_FPrr },
815 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
816 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
818 { X86::UCOM_FIr , X86::UCOM_FIPr },
820 { X86::UCOM_FPr , X86::UCOM_FPPr },
821 { X86::UCOM_Fr , X86::UCOM_FPr },
836 RegMap[Stack[--StackTop]] = ~0;
841 I->setDesc(TII->get(Opcode));
842 if (Opcode == X86::UCOM_FPPr)
845 I =
BuildMI(*MBB, ++I, dl, TII->
get(X86::ST_FPrr)).addReg(X86::ST0);
854 if (getStackEntry(0) == FPRegNo) {
862 I = freeStackSlotBefore(++I, FPRegNo);
869 unsigned STReg = getSTReg(FPRegNo);
870 unsigned OldSlot = getSlot(FPRegNo);
871 unsigned TopReg = Stack[StackTop-1];
872 Stack[OldSlot] = TopReg;
873 RegMap[TopReg] = OldSlot;
874 RegMap[FPRegNo] = ~0;
875 Stack[--StackTop] = ~0;
882 unsigned Defs = Mask;
884 for (
unsigned i = 0; i < StackTop; ++i) {
885 unsigned RegNo = Stack[i];
886 if (!(Defs & (1 << RegNo)))
888 Kills |= (1 << RegNo);
891 Defs &= ~(1 << RegNo);
893 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
896 while (Kills && Defs) {
899 DEBUG(
dbgs() <<
"Renaming %FP" << KReg <<
" as imp %FP" << DReg <<
"\n");
900 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
902 Kills &= ~(1 << KReg);
903 Defs &= ~(1 << DReg);
907 if (Kills && I != MBB->
begin()) {
910 unsigned KReg = getStackEntry(0);
911 if (!(Kills & (1 << KReg)))
913 DEBUG(
dbgs() <<
"Popping %FP" << KReg <<
"\n");
915 Kills &= ~(1 << KReg);
922 DEBUG(
dbgs() <<
"Killing %FP" << KReg <<
"\n");
923 freeStackSlotBefore(I, KReg);
924 Kills &= ~(1 << KReg);
930 DEBUG(
dbgs() <<
"Defining %FP" << DReg <<
" as 0\n");
933 Defs &= ~(1 << DReg);
944 void FPS::shuffleStackTop(
const unsigned char *FixStack,
950 unsigned OldReg = getStackEntry(FixCount);
952 unsigned Reg = FixStack[FixCount];
958 moveToTop(OldReg, I);
988 "Can only handle fst* & ftst instructions!");
995 duplicatePendingSTBeforeKill(Reg, I);
1017 duplicateToTop(Reg, getScratchReg(), I);
1026 if (MI->
getOpcode() == X86::IST_FP64m ||
1034 }
else if (KillsSrc) {
1052 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1060 duplicatePendingSTBeforeKill(Reg, I);
1087 { X86::ADD_Fp32 , X86::ADD_FST0r },
1088 { X86::ADD_Fp64 , X86::ADD_FST0r },
1089 { X86::ADD_Fp80 , X86::ADD_FST0r },
1090 { X86::DIV_Fp32 , X86::DIV_FST0r },
1091 { X86::DIV_Fp64 , X86::DIV_FST0r },
1092 { X86::DIV_Fp80 , X86::DIV_FST0r },
1093 { X86::MUL_Fp32 , X86::MUL_FST0r },
1094 { X86::MUL_Fp64 , X86::MUL_FST0r },
1095 { X86::MUL_Fp80 , X86::MUL_FST0r },
1096 { X86::SUB_Fp32 , X86::SUB_FST0r },
1097 { X86::SUB_Fp64 , X86::SUB_FST0r },
1098 { X86::SUB_Fp80 , X86::SUB_FST0r },
1103 { X86::ADD_Fp32 , X86::ADD_FST0r },
1104 { X86::ADD_Fp64 , X86::ADD_FST0r },
1105 { X86::ADD_Fp80 , X86::ADD_FST0r },
1106 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1107 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1108 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1109 { X86::MUL_Fp32 , X86::MUL_FST0r },
1110 { X86::MUL_Fp64 , X86::MUL_FST0r },
1111 { X86::MUL_Fp80 , X86::MUL_FST0r },
1112 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1113 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1114 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1119 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1120 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1121 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1122 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1123 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1124 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1125 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1126 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1127 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1128 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1129 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1130 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1135 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1136 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1137 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1138 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1139 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1140 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1141 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1142 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1143 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1144 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1145 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1146 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1164 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1172 unsigned TOS = getStackEntry(0);
1176 if (Op0 != TOS && Op1 != TOS) {
1183 }
else if (KillsOp1) {
1192 duplicateToTop(Op0, Dest, I);
1196 }
else if (!KillsOp0 && !KillsOp1) {
1200 duplicateToTop(Op0, Dest, I);
1207 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1208 "Stack conditions not set up right!");
1212 const TableEntry *InstTable;
1213 bool isForward = TOS == Op0;
1214 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1229 assert(Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1232 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1236 I =
BuildMI(*MBB, I, dl, TII->
get(Opcode)).addReg(getSTReg(NotTOS));
1240 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1241 assert(!updateST0 &&
"Should have updated other operand!");
1247 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1248 assert(UpdatedSlot < StackTop && Dest < 7);
1249 Stack[UpdatedSlot] = Dest;
1250 RegMap[Dest] = UpdatedSlot;
1263 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1279 if (KillsOp0) freeStackSlotAfter(I, Op0);
1280 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1305 if (Op0 != Op1 && KillsOp1) {
1307 freeStackSlotAfter(I, Op1);
1324 unsigned DstST = MO0.
getReg() - X86::ST0;
1325 unsigned SrcST = MO1.
getReg() - X86::ST0;
1331 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1332 assert(!MO0.
isDead() &&
"Cannot copy to dead ST register");
1335 while (NumPendingSTs <= DstST)
1336 PendingST[NumPendingSTs++] = NumFPRegs;
1339 if (isScratchReg(PendingST[DstST])) {
1340 DEBUG(
dbgs() <<
"Clobbering old ST in FP" <<
unsigned(PendingST[DstST])
1342 freeStackSlotBefore(MI, PendingST[DstST]);
1347 duplicatePendingSTBeforeKill(SrcFP, I);
1348 unsigned Slot = getSlot(SrcFP);
1349 unsigned SR = getScratchReg();
1350 PendingST[DstST] = SR;
1354 PendingST[DstST] = SrcFP;
1363 assert(!isLive(DstFP) &&
"Cannot copy ST to live FP register");
1364 assert(NumPendingSTs > SrcST &&
"Cannot copy from dead ST register");
1365 unsigned SrcFP = PendingST[SrcST];
1366 assert(isScratchReg(SrcFP) &&
"Expected ST in a scratch register");
1367 assert(isLive(SrcFP) &&
"Scratch holding ST is dead");
1370 unsigned Slot = getSlot(SrcFP);
1371 Stack[Slot] = DstFP;
1372 RegMap[DstFP] = Slot;
1375 PendingST[SrcST] = NumFPRegs;
1376 while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
1384 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1388 unsigned Slot = getSlot(SrcFP);
1389 Stack[Slot] = DstFP;
1390 RegMap[DstFP] = Slot;
1394 duplicateToTop(SrcFP, DstFP, I);
1402 DEBUG(
dbgs() <<
"Emitting LD_F0 for implicit FP" << Reg <<
'\n');
1408 case X86::FpPOP_RETVAL: {
1422 assert(StackTop < 8 &&
"Stack overflowed before FpPOP_RETVAL");
1424 std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
1425 for (
unsigned i = 0; i != NumFPRegs; ++i)
1472 unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
1473 unsigned NumOps = 0;
1474 for (
unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->
getNumOperands();
1483 unsigned STReg = MO.
getReg() - X86::ST0;
1488 case InlineAsm::Kind_RegUse:
1489 STUses |= (1u << STReg);
1491 case InlineAsm::Kind_RegDef:
1492 case InlineAsm::Kind_RegDefEarlyClobber:
1493 STDefs |= (1u << STReg);
1495 STDeadDefs |= (1u << STReg);
1497 case InlineAsm::Kind_Clobber:
1498 STClobbers |= (1u << STReg);
1506 MI->
emitError(
"fixed input regs must be last on the x87 stack");
1511 MI->
emitError(
"output regs must be last on the x87 stack");
1517 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1518 MI->
emitError(
"clobbers must be last on the x87 stack");
1521 unsigned STPopped = STUses & (STDefs | STClobbers);
1523 MI->
emitError(
"implicitly popped regs must be last on the x87 stack");
1526 DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1527 << NumSTPopped <<
", and defines " << NumSTDefs <<
" regs.\n");
1532 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1533 unsigned FPUsed = 0;
1539 MI->
emitError(
"illegal \"f\" output constraint");
1541 FPUsed |= 1U << FPReg;
1547 FPKills |= 1U << FPReg;
1554 for (
unsigned i = 0; i < NumSTPopped; ++i) {
1555 if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
1557 unsigned SR = getScratchReg();
1558 duplicateToTop(PendingST[i], SR, I);
1559 DEBUG(
dbgs() <<
"Duplicating ST" << i <<
" in FP"
1560 <<
unsigned(PendingST[i]) <<
" to avoid clobbering it.\n");
1566 for (
unsigned i = 0; i < NumSTUses; ++i) {
1567 if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
1569 for (
unsigned j = 0; j < i; ++j) {
1570 if (PendingST[j] != PendingST[i])
1573 unsigned SR = getScratchReg();
1574 duplicateToTop(PendingST[i], SR, I);
1575 DEBUG(
dbgs() <<
"Duplicating ST" << i <<
" in FP"
1576 <<
unsigned(PendingST[i])
1577 <<
" to avoid collision with ST" << j <<
'\n');
1582 unsigned SR = getScratchReg();
1583 DEBUG(
dbgs() <<
"Emitting LD_F0 for ST" << i <<
" in FP" << SR <<
'\n');
1587 if (NumPendingSTs == i)
1590 assert(NumPendingSTs >= NumSTUses &&
"Fixed registers should be assigned");
1593 shuffleStackTop(PendingST, NumPendingSTs, I);
1594 DEBUG({
dbgs() <<
"Before asm: "; dumpStack();});
1602 Op.
setReg(getSTReg(FPReg));
1606 StackTop -= NumSTPopped;
1611 for (
unsigned i = 0; i < NumSTDefs; ++i) {
1612 unsigned SR = getScratchReg();
1614 FPKills &= ~(1u << SR);
1616 for (
unsigned i = 0; i < NumSTDefs; ++i)
1617 PendingST[NumPendingSTs++] = getStackEntry(i);
1618 DEBUG({
dbgs() <<
"After asm: "; dumpStack();});
1623 for (
unsigned i = 0; STDefs & (1u << i); ++i) {
1624 if (!(STDeadDefs & (1u << i)))
1626 freeStackSlotAfter(InsertPt, PendingST[i]);
1627 PendingST[i] = NumFPRegs;
1629 while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
1642 freeStackSlotAfter(InsertPt, FPReg);
1643 FPKills &= ~(1U << FPReg);
1649 case X86::WIN_FTOL_32:
1650 case X86::WIN_FTOL_64: {
1657 moveToTop(FPReg, I);
1659 duplicateToTop(FPReg, FPReg, I);
1663 .addExternalSymbol(
"_ftol2")
1680 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1681 unsigned LiveMask = 0;
1689 assert(Op.
isUse() &&
1693 "Ret only defs operands, and values aren't live beyond it");
1695 if (FirstFPRegOp == ~0U)
1698 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1710 adjustLiveRegs(LiveMask, MI);
1711 if (!LiveMask)
return;
1717 if (SecondFPRegOp == ~0U) {
1719 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1720 "Top of stack not the right register for RET!");
1732 if (StackTop == 1) {
1733 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1734 "Stack misconfiguration for RET!");
1738 unsigned NewReg = getScratchReg();
1739 duplicateToTop(FirstFPRegOp, NewReg, MI);
1740 FirstFPRegOp = NewReg;
1744 assert(StackTop == 2 &&
"Must have two values live!");
1748 if (getStackEntry(0) == SecondFPRegOp) {
1749 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1750 moveToTop(FirstFPRegOp, MI);
1755 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1756 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1765 if (I == MBB->
begin()) {
1766 DEBUG(
dbgs() <<
"Inserting dummy KILL\n");
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries)
static const TableEntry OpcodeTable[]
void push_back(const T &Elt)
const MachineFunction * getParent() const
instr_iterator erase(instr_iterator I)
static PassRegistry * getPassRegistry()
static unsigned getConcreteOpcode(unsigned Opcode)
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
std::vector< unsigned >::const_iterator livein_iterator
iterator getFirstTerminator()
STATISTIC(NumFXCH,"Number of fxch instructions inserted")
void addLiveIn(unsigned Reg)
void operator<(const Optional< T > &X, const Optional< U > &Y)
Poison comparison between two Optional objects. Clients needs to explicitly compare the underlying va...
const MCInstrDesc & getDesc() const
static const TableEntry ReverseSTiTable[]
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
livein_iterator livein_begin() const
AnalysisUsage & addRequired()
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
const HexagonInstrInfo * TII
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
#define llvm_unreachable(msg)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static const TableEntry ForwardST0Table[]
ID
LLVM Calling Convention Representation.
unsigned getNumOperands() const
void RemoveOperand(unsigned i)
static unsigned getFPReg(const MachineOperand &MO)
size_t array_lengthof(T(&)[N])
Find the length of an array.
enable_if_c< std::numeric_limits< T >::is_integer &&!std::numeric_limits< T >::is_signed, std::size_t >::type countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
AnalysisUsage & addPreservedID(const void *ID)
static const TableEntry PopTable[]
bool isImplicitDef() const
bundle_iterator< MachineInstr, instr_iterator > iterator
df_ext_iterator< T, SetTy > df_ext_end(const T &G, SetTy &S)
static const TableEntry ForwardSTiTable[]
livein_iterator livein_end() const
const MachineOperand & getOperand(unsigned i) const
static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode)
static unsigned getNumOperandRegisters(unsigned Flag)
df_ext_iterator< T, SetTy > df_ext_begin(const T &G, SetTy &S)
ItTy next(ItTy it, Dist n)
#define LLVM_ATTRIBUTE_UNUSED
static unsigned getKind(unsigned Flags)
AddrNumOperands - Total number of operands in a memory reference.
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
void DeleteMachineInstr(MachineInstr *MI)
uint64_t NextPowerOf2(uint64_t A)
void emitError(StringRef Msg) const
virtual const TargetInstrInfo * getInstrInfo() const
unsigned CountPopulation_32(uint32_t Value)
void setDesc(const MCInstrDesc &tid)
unsigned CountTrailingOnes_32(uint32_t Value)
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=NULL) const
MachineInstr * remove(MachineInstr *I)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
FunctionPass * createX86FloatingPointStackifierPass()
StringRef getName() const
bool isMask_32(uint32_t Value)
static DebugLoc get(unsigned Line, unsigned Col, MDNode *Scope, MDNode *InlinedAt=0)
static const TableEntry ReverseST0Table[]
MachineRegisterInfo & getRegInfo()
virtual void getAnalysisUsage(AnalysisUsage &AU) const
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
void setReg(unsigned Reg)
const TargetMachine & getTarget() const
unsigned getReg() const
getReg - Returns the register number.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction. Note that variadic (isVari...
#define ASSERT_SORTED(TABLE)
bool isPhysRegUsed(unsigned Reg) const
BasicBlockListType::iterator iterator
ItTy prior(ItTy it, Dist n)
void initializeEdgeBundlesPass(PassRegistry &)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
DebugLoc getDebugLoc() const